diff --git a/.binder/requirements.txt b/.binder/requirements.txt index 51ca95be6785e..bd2b70f5f43b0 100644 --- a/.binder/requirements.txt +++ b/.binder/requirements.txt @@ -1,4 +1,4 @@ ---find-links https://pypi.anaconda.org/scipy-wheels-nightly/simple/scikit-learn +--find-links https://pypi.anaconda.org/scientific-python-nightly-wheels/simple/scikit-learn --pre matplotlib scikit-image @@ -7,3 +7,4 @@ seaborn Pillow sphinx-gallery scikit-learn +polars diff --git a/.circleci/config.yml b/.circleci/config.yml index 4408d2bc36de7..7a98f88b813ad 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -3,7 +3,7 @@ version: 2.1 jobs: lint: docker: - - image: cimg/python:3.8.12 + - image: cimg/python:3.9.18 steps: - checkout - run: @@ -11,19 +11,23 @@ jobs: command: | source build_tools/shared.sh # Include pytest compatibility with mypy - pip install pytest flake8 $(get_dep mypy min) $(get_dep black min) cython-lint + pip install pytest $(get_dep ruff min) $(get_dep mypy min) $(get_dep black min) cython-lint - run: name: linting command: ./build_tools/linting.sh doc-min-dependencies: docker: - - image: cimg/python:3.8.12 + - image: cimg/python:3.9.18 environment: - MKL_NUM_THREADS: 2 - OPENBLAS_NUM_THREADS: 2 - CONDA_ENV_NAME: testenv - LOCK_FILE: build_tools/circle/doc_min_dependencies_linux-64_conda.lock + # Do not fail if the documentation build generates warnings with minimum + # dependencies as long as we can avoid raising warnings with more recent + # versions of the same dependencies. + - SKLEARN_WARNINGS_AS_ERRORS: '0' steps: - checkout - run: ./build_tools/circle/checkout_merge_commit.sh @@ -52,12 +56,15 @@ jobs: doc: docker: - - image: cimg/python:3.8.12 + - image: cimg/python:3.9.18 environment: - MKL_NUM_THREADS: 2 - OPENBLAS_NUM_THREADS: 2 - CONDA_ENV_NAME: testenv - LOCK_FILE: build_tools/circle/doc_linux-64_conda.lock + # Make sure that we fail if the documentation build generates warnings with + # recent versions of the dependencies. + - SKLEARN_WARNINGS_AS_ERRORS: '1' steps: - checkout - run: ./build_tools/circle/checkout_merge_commit.sh @@ -91,7 +98,7 @@ jobs: deploy: docker: - - image: cimg/python:3.8.12 + - image: cimg/python:3.9.18 steps: - checkout - run: ./build_tools/circle/checkout_merge_commit.sh diff --git a/.cirrus.star b/.cirrus.star index 8b3de0d10c532..f0b458d74289a 100644 --- a/.cirrus.star +++ b/.cirrus.star @@ -14,7 +14,7 @@ def main(ctx): # Nightly jobs always run if env.get("CIRRUS_CRON", "") == "nightly": - return fs.read(arm_wheel_yaml) + return fs.read(arm_wheel_yaml) + fs.read(arm_tests_yaml) # Get commit message for event. We can not use `git` here because there is # no command line access in starlark. 
Thus we need to query the GitHub API @@ -26,10 +26,12 @@ def main(ctx): response = http.get(url).json() commit_msg = response["message"] - if "[skip ci]" in commit_msg: - return [] + jobs_to_run = "" if "[cd build]" in commit_msg or "[cd build cirrus]" in commit_msg: - return fs.read(arm_wheel_yaml) + fs.read(arm_tests_yaml) + jobs_to_run += fs.read(arm_wheel_yaml) + + if "[cirrus arm]" in commit_msg: + jobs_to_run += fs.read(arm_tests_yaml) - return fs.read(arm_tests_yaml) + return jobs_to_run diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 66991b140c2b6..b261320543fa7 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -28,3 +28,9 @@ d4aad64b1eb2e42e76f49db2ccfbe4b4660d092b # PR 26110: Update black to 23.3.0 893d5accaf9d16f447645e704f85a216187564f7 + +# PR 26649: Add isort and ruff rules +42173fdb34b5aded79664e045cada719dfbe39dc + +# PR #28802: Update black to 24.3.0 +c4c546355667b070edd5c892b206aa4a97af9a0b diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000000..f45e0f29ccfa2 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,6 @@ +.* export-ignore +asv_benchmarks export-ignore +azure-pipelines.yml export-ignore +benchmarks export-ignore +build_tools export-ignore +maint_tools export-ignore diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index df6843304f443..8d9c592ccdc13 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -9,9 +9,9 @@ contact_links: - name: Mailing list url: https://mail.python.org/mailman/listinfo/scikit-learn about: General discussions and announcements on the mailing list - - name: Gitter - url: https://gitter.im/scikit-learn/scikit-learn - about: Users and developers can sometimes be found on the gitter channel + - name: Discord server + url: https://discord.gg/h9qyrK8Jc8 + about: Developers and users can be found on the Discord server - name: Blank issue url: https://github.com/scikit-learn/scikit-learn/issues/new - about: Please note that Github Discussions should be used in most cases instead + about: Please note that GitHub Discussions should be used in most cases instead diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 8528d5386b58a..f59f9bc2fbcd7 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -26,7 +26,7 @@ review, either the pull request needs some benchmarking, tinkering, convincing, etc. or more likely the reviewers are simply busy. In either case, we ask for your understanding during the review process. For more information, see our FAQ on this topic: -http://scikit-learn.org/dev/faq.html#why-is-my-pull-request-not-getting-any-attention. +https://scikit-learn.org/dev/faq.html#why-is-my-pull-request-not-getting-any-attention. Thanks for contributing! --> diff --git a/.github/scripts/label_title_regex.py b/.github/scripts/label_title_regex.py index ddf9bda3492de..9a689b8db09b4 100644 --- a/.github/scripts/label_title_regex.py +++ b/.github/scripts/label_title_regex.py @@ -1,10 +1,12 @@ """Labels PRs based on title. 
Must be run in a github action with the pull_request_target event.""" -from github import Github -import os + import json +import os import re +from github import Github + context_dict = json.loads(os.getenv("CONTEXT_GITHUB")) repo = context_dict["repository"] diff --git a/.github/workflows/artifact-redirector.yml b/.github/workflows/artifact-redirector.yml index 3fdbc06fac386..690cacefda935 100644 --- a/.github/workflows/artifact-redirector.yml +++ b/.github/workflows/artifact-redirector.yml @@ -15,7 +15,7 @@ jobs: name: Run CircleCI artifacts redirector steps: - name: GitHub Action step - uses: larsoner/circleci-artifacts-redirector-action@master + uses: scientific-python/circleci-artifacts-redirector-action@v1 with: repo-token: ${{ secrets.GITHUB_TOKEN }} api-token: ${{ secrets.CIRCLECI_TOKEN }} diff --git a/.github/workflows/assign.yml b/.github/workflows/assign.yml index 9f87b8fa7e0f9..fa3b6f95a5e95 100644 --- a/.github/workflows/assign.yml +++ b/.github/workflows/assign.yml @@ -20,5 +20,8 @@ jobs: steps: - run: | echo "Assigning issue ${{ github.event.issue.number }} to ${{ github.event.comment.user.login }}" - curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -d '{"assignees": ["${{ github.event.comment.user.login }}"]}' https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/assignees - curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -X "DELETE" https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/labels/help%20wanted + gh issue edit $ISSUE --add-assignee ${{ github.event.comment.user.login }} + gh issue edit $ISSUE --remove-label "help wanted" + env: + GH_TOKEN: ${{ github.token }} + ISSUE: ${{ github.event.issue.html_url }} diff --git a/.github/workflows/check-manifest.yml b/.github/workflows/check-sdist.yml similarity index 71% rename from .github/workflows/check-manifest.yml rename to .github/workflows/check-sdist.yml index 004cc452e385e..c02af711bdb6c 100644 --- a/.github/workflows/check-manifest.yml +++ b/.github/workflows/check-sdist.yml @@ -1,33 +1,33 @@ -name: "Check Manifest" +name: "Check sdist" on: schedule: - cron: '0 0 * * *' jobs: - check-manifest: + check-sdist: # Don't run on forks if: github.repository == 'scikit-learn/scikit-learn' runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 with: python-version: '3.9' - name: Install dependencies # scipy and cython are required to build sdist run: | python -m pip install --upgrade pip - pip install check-manifest scipy cython + pip install check-sdist - run: | - check-manifest -v + check-sdist --inject-junk update-tracker: uses: ./.github/workflows/update_tracking_issue.yml if: ${{ always() }} - needs: [check-manifest] + needs: [check-sdist] with: - job_status: ${{ needs.check-manifest.result }} + job_status: ${{ needs.check-sdist.result }} secrets: BOT_GITHUB_TOKEN: ${{ secrets.BOT_GITHUB_TOKEN }} diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 0000000000000..4d38b22d71ab8 --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,73 @@ +name: "CodeQL" + +on: + push: + branches: [ "main", "*.X" ] + pull_request: + branches: [ "main", "*.X" ] + schedule: + - cron: '0 6 * * 1' + +jobs: + analyze: + name: Analyze + # Runner size impacts CodeQL analysis time. 
To learn more, please see: + # - https://gh.io/recommended-hardware-resources-for-running-codeql + # - https://gh.io/supported-runners-and-hardware-resources + # - https://gh.io/using-larger-runners + # Consider using larger runners for possible analysis time improvements. + runs-on: 'ubuntu-latest' + timeout-minutes: 360 + permissions: + # required for all workflows + security-events: write + + # only required for workflows in private repositories + actions: read + contents: read + + strategy: + fail-fast: false + matrix: + language: [ 'javascript-typescript', 'python' ] + # CodeQL supports [ 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' ] + # Use only 'java-kotlin' to analyze code written in Java, Kotlin or both + # Use only 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both + # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@v3 + with: + languages: ${{ matrix.language }} + # If you wish to specify custom queries, you can do so here or in a config file. + # By default, queries listed here will override any specified in a config file. + # Prefix the list here with "+" to use these queries and those in the config file. + + # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs + # queries: security-extended,security-and-quality + + + # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift). + # If this step fails, then you should remove it and run the build manually (see below) + - name: Autobuild + uses: github/codeql-action/autobuild@v3 + + # â„šī¸ Command-line programs to run using the OS shell. + # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun + + # If the Autobuild fails above, remove it and uncomment the following three lines. + # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance. + + # - run: | + # echo "Run, Build Application using script" + # ./location_of_script_within_repo/buildscript.sh + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v3 + with: + category: "/language:${{matrix.language}}" diff --git a/.github/workflows/labeler-title-regex.yml b/.github/workflows/labeler-title-regex.yml index f610aecbdb4e1..10195eca13a73 100644 --- a/.github/workflows/labeler-title-regex.yml +++ b/.github/workflows/labeler-title-regex.yml @@ -16,7 +16,7 @@ jobs: runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 with: python-version: '3.9' - name: Install PyGithub diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000000000..30dd632289b6e --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,103 @@ +# This linter job on GH actions is used to trigger the commenter bot +# in bot-lint-comment.yml file. It stores the output of the linter to be used +# by the commenter bot. 
+name: linter + +on: + - pull_request_target + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref }} + cancel-in-progress: true + +jobs: + lint: + runs-on: ubuntu-latest + + # setting any permission will set everything else to none for GITHUB_TOKEN + permissions: + pull-requests: none + + steps: + - name: Checkout code + uses: actions/checkout@v3 + with: + ref: ${{ github.event.pull_request.head.sha }} + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: 3.11 + + - name: Install dependencies + run: | + source build_tools/shared.sh + # Include pytest compatibility with mypy + pip install pytest $(get_dep ruff min) $(get_dep mypy min) $(get_dep black min) cython-lint + # we save the versions of the linters to be used in the error message later. + python -c "from importlib.metadata import version; print(f\"ruff={version('ruff')}\")" >> /tmp/versions.txt + python -c "from importlib.metadata import version; print(f\"mypy={version('mypy')}\")" >> /tmp/versions.txt + python -c "from importlib.metadata import version; print(f\"black={version('black')}\")" >> /tmp/versions.txt + python -c "from importlib.metadata import version; print(f\"cython-lint={version('cython-lint')}\")" >> /tmp/versions.txt + + - name: Run linting + id: lint-script + # We download the linting script from main, since this workflow is run + # from main itself. + run: | + curl https://raw.githubusercontent.com/${{ github.repository }}/main/build_tools/linting.sh --retry 5 -o ./build_tools/linting.sh + set +e + ./build_tools/linting.sh &> /tmp/linting_output.txt + cat /tmp/linting_output.txt + + - name: Upload Artifact + if: always() + uses: actions/upload-artifact@v4 + with: + name: lint-log + path: | + /tmp/linting_output.txt + /tmp/versions.txt + retention-days: 1 + + comment: + needs: lint + if: ${{ !cancelled() }} + runs-on: ubuntu-latest + + # We need these permissions to be able to post / update comments + permissions: + pull-requests: write + issues: write + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: 3.11 + + - name: Install dependencies + run: python -m pip install requests + + - name: Download artifact + id: download-artifact + uses: actions/download-artifact@v4 + with: + name: lint-log + + - name: Print log + run: cat linting_output.txt + + - name: Process Comments + id: process-comments + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + PR_NUMBER: ${{ github.event.pull_request.number }} + BRANCH_SHA: ${{ github.event.pull_request.head.sha }} + RUN_ID: ${{ github.run_id }} + LOG_FILE: linting_output.txt + VERSIONS_FILE: versions.txt + run: python ./build_tools/get_comment.py diff --git a/.github/workflows/publish_pypi.yml b/.github/workflows/publish_pypi.yml index 826aa0ed8a4b1..b8940ae133ad9 100644 --- a/.github/workflows/publish_pypi.yml +++ b/.github/workflows/publish_pypi.yml @@ -13,12 +13,13 @@ on: jobs: publish: runs-on: ubuntu-latest + environment: publish_pypi permissions: # IMPORTANT: this permission is mandatory for trusted publishing id-token: write steps: - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 with: python-version: '3.8' - name: Install dependencies diff --git a/.github/workflows/unassign.yml b/.github/workflows/unassign.yml index c73b854530ff7..94a50d49839d6 100644 --- a/.github/workflows/unassign.yml +++ b/.github/workflows/unassign.yml @@ -18,4 +18,7 @@ jobs: if: github.event.issue.state == 'open' 
run: | echo "Marking issue ${{ github.event.issue.number }} as help wanted" - curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -d '{"labels": ["help wanted"]}' https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/labels + gh issue edit $ISSUE --add-label "help wanted" + env: + GH_TOKEN: ${{ github.token }} + ISSUE: ${{ github.event.issue.html_url }} diff --git a/.github/workflows/update-lock-files.yml b/.github/workflows/update-lock-files.yml new file mode 100644 index 0000000000000..50d62c85d00a6 --- /dev/null +++ b/.github/workflows/update-lock-files.yml @@ -0,0 +1,71 @@ +# Workflow to update lock files +name: Update lock files + +on: + workflow_dispatch: + schedule: + - cron: '0 5 * * 1' + +jobs: + update_lock_files: + if: github.repository == 'scikit-learn/scikit-learn' + runs-on: ubuntu-latest + + strategy: + # Ensure that each build will continue even if one build in the matrix fails + fail-fast: false + matrix: + include: + - name: main + update_script_args: "--select-tag main-ci" + additional_commit_message: "[doc build]" + - name: scipy-dev + update_script_args: "--select-tag scipy-dev" + additional_commit_message: "[scipy-dev]" + - name: cirrus-arm + update_script_args: "--select-tag arm" + additional_commit_message: "[cirrus arm]" + - name: pypy + update_script_args: "--select-tag pypy" + additional_commit_message: "[pypy]" + + steps: + - uses: actions/checkout@v4 + - name: Generate lock files + run: | + source build_tools/shared.sh + source $CONDA/bin/activate + conda install -n base conda conda-libmamba-solver -y + conda config --set solver libmamba + conda install -c conda-forge "$(get_dep conda-lock min)" -y + + python build_tools/update_environments_and_lock_files.py ${{ matrix.update_script_args }} + + - name: Create Pull Request + id: cpr + uses: peter-evans/create-pull-request@v5 + with: + token: ${{ secrets.BOT_GITHUB_TOKEN }} + push-to-fork: scikit-learn-bot/scikit-learn + commit-message: Update CI lock files ${{ matrix.additional_commit_message }} + committer: "Lock file bot " + author: "Lock file bot " + delete-branch: true + branch: auto-update-lock-files-${{ matrix.name }} + title: ":lock: :robot: CI Update lock files for ${{ matrix.name }} CI build(s) :lock: :robot:" + body: | + Update lock files. + + ### Note + If the CI tasks fail, create a new branch based on this PR and add the required fixes to that branch. 
+ + - name: Check Pull Request + if: steps.cpr.outputs.pull-request-number != '' + run: | + echo "### :rocket: Pull-Request Summary" >> ${GITHUB_STEP_SUMMARY} + echo "" >> ${GITHUB_STEP_SUMMARY} + echo "The following lock files pull-request has been auto-generated:" + echo "- **PR** #${{ steps.cpr.outputs.pull-request-number }}" >> ${GITHUB_STEP_SUMMARY} + echo "- **URL** ${{ steps.cpr.outputs.pull-request-url }}" >> ${GITHUB_STEP_SUMMARY} + echo "- **Operation** [${{ steps.cpr.outputs.pull-request-operation }}]" >> ${GITHUB_STEP_SUMMARY} + echo "- **SHA** ${{ steps.cpr.outputs.pull-request-head-sha }}" >> ${GITHUB_STEP_SUMMARY} diff --git a/.github/workflows/update_tracking_issue.yml b/.github/workflows/update_tracking_issue.yml index 124ea1e8c6ac4..d4538fe6848d8 100644 --- a/.github/workflows/update_tracking_issue.yml +++ b/.github/workflows/update_tracking_issue.yml @@ -27,7 +27,7 @@ jobs: if: github.repository == 'scikit-learn/scikit-learn' && github.event_name == 'schedule' steps: - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 with: python-version: '3.9' - name: Update tracking issue on GitHub diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index b43f29ffa4f7f..632425dcd046e 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -40,7 +40,7 @@ jobs: name: Check build trigger run: bash build_tools/github/check_build_trigger.sh - # Build the wheels for Linux, Windows and macOS for Python 3.8 and newer + # Build the wheels for Linux, Windows and macOS for Python 3.9 and newer build_wheels: name: Build wheel for cp${{ matrix.python }}-${{ matrix.platform_id }}-${{ matrix.manylinux_image }} runs-on: ${{ matrix.os }} @@ -53,11 +53,6 @@ jobs: matrix: include: # Window 64 bit - # Note: windows-2019 is needed for older Python versions: - # https://github.com/scikit-learn/scikit-learn/issues/22530 - - os: windows-2019 - python: 38 - platform_id: win_amd64 - os: windows-latest python: 39 platform_id: win_amd64 @@ -67,12 +62,11 @@ jobs: - os: windows-latest python: 311 platform_id: win_amd64 + - os: windows-latest + python: 312 + platform_id: win_amd64 # Linux 64 bit manylinux2014 - - os: ubuntu-latest - python: 38 - platform_id: manylinux_x86_64 - manylinux_image: manylinux2014 - os: ubuntu-latest python: 39 platform_id: manylinux_x86_64 @@ -88,54 +82,102 @@ jobs: python: 311 platform_id: manylinux_x86_64 manylinux_image: manylinux2014 + - os: ubuntu-latest + python: 312 + platform_id: manylinux_x86_64 + manylinux_image: manylinux2014 # MacOS x86_64 - - os: macos-latest - python: 38 - platform_id: macosx_x86_64 - - os: macos-latest + - os: macos-12 python: 39 platform_id: macosx_x86_64 - - os: macos-latest + - os: macos-12 python: 310 platform_id: macosx_x86_64 - - os: macos-latest + - os: macos-12 python: 311 platform_id: macosx_x86_64 + - os: macos-12 + python: 312 + platform_id: macosx_x86_64 + + # MacOS arm64 + - os: macos-14 + python: 39 + platform_id: macosx_arm64 + - os: macos-14 + python: 310 + platform_id: macosx_arm64 + - os: macos-14 + python: 311 + platform_id: macosx_arm64 + - os: macos-14 + python: 312 + platform_id: macosx_arm64 steps: - name: Checkout scikit-learn uses: actions/checkout@v3 - name: Setup Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: - python-version: '3.9' # update once build dependencies are available + python-version: "3.11" # update once build dependencies are available + + - name: Install conda for macos arm64 + if: ${{ 
matrix.platform_id == 'macosx_arm64' }} + run: | + set -ex + # macos arm64 runners do not have conda installed. Thus we must install conda manually + EXPECTED_SHA="dd832d8a65a861b5592b2cf1d55f26031f7c1491b30321754443931e7b1e6832" + MINIFORGE_URL="https://github.com/conda-forge/miniforge/releases/download/23.11.0-0/Mambaforge-23.11.0-0-MacOSX-arm64.sh" + curl -L --retry 10 $MINIFORGE_URL -o miniforge.sh + + # Check SHA + file_sha=$(shasum -a 256 miniforge.sh | awk '{print $1}') + if [ "$EXPECTED_SHA" != "$file_sha" ]; then + echo "SHA values did not match!" + exit 1 + fi + + # Install miniforge + MINIFORGE_PATH=$HOME/miniforge + bash ./miniforge.sh -b -p $MINIFORGE_PATH + echo "$MINIFORGE_PATH/bin" >> $GITHUB_PATH + echo "CONDA_HOME=$MINIFORGE_PATH" >> $GITHUB_ENV + + - name: Set conda environment for non-macos arm64 environments + if: ${{ matrix.platform_id != 'macosx_arm64' }} + run: | + # Non-macos arm64 environments already have conda installed + echo "CONDA_HOME=/usr/local/miniconda" >> $GITHUB_ENV - name: Build and test wheels env: - CONFTEST_PATH: ${{ github.workspace }}/conftest.py - CONFTEST_NAME: conftest.py + CIBW_PRERELEASE_PYTHONS: ${{ matrix.prerelease }} CIBW_ENVIRONMENT: SKLEARN_SKIP_NETWORK_TESTS=1 - SKLEARN_BUILD_PARALLEL=3 + SKLEARN_BUILD_PARALLEL=3 CIBW_BUILD: cp${{ matrix.python }}-${{ matrix.platform_id }} CIBW_ARCHS: all CIBW_MANYLINUX_X86_64_IMAGE: ${{ matrix.manylinux_image }} CIBW_MANYLINUX_I686_IMAGE: ${{ matrix.manylinux_image }} - CIBW_TEST_SKIP: "*-macosx_arm64" + # Needed on Windows CI to compile with Visual Studio compiler + # otherwise Meson detects a MINGW64 platform and uses the MINGW64 + # toolchain + CIBW_CONFIG_SETTINGS_WINDOWS: "setup-args=--vsenv" CIBW_REPAIR_WHEEL_COMMAND_WINDOWS: bash build_tools/github/repair_windows_wheels.sh {wheel} {dest_dir} CIBW_BEFORE_TEST_WINDOWS: bash build_tools/github/build_minimal_windows_image.sh ${{ matrix.python }} - CIBW_TEST_REQUIRES: pytest pandas threadpoolctl + CIBW_TEST_REQUIRES: pytest pandas ${{ matrix.python == 312 && 'numpy>=2.0.0rc2' || '' }} CIBW_TEST_COMMAND: bash {project}/build_tools/wheels/test_wheels.sh CIBW_TEST_COMMAND_WINDOWS: bash {project}/build_tools/github/test_windows_wheels.sh ${{ matrix.python }} CIBW_BUILD_VERBOSITY: 1 - CONDA_HOME: /usr/local/miniconda run: bash build_tools/wheels/build_wheels.sh - name: Store artifacts - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: + name: cibw-wheels-cp${{ matrix.python }}-${{ matrix.platform_id }} path: wheelhouse/*.whl update-tracker: @@ -159,9 +201,9 @@ jobs: uses: actions/checkout@v3 - name: Setup Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: - python-version: '3.9' # update once build dependencies are available + python-version: "3.9" # update once build dependencies are available - name: Build source distribution run: bash build_tools/github/build_source.sh @@ -174,14 +216,16 @@ jobs: SKLEARN_SKIP_NETWORK_TESTS: 1 - name: Store artifacts - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: + name: cibw-sdist path: dist/*.tar.gz # Upload the wheels and the source distribution upload_anaconda: name: Upload to Anaconda runs-on: ubuntu-latest + environment: upload_anaconda needs: [build_wheels, build_sdist] # The artifacts cannot be uploaded on PRs if: github.event_name != 'pull_request' @@ -191,18 +235,20 @@ jobs: uses: actions/checkout@v3 - name: Download artifacts - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: + pattern: cibw-* path: dist +
merge-multiple: true - name: Setup Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 - name: Upload artifacts env: # Secret variables need to be mapped to environment variables explicitly SCIKIT_LEARN_NIGHTLY_UPLOAD_TOKEN: ${{ secrets.SCIKIT_LEARN_NIGHTLY_UPLOAD_TOKEN }} SCIKIT_LEARN_STAGING_UPLOAD_TOKEN: ${{ secrets.SCIKIT_LEARN_STAGING_UPLOAD_TOKEN }} - ARTIFACTS_PATH: dist/artifact + ARTIFACTS_PATH: dist # Force a replacement if the remote file already exists run: bash build_tools/github/upload_anaconda.sh diff --git a/.gitignore b/.gitignore index f4601a15655a5..61c89bcb96491 100644 --- a/.gitignore +++ b/.gitignore @@ -13,10 +13,15 @@ sklearn/**/*.html dist/ MANIFEST +doc/sg_execution_times.rst doc/_build/ +doc/api/*.rst doc/auto_examples/ +doc/css/* +!doc/css/.gitkeep doc/modules/generated/ doc/datasets/generated/ +doc/index.rst doc/min_dependency_table.rst doc/min_dependency_substitutions.rst *.pdf @@ -53,11 +58,15 @@ nips2010_pdf/ examples/cluster/joblib reuters/ benchmarks/bench_covertype_data/ +benchmarks/HIGGS.csv.gz +bench_pca_solvers.csv *.prefs .pydevproject .idea .vscode +# used by pyenv +.python-version *.c *.cpp @@ -99,6 +108,10 @@ sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pxd sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pxd sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx +sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors_classmode.pyx +sklearn/neighbors/_ball_tree.pyx +sklearn/neighbors/_binary_tree.pxi +sklearn/neighbors/_kd_tree.pyx # Default JupyterLite content jupyterlite_contents diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 26db27bc827b2..abe14acc7778c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -5,17 +5,18 @@ repos: - id: check-yaml - id: end-of-file-fixer - id: trailing-whitespace +- repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version. + rev: v0.2.1 + hooks: + - id: ruff + args: ["--fix", "--output-format=full"] - repo: https://github.com/psf/black - rev: 23.3.0 + rev: 24.3.0 hooks: - id: black -- repo: https://github.com/pycqa/flake8 - rev: 4.0.1 - hooks: - - id: flake8 - types: [file, python] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.961 + rev: v1.9.0 hooks: - id: mypy files: sklearn/ @@ -26,3 +27,10 @@ repos: # TODO: add the double-quote-cython-strings hook when its usability has improved: # possibility to pass a directory and use it as a check instead of auto-formatter. - id: cython-lint +- repo: https://github.com/pre-commit/mirrors-prettier + rev: v2.7.1 + hooks: + - id: prettier + files: ^doc/scss/|^doc/js/scripts/ + exclude: ^doc/js/scripts/vendor/ + types_or: ["scss", "javascript"] diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f6f65883c65b2..92a673462e3a6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -18,7 +18,7 @@ Documentation can be found under the But there are many other ways to help. In particular answering queries on the [issue tracker](https://github.com/scikit-learn/scikit-learn/issues), investigating bugs, and [reviewing other developers' pull -requests](http://scikit-learn.org/dev/developers/contributing.html#code-review-guidelines) +requests](https://scikit-learn.org/dev/developers/contributing.html#code-review-guidelines) are very valuable contributions that decrease the burden on the project maintainers.
@@ -30,8 +30,8 @@ link to it from your website, or simply star it in GitHub to say "I use it". Quick links ----------- -* [Submitting a bug report or feature request](http://scikit-learn.org/dev/developers/contributing.html#submitting-a-bug-report-or-a-feature-request) -* [Contributing code](http://scikit-learn.org/dev/developers/contributing.html#contributing-code) +* [Submitting a bug report or feature request](https://scikit-learn.org/dev/developers/contributing.html#submitting-a-bug-report-or-a-feature-request) +* [Contributing code](https://scikit-learn.org/dev/developers/contributing.html#contributing-code) * [Coding guidelines](https://scikit-learn.org/dev/developers/develop.html#coding-guidelines) * [Tips to read current code](https://scikit-learn.org/dev/developers/contributing.html#reading-the-existing-code-base) diff --git a/COPYING b/COPYING index b161c890897cc..e1cd01d584578 100644 --- a/COPYING +++ b/COPYING @@ -1,6 +1,6 @@ BSD 3-Clause License -Copyright (c) 2007-2023 The scikit-learn developers. +Copyright (c) 2007-2024 The scikit-learn developers. All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/MANIFEST.in b/MANIFEST.in index 6087d0922b24e..1596d4cd011df 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,6 @@ include *.rst +include *.build +recursive-include sklearn *.build recursive-include doc * recursive-include examples * recursive-include sklearn *.c *.cpp *.h *.pyx *.pxd *.pxi *.tp diff --git a/Makefile b/Makefile index 5ea64dc0d6cac..52374ba44ff79 100644 --- a/Makefile +++ b/Makefile @@ -23,6 +23,12 @@ in: inplace # just a shortcut inplace: $(PYTHON) setup.py build_ext -i +dev-meson: + pip install --verbose --no-build-isolation --editable . --config-settings editable-verbose=true + +clean-meson: + pip uninstall -y scikit-learn + test-code: in $(PYTEST) --showlocals -v sklearn --durations=20 test-sphinxext: @@ -61,5 +67,4 @@ doc-noplot: inplace $(MAKE) -C doc html-noplot code-analysis: - flake8 sklearn | grep -v __init__ | grep -v external - pylint -E -i y sklearn/ -d E1103,E0611,E1101 + build_tools/linting.sh diff --git a/README.rst b/README.rst index 80de41a8890a1..4ac297063c26e 100644 --- a/README.rst +++ b/README.rst @@ -1,45 +1,45 @@ .. -*- mode: rst -*- -|Azure|_ |CirrusCI|_ |Codecov|_ |CircleCI|_ |Nightly wheels|_ |Black|_ |PythonVersion|_ |PyPi|_ |DOI|_ |Benchmark|_ +|Azure| |CirrusCI| |Codecov| |CircleCI| |Nightly wheels| |Black| |PythonVersion| |PyPi| |DOI| |Benchmark| .. |Azure| image:: https://dev.azure.com/scikit-learn/scikit-learn/_apis/build/status/scikit-learn.scikit-learn?branchName=main -.. _Azure: https://dev.azure.com/scikit-learn/scikit-learn/_build/latest?definitionId=1&branchName=main + :target: https://dev.azure.com/scikit-learn/scikit-learn/_build/latest?definitionId=1&branchName=main .. |CircleCI| image:: https://circleci.com/gh/scikit-learn/scikit-learn/tree/main.svg?style=shield -.. _CircleCI: https://circleci.com/gh/scikit-learn/scikit-learn + :target: https://circleci.com/gh/scikit-learn/scikit-learn .. |CirrusCI| image:: https://img.shields.io/cirrus/github/scikit-learn/scikit-learn/main?label=Cirrus%20CI -.. _CirrusCI: https://cirrus-ci.com/github/scikit-learn/scikit-learn/main + :target: https://cirrus-ci.com/github/scikit-learn/scikit-learn/main .. |Codecov| image:: https://codecov.io/gh/scikit-learn/scikit-learn/branch/main/graph/badge.svg?token=Pk8G9gg3y9 -.. _Codecov: https://codecov.io/gh/scikit-learn/scikit-learn + :target: https://codecov.io/gh/scikit-learn/scikit-learn .. 
|Nightly wheels| image:: https://github.com/scikit-learn/scikit-learn/workflows/Wheel%20builder/badge.svg?event=schedule -.. _`Nightly wheels`: https://github.com/scikit-learn/scikit-learn/actions?query=workflow%3A%22Wheel+builder%22+event%3Aschedule + :target: https://github.com/scikit-learn/scikit-learn/actions?query=workflow%3A%22Wheel+builder%22+event%3Aschedule -.. |PythonVersion| image:: https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10-blue -.. _PythonVersion: https://pypi.org/project/scikit-learn/ +.. |PythonVersion| image:: https://img.shields.io/pypi/pyversions/scikit-learn.svg + :target: https://pypi.org/project/scikit-learn/ .. |PyPi| image:: https://img.shields.io/pypi/v/scikit-learn -.. _PyPi: https://pypi.org/project/scikit-learn + :target: https://pypi.org/project/scikit-learn .. |Black| image:: https://img.shields.io/badge/code%20style-black-000000.svg -.. _Black: https://github.com/psf/black + :target: https://github.com/psf/black .. |DOI| image:: https://zenodo.org/badge/21369/scikit-learn/scikit-learn.svg -.. _DOI: https://zenodo.org/badge/latestdoi/21369/scikit-learn/scikit-learn + :target: https://zenodo.org/badge/latestdoi/21369/scikit-learn/scikit-learn .. |Benchmark| image:: https://img.shields.io/badge/Benchmarked%20by-asv-blue -.. _`Benchmark`: https://scikit-learn.org/scikit-learn-benchmarks/ - -.. |PythonMinVersion| replace:: 3.8 -.. |NumPyMinVersion| replace:: 1.17.3 -.. |SciPyMinVersion| replace:: 1.5.0 -.. |JoblibMinVersion| replace:: 1.1.1 -.. |ThreadpoolctlMinVersion| replace:: 2.0.0 -.. |MatplotlibMinVersion| replace:: 3.1.3 -.. |Scikit-ImageMinVersion| replace:: 0.16.2 -.. |PandasMinVersion| replace:: 1.0.5 + :target: https://scikit-learn.org/scikit-learn-benchmarks + +.. |PythonMinVersion| replace:: 3.9 +.. |NumPyMinVersion| replace:: 1.19.5 +.. |SciPyMinVersion| replace:: 1.6.0 +.. |JoblibMinVersion| replace:: 1.2.0 +.. |ThreadpoolctlMinVersion| replace:: 3.1.0 +.. |MatplotlibMinVersion| replace:: 3.3.4 +.. |Scikit-ImageMinVersion| replace:: 0.17.2 +.. |PandasMinVersion| replace:: 1.1.5 .. |SeabornMinVersion| replace:: 0.9.0 .. |PytestMinVersion| replace:: 7.1.2 .. |PlotlyMinVersion| replace:: 5.14.0 @@ -80,7 +80,7 @@ scikit-learn 1.0 and later require Python 3.7 or newer. scikit-learn 1.1 and later require Python 3.8 or newer. Scikit-learn plotting capabilities (i.e., functions start with ``plot_`` and -classes end with "Display") require Matplotlib (>= |MatplotlibMinVersion|). +classes end with ``Display``) require Matplotlib (>= |MatplotlibMinVersion|). For running the examples Matplotlib >= |MatplotlibMinVersion| is required. 
A few examples require scikit-image >= |Scikit-ImageMinVersion|, a few examples require pandas >= |PandasMinVersion|, some examples require seaborn >= @@ -89,7 +89,7 @@ require pandas >= |PandasMinVersion|, some examples require seaborn >= User installation ~~~~~~~~~~~~~~~~~ -If you already have a working installation of numpy and scipy, +If you already have a working installation of NumPy and SciPy, the easiest way to install scikit-learn is using ``pip``:: pip install -U scikit-learn @@ -184,19 +184,21 @@ Communication ~~~~~~~~~~~~~ - Mailing list: https://mail.python.org/mailman/listinfo/scikit-learn -- Gitter: https://gitter.im/scikit-learn/scikit-learn - Logos & Branding: https://github.com/scikit-learn/scikit-learn/tree/main/doc/logos - Blog: https://blog.scikit-learn.org - Calendar: https://blog.scikit-learn.org/calendar/ - Twitter: https://twitter.com/scikit_learn - Stack Overflow: https://stackoverflow.com/questions/tagged/scikit-learn -- Github Discussions: https://github.com/scikit-learn/scikit-learn/discussions +- GitHub Discussions: https://github.com/scikit-learn/scikit-learn/discussions - Website: https://scikit-learn.org - LinkedIn: https://www.linkedin.com/company/scikit-learn - YouTube: https://www.youtube.com/channel/UCJosFjYm0ZYVUARxuOZqnnw/playlists - Facebook: https://www.facebook.com/scikitlearnofficial/ - Instagram: https://www.instagram.com/scikitlearnofficial/ - TikTok: https://www.tiktok.com/@scikit.learn +- Mastodon: https://mastodon.social/@sklearn@fosstodon.org +- Discord: https://discord.gg/h9qyrK8Jc8 + Citation ~~~~~~~~ diff --git a/SECURITY.md b/SECURITY.md index 9af364e1651e3..18bb99ea3c15c 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -2,10 +2,10 @@ ## Supported Versions -| Version | Supported | -| --------- | ------------------ | -| 1.2.2 | :white_check_mark: | -| < 1.2.2 | :x: | +| Version | Supported | +| ------------- | ------------------ | +| 1.4.2 | :white_check_mark: | +| < 1.4.2 | :x: | ## Reporting a Vulnerability diff --git a/asv_benchmarks/asv.conf.json b/asv_benchmarks/asv.conf.json index 9f65d194b6d84..3392925d7a488 100644 --- a/asv_benchmarks/asv.conf.json +++ b/asv_benchmarks/asv.conf.json @@ -71,13 +71,17 @@ // pip (with all the conda available packages installed first, // followed by the pip installed packages). // + // The versions of the dependencies should be bumped in a dedicated commit + // to easily identify regressions/improvements due to code changes from + // those due to dependency changes. 
+ // "matrix": { - "numpy": [], - "scipy": [], - "cython": [], - "joblib": [], - "threadpoolctl": [], - "pandas": [] + "numpy": ["1.25.2"], + "scipy": ["1.11.2"], + "cython": ["3.0.10"], + "joblib": ["1.3.2"], + "threadpoolctl": ["3.2.0"], + "pandas": ["2.1.0"] }, // Combinations of libraries/python versions can be excluded/included diff --git a/asv_benchmarks/benchmarks/cluster.py b/asv_benchmarks/benchmarks/cluster.py index ba460e6b503a6..457a15dd938e9 100644 --- a/asv_benchmarks/benchmarks/cluster.py +++ b/asv_benchmarks/benchmarks/cluster.py @@ -1,7 +1,7 @@ from sklearn.cluster import KMeans, MiniBatchKMeans from .common import Benchmark, Estimator, Predictor, Transformer -from .datasets import _blobs_dataset, _20newsgroups_highdim_dataset +from .datasets import _20newsgroups_highdim_dataset, _blobs_dataset from .utils import neg_mean_inertia diff --git a/asv_benchmarks/benchmarks/common.py b/asv_benchmarks/benchmarks/common.py index c3e114a212047..c12da551010f6 100644 --- a/asv_benchmarks/benchmarks/common.py +++ b/asv_benchmarks/benchmarks/common.py @@ -1,11 +1,11 @@ -import os +import itertools import json -import timeit +import os import pickle -import itertools +import timeit from abc import ABC, abstractmethod -from pathlib import Path from multiprocessing import cpu_count +from pathlib import Path import numpy as np @@ -23,7 +23,7 @@ def get_from_config(): n_jobs_vals_env = os.getenv("SKLBENCH_NJOBS") if n_jobs_vals_env: - n_jobs_vals = eval(n_jobs_vals_env) + n_jobs_vals = json.loads(n_jobs_vals_env) else: n_jobs_vals = config["n_jobs_vals"] if not n_jobs_vals: diff --git a/asv_benchmarks/benchmarks/datasets.py b/asv_benchmarks/benchmarks/datasets.py index dbe0eac0b822c..bbf5029062448 100644 --- a/asv_benchmarks/benchmarks/datasets.py +++ b/asv_benchmarks/benchmarks/datasets.py @@ -1,21 +1,22 @@ +from pathlib import Path + import numpy as np import scipy.sparse as sp from joblib import Memory -from pathlib import Path -from sklearn.decomposition import TruncatedSVD from sklearn.datasets import ( - make_blobs, fetch_20newsgroups, + fetch_olivetti_faces, fetch_openml, load_digits, - make_regression, + make_blobs, make_classification, - fetch_olivetti_faces, + make_regression, ) -from sklearn.preprocessing import MaxAbsScaler, StandardScaler +from sklearn.decomposition import TruncatedSVD from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.model_selection import train_test_split +from sklearn.preprocessing import MaxAbsScaler, StandardScaler # memory location for caching datasets M = Memory(location=str(Path(__file__).resolve().parent / "cache")) @@ -59,9 +60,7 @@ def _20newsgroups_lowdim_dataset(n_components=100, ngrams=(1, 1), dtype=np.float @M.cache def _mnist_dataset(dtype=np.float32): - X, y = fetch_openml( - "mnist_784", version=1, return_X_y=True, as_frame=False, parser="pandas" - ) + X, y = fetch_openml("mnist_784", version=1, return_X_y=True, as_frame=False) X = X.astype(dtype, copy=False) X = MaxAbsScaler().fit_transform(X) diff --git a/asv_benchmarks/benchmarks/decomposition.py b/asv_benchmarks/benchmarks/decomposition.py index 02a7862caeb69..0a7bb7ad07f3e 100644 --- a/asv_benchmarks/benchmarks/decomposition.py +++ b/asv_benchmarks/benchmarks/decomposition.py @@ -1,8 +1,8 @@ from sklearn.decomposition import PCA, DictionaryLearning, MiniBatchDictionaryLearning from .common import Benchmark, Estimator, Transformer -from .datasets import _olivetti_faces_dataset, _mnist_dataset -from .utils import make_pca_scorers, make_dict_learning_scorers +from 
.datasets import _mnist_dataset, _olivetti_faces_dataset +from .utils import make_dict_learning_scorers, make_pca_scorers class PCABenchmark(Transformer, Estimator, Benchmark): diff --git a/asv_benchmarks/benchmarks/ensemble.py b/asv_benchmarks/benchmarks/ensemble.py index 8c5a28e3da90f..c336d1e5f8805 100644 --- a/asv_benchmarks/benchmarks/ensemble.py +++ b/asv_benchmarks/benchmarks/ensemble.py @@ -1,7 +1,7 @@ from sklearn.ensemble import ( - RandomForestClassifier, GradientBoostingClassifier, HistGradientBoostingClassifier, + RandomForestClassifier, ) from .common import Benchmark, Estimator, Predictor diff --git a/asv_benchmarks/benchmarks/linear_model.py b/asv_benchmarks/benchmarks/linear_model.py index b694a109329f0..24153895611df 100644 --- a/asv_benchmarks/benchmarks/linear_model.py +++ b/asv_benchmarks/benchmarks/linear_model.py @@ -1,9 +1,9 @@ from sklearn.linear_model import ( - LogisticRegression, - Ridge, ElasticNet, Lasso, LinearRegression, + LogisticRegression, + Ridge, SGDRegressor, ) @@ -52,7 +52,6 @@ def make_estimator(self, params): estimator = LogisticRegression( solver=solver, penalty=penalty, - multi_class="multinomial", tol=0.01, n_jobs=n_jobs, random_state=0, diff --git a/azure-pipelines.yml b/azure-pipelines.yml index dfefda5ccddb9..1f8819d264d77 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -35,7 +35,7 @@ jobs: - bash: | source build_tools/shared.sh # Include pytest compatibility with mypy - pip install pytest flake8 $(get_dep mypy min) $(get_dep black min) cython-lint + pip install pytest $(get_dep ruff min) $(get_dep mypy min) $(get_dep black min) cython-lint displayName: Install linters - bash: | ./build_tools/linting.sh @@ -59,11 +59,8 @@ jobs: pylatest_pip_scipy_dev: DISTRIB: 'conda-pip-scipy-dev' LOCK_FILE: './build_tools/azure/pylatest_pip_scipy_dev_linux-64_conda.lock' - CHECK_WARNINGS: 'true' + SKLEARN_WARNINGS_AS_ERRORS: '1' CHECK_PYTEST_SOFT_DEPENDENCY: 'true' - # Tests that require large downloads over the networks are skipped in CI. - # Here we make sure, that they are still run on a regular basis. - SKLEARN_SKIP_NETWORK_TESTS: '0' - template: build_tools/azure/posix-docker.yml # Experimental CPython branch without the Global Interpreter Lock: @@ -127,11 +124,11 @@ jobs: vmImage: ubuntu-22.04 variables: # Need to match Python version and Emscripten version for the correct - # Pyodide version. For Pyodide version 0.23.2, see - # https://github.com/pyodide/pyodide/blob/0.23.2/Makefile.envs - PYODIDE_VERSION: '0.23.2' - EMSCRIPTEN_VERSION: '3.1.32' - PYTHON_VERSION: '3.11.2' + # Pyodide version. For example, for Pyodide version 0.25.1, see + # https://github.com/pyodide/pyodide/blob/0.25.1/Makefile.envs + PYODIDE_VERSION: '0.26.0' + EMSCRIPTEN_VERSION: '3.1.58' + PYTHON_VERSION: '3.12.1' dependsOn: [git_commit, linting] condition: | @@ -150,7 +147,7 @@ jobs: addToPath: true - bash: bash build_tools/azure/install_pyodide.sh - displayName: Build Pyodide wheel and install it in a Pyodide venv + displayName: Build Pyodide wheel - bash: bash build_tools/azure/test_script_pyodide.sh displayName: Test Pyodide wheel @@ -171,8 +168,11 @@ jobs: DISTRIB: 'conda' LOCK_FILE: './build_tools/azure/pylatest_conda_forge_mkl_linux-64_conda.lock' COVERAGE: 'true' - SHOW_SHORT_SUMMARY: 'true' SKLEARN_TESTS_GLOBAL_RANDOM_SEED: '42' # default global random seed + # Tests that require large downloads over the networks are skipped in CI. + # Here we make sure, that they are still run on a regular basis. 
+ ${{ if eq(variables['Build.Reason'], 'Schedule') }}: + SKLEARN_SKIP_NETWORK_TESTS: '0' # Check compilation with Ubuntu 22.04 LTS (Jammy Jellyfish) and scipy from conda-forge # By default the CI is sequential, where `Ubuntu_Jammy_Jellyfish` runs first and @@ -192,9 +192,10 @@ jobs: ) commitMessage: dependencies['git_commit']['outputs']['commit.message'] matrix: - py38_conda_forge_openblas_ubuntu_2204: + pymin_conda_forge_openblas_ubuntu_2204: DISTRIB: 'conda' - LOCK_FILE: './build_tools/azure/py38_conda_forge_openblas_ubuntu_2204_linux-64_conda.lock' + LOCK_FILE: './build_tools/azure/pymin_conda_forge_openblas_ubuntu_2204_linux-64_conda.lock' + SKLEARN_WARNINGS_AS_ERRORS: '1' COVERAGE: 'false' SKLEARN_TESTS_GLOBAL_RANDOM_SEED: '0' # non-default seed @@ -231,25 +232,31 @@ jobs: not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]')) ) matrix: - # Linux + Python 3.8 build with OpenBLAS - py38_conda_defaults_openblas: + # Linux + Python 3.9 build with OpenBLAS and without pandas + pymin_conda_defaults_openblas: DISTRIB: 'conda' - LOCK_FILE: './build_tools/azure/py38_conda_defaults_openblas_linux-64_conda.lock' + LOCK_FILE: './build_tools/azure/pymin_conda_defaults_openblas_linux-64_conda.lock' + # Enable debug Cython directives to capture IndexError exceptions in + # combination with the -Werror::pytest.PytestUnraisableExceptionWarning + # flag for pytest. + # https://github.com/scikit-learn/scikit-learn/pull/24438 SKLEARN_ENABLE_DEBUG_CYTHON_DIRECTIVES: '1' SKLEARN_RUN_FLOAT32_TESTS: '1' SKLEARN_TESTS_GLOBAL_RANDOM_SEED: '2' # non-default seed + BUILD_WITH_SETUPTOOLS: 'true' # Linux environment to test the latest available dependencies. # It runs tests requiring lightgbm, pandas and PyAMG. pylatest_pip_openblas_pandas: DISTRIB: 'conda-pip-latest' LOCK_FILE: './build_tools/azure/pylatest_pip_openblas_pandas_linux-64_conda.lock' CHECK_PYTEST_SOFT_DEPENDENCY: 'true' - CHECK_WARNINGS: 'true' + SKLEARN_WARNINGS_AS_ERRORS: '1' SKLEARN_TESTS_GLOBAL_RANDOM_SEED: '3' # non-default seed # disable pytest-xdist to have 1 job where OpenMP and BLAS are not single # threaded because by default the tests configuration (sklearn/conftest.py) # makes sure that they are single threaded in each xdist subprocess. PYTEST_XDIST_VERSION: 'none' + PIP_BUILD_ISOLATION: 'true' - template: build_tools/azure/posix-docker.yml parameters: @@ -275,7 +282,7 @@ jobs: - template: build_tools/azure/posix.yml parameters: name: macOS - vmImage: macOS-11 + vmImage: macOS-12 dependsOn: [linting, git_commit, Ubuntu_Jammy_Jellyfish] # Runs when dependencies succeeded or skipped condition: | @@ -307,15 +314,19 @@ jobs: not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]')) ) matrix: - py38_conda_forge_mkl: + pymin_conda_forge_mkl: DISTRIB: 'conda' - LOCK_FILE: ./build_tools/azure/py38_conda_forge_mkl_win-64_conda.lock - CHECK_WARNINGS: 'true' + LOCK_FILE: ./build_tools/azure/pymin_conda_forge_mkl_win-64_conda.lock + SKLEARN_WARNINGS_AS_ERRORS: '1' # The Azure Windows runner is typically much slower than other CI # runners due to the lack of compiler cache. Running the tests with # coverage enabled make them run extra slower. Since very few parts of # code should have windows-specific code branches, it should be enable # to restrict the code coverage collection to the non-windows runners. COVERAGE: 'false' + # Enable debug Cython directives to capture IndexError exceptions in + # combination with the -Werror::pytest.PytestUnraisableExceptionWarning + # flag for pytest. 
+ # https://github.com/scikit-learn/scikit-learn/pull/24438 SKLEARN_ENABLE_DEBUG_CYTHON_DIRECTIVES: '1' SKLEARN_TESTS_GLOBAL_RANDOM_SEED: '7' # non-default seed diff --git a/benchmarks/bench_20newsgroups.py b/benchmarks/bench_20newsgroups.py index c542349839178..44a117f1ad42d 100644 --- a/benchmarks/bench_20newsgroups.py +++ b/benchmarks/bench_20newsgroups.py @@ -1,18 +1,19 @@ -from time import time import argparse -import numpy as np +from time import time -from sklearn.dummy import DummyClassifier +import numpy as np from sklearn.datasets import fetch_20newsgroups_vectorized -from sklearn.metrics import accuracy_score -from sklearn.utils.validation import check_array - -from sklearn.ensemble import RandomForestClassifier -from sklearn.ensemble import ExtraTreesClassifier -from sklearn.ensemble import AdaBoostClassifier +from sklearn.dummy import DummyClassifier +from sklearn.ensemble import ( + AdaBoostClassifier, + ExtraTreesClassifier, + RandomForestClassifier, +) from sklearn.linear_model import LogisticRegression +from sklearn.metrics import accuracy_score from sklearn.naive_bayes import MultinomialNB +from sklearn.utils.validation import check_array ESTIMATORS = { "dummy": DummyClassifier(), @@ -20,7 +21,7 @@ "extra_trees": ExtraTreesClassifier(max_features="sqrt", min_samples_split=10), "logistic_regression": LogisticRegression(), "naive_bayes": MultinomialNB(), - "adaboost": AdaBoostClassifier(n_estimators=10), + "adaboost": AdaBoostClassifier(n_estimators=10, algorithm="SAMME"), } diff --git a/benchmarks/bench_covertype.py b/benchmarks/bench_covertype.py index 8a13a2d9806c6..5b8cdd588c8ee 100644 --- a/benchmarks/bench_covertype.py +++ b/benchmarks/bench_covertype.py @@ -45,20 +45,24 @@ # Arnaud Joly # License: BSD 3 clause +import argparse import os from time import time -import argparse + import numpy as np from joblib import Memory from sklearn.datasets import fetch_covtype, get_data_home -from sklearn.svm import LinearSVC -from sklearn.linear_model import SGDClassifier, LogisticRegression +from sklearn.ensemble import ( + ExtraTreesClassifier, + GradientBoostingClassifier, + RandomForestClassifier, +) +from sklearn.linear_model import LogisticRegression, SGDClassifier +from sklearn.metrics import zero_one_loss from sklearn.naive_bayes import GaussianNB +from sklearn.svm import LinearSVC from sklearn.tree import DecisionTreeClassifier -from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier -from sklearn.ensemble import GradientBoostingClassifier -from sklearn.metrics import zero_one_loss from sklearn.utils import check_array # Memoize the data extraction and memory map the resulting diff --git a/benchmarks/bench_feature_expansions.py b/benchmarks/bench_feature_expansions.py index fd5a4f0ebccff..b9d9efbdea4f1 100644 --- a/benchmarks/bench_feature_expansions.py +++ b/benchmarks/bench_feature_expansions.py @@ -1,8 +1,10 @@ +from time import time + import matplotlib.pyplot as plt import numpy as np import scipy.sparse as sparse + from sklearn.preprocessing import PolynomialFeatures -from time import time degree = 2 trials = 3 diff --git a/benchmarks/bench_glm.py b/benchmarks/bench_glm.py index c6c2a6f5fa117..84cf31858afa7 100644 --- a/benchmarks/bench_glm.py +++ b/benchmarks/bench_glm.py @@ -4,10 +4,12 @@ Data comes from a random square matrix. 
""" + from datetime import datetime + import numpy as np -from sklearn import linear_model +from sklearn import linear_model if __name__ == "__main__": import matplotlib.pyplot as plt diff --git a/benchmarks/bench_glmnet.py b/benchmarks/bench_glmnet.py index 8a0a0545bb627..1aaad99c10587 100644 --- a/benchmarks/bench_glmnet.py +++ b/benchmarks/bench_glmnet.py @@ -16,9 +16,12 @@ In both cases, only 10% of the features are informative. """ -import numpy as np + import gc from time import time + +import numpy as np + from sklearn.datasets import make_regression alpha = 0.1 @@ -45,11 +48,11 @@ def bench(factory, X, Y, X_test, Y_test, ref_coef): if __name__ == "__main__": - from glmnet.elastic_net import Lasso as GlmnetLasso - from sklearn.linear_model import Lasso as ScikitLasso - # Delayed import of matplotlib.pyplot import matplotlib.pyplot as plt + from glmnet.elastic_net import Lasso as GlmnetLasso + + from sklearn.linear_model import Lasso as ScikitLasso scikit_results = [] glmnet_results = [] diff --git a/benchmarks/bench_hist_gradient_boosting.py b/benchmarks/bench_hist_gradient_boosting.py index 163e21f98ed0d..c1dfffabe71c2 100644 --- a/benchmarks/bench_hist_gradient_boosting.py +++ b/benchmarks/bench_hist_gradient_boosting.py @@ -1,15 +1,16 @@ -from time import time import argparse +from time import time import matplotlib.pyplot as plt import numpy as np -from sklearn.model_selection import train_test_split -from sklearn.ensemble import HistGradientBoostingRegressor -from sklearn.ensemble import HistGradientBoostingClassifier -from sklearn.datasets import make_classification -from sklearn.datasets import make_regression -from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator +from sklearn.datasets import make_classification, make_regression +from sklearn.ensemble import ( + HistGradientBoostingClassifier, + HistGradientBoostingRegressor, +) +from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator +from sklearn.model_selection import train_test_split parser = argparse.ArgumentParser() parser.add_argument("--n-leaf-nodes", type=int, default=31) diff --git a/benchmarks/bench_hist_gradient_boosting_adult.py b/benchmarks/bench_hist_gradient_boosting_adult.py index 1b5905b1cf4e8..97c762e8e9230 100644 --- a/benchmarks/bench_hist_gradient_boosting_adult.py +++ b/benchmarks/bench_hist_gradient_boosting_adult.py @@ -4,15 +4,14 @@ import numpy as np import pandas as pd -from sklearn.model_selection import train_test_split -from sklearn.compose import make_column_transformer, make_column_selector +from sklearn.compose import make_column_selector, make_column_transformer from sklearn.datasets import fetch_openml -from sklearn.metrics import accuracy_score, roc_auc_score from sklearn.ensemble import HistGradientBoostingClassifier from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator +from sklearn.metrics import accuracy_score, roc_auc_score +from sklearn.model_selection import train_test_split from sklearn.preprocessing import OrdinalEncoder - parser = argparse.ArgumentParser() parser.add_argument("--n-leaf-nodes", type=int, default=31) parser.add_argument("--n-trees", type=int, default=100) @@ -50,7 +49,7 @@ def predict(est, data_test, target_test): print(f"predicted in {toc - tic:.3f}s, ROC AUC: {roc_auc:.4f}, ACC: {acc :.4f}") -data = fetch_openml(data_id=179, as_frame=True, parser="pandas") # adult dataset +data = fetch_openml(data_id=179, as_frame=True) # adult dataset X, y = data.data, data.target # Ordinal 
encode the categories to use the native support available in HGBDT diff --git a/benchmarks/bench_hist_gradient_boosting_categorical_only.py b/benchmarks/bench_hist_gradient_boosting_categorical_only.py index e8d215170f9c8..1085bbc49f4f8 100644 --- a/benchmarks/bench_hist_gradient_boosting_categorical_only.py +++ b/benchmarks/bench_hist_gradient_boosting_categorical_only.py @@ -1,11 +1,10 @@ import argparse from time import time -from sklearn.preprocessing import KBinsDiscretizer from sklearn.datasets import make_classification from sklearn.ensemble import HistGradientBoostingClassifier from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator - +from sklearn.preprocessing import KBinsDiscretizer parser = argparse.ArgumentParser() parser.add_argument("--n-leaf-nodes", type=int, default=31) diff --git a/benchmarks/bench_hist_gradient_boosting_higgsboson.py b/benchmarks/bench_hist_gradient_boosting_higgsboson.py index d6ed3b8e9700f..20057c50dc810 100644 --- a/benchmarks/bench_hist_gradient_boosting_higgsboson.py +++ b/benchmarks/bench_hist_gradient_boosting_higgsboson.py @@ -1,17 +1,17 @@ -from urllib.request import urlretrieve +import argparse import os from gzip import GzipFile from time import time -import argparse +from urllib.request import urlretrieve import numpy as np import pandas as pd from joblib import Memory -from sklearn.model_selection import train_test_split -from sklearn.metrics import accuracy_score, roc_auc_score + from sklearn.ensemble import HistGradientBoostingClassifier from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator - +from sklearn.metrics import accuracy_score, roc_auc_score +from sklearn.model_selection import train_test_split parser = argparse.ArgumentParser() parser.add_argument("--n-leaf-nodes", type=int, default=31) @@ -25,6 +25,7 @@ parser.add_argument("--no-predict", action="store_true", default=False) parser.add_argument("--cache-loc", type=str, default="/tmp") parser.add_argument("--no-interactions", type=bool, default=False) +parser.add_argument("--max-features", type=float, default=1.0) args = parser.parse_args() HERE = os.path.dirname(__file__) @@ -36,6 +37,7 @@ subsample = args.subsample lr = args.learning_rate max_bins = args.max_bins +max_features = args.max_features @m.cache @@ -104,6 +106,7 @@ def predict(est, data_test, target_test): random_state=0, verbose=1, interaction_cst=interaction_cst, + max_features=max_features, ) fit(est, data_train, target_train, "sklearn") predict(est, data_test, target_test) diff --git a/benchmarks/bench_hist_gradient_boosting_threading.py b/benchmarks/bench_hist_gradient_boosting_threading.py index 70787fd2eb479..9acf65bdbaf6a 100644 --- a/benchmarks/bench_hist_gradient_boosting_threading.py +++ b/benchmarks/bench_hist_gradient_boosting_threading.py @@ -1,18 +1,19 @@ -from time import time import argparse import os from pprint import pprint +from time import time import numpy as np from threadpoolctl import threadpool_limits + import sklearn -from sklearn.model_selection import train_test_split -from sklearn.ensemble import HistGradientBoostingRegressor -from sklearn.ensemble import HistGradientBoostingClassifier -from sklearn.datasets import make_classification -from sklearn.datasets import make_regression +from sklearn.datasets import make_classification, make_regression +from sklearn.ensemble import ( + HistGradientBoostingClassifier, + HistGradientBoostingRegressor, +) from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator - 
+from sklearn.model_selection import train_test_split parser = argparse.ArgumentParser() parser.add_argument("--n-leaf-nodes", type=int, default=31) @@ -290,8 +291,8 @@ def one_run(n_threads, n_samples): if args.plot or args.plot_filename: - import matplotlib.pyplot as plt import matplotlib + import matplotlib.pyplot as plt fig, axs = plt.subplots(2, figsize=(12, 12)) diff --git a/benchmarks/bench_isolation_forest.py b/benchmarks/bench_isolation_forest.py index 1c85cfb79d321..743911936dccc 100644 --- a/benchmarks/bench_isolation_forest.py +++ b/benchmarks/bench_isolation_forest.py @@ -17,12 +17,13 @@ """ from time import time -import numpy as np + import matplotlib.pyplot as plt +import numpy as np +from sklearn.datasets import fetch_covtype, fetch_kddcup99, fetch_openml from sklearn.ensemble import IsolationForest -from sklearn.metrics import roc_curve, auc -from sklearn.datasets import fetch_kddcup99, fetch_covtype, fetch_openml +from sklearn.metrics import auc, roc_curve from sklearn.preprocessing import LabelBinarizer from sklearn.utils import shuffle as sh @@ -63,7 +64,7 @@ def print_outlier_ratio(y): y = dataset.target if dat == "shuttle": - dataset = fetch_openml("shuttle", as_frame=False, parser="pandas") + dataset = fetch_openml("shuttle", as_frame=False) X = dataset.data y = dataset.target.astype(np.int64) X, y = sh(X, y, random_state=random_state) diff --git a/benchmarks/bench_isotonic.py b/benchmarks/bench_isotonic.py index 458a04a463303..556c452fa3323 100644 --- a/benchmarks/bench_isotonic.py +++ b/benchmarks/bench_isotonic.py @@ -10,13 +10,16 @@ This allows the scaling of the algorithm with the problem size to be visualized and understood. """ -import numpy as np + +import argparse import gc from datetime import datetime -from sklearn.isotonic import isotonic_regression -from scipy.special import expit + import matplotlib.pyplot as plt -import argparse +import numpy as np +from scipy.special import expit + +from sklearn.isotonic import isotonic_regression def generate_perturbed_logarithm_dataset(size): diff --git a/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py b/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py index 00721aa7f18a9..26789c173688f 100644 --- a/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py +++ b/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py @@ -35,17 +35,17 @@ You can also set `arpack_all=True` to activate arpack solver for large number of components (this takes more time). """ + # Authors: Sylvain MARIE, Schneider Electric import time -import numpy as np import matplotlib.pyplot as plt - +import numpy as np from numpy.testing import assert_array_almost_equal -from sklearn.decomposition import KernelPCA -from sklearn.datasets import make_circles +from sklearn.datasets import make_circles +from sklearn.decomposition import KernelPCA print(__doc__) diff --git a/benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py b/benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py index a40ddea4506dd..cae74c6f442ff 100644 --- a/benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py +++ b/benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py @@ -37,17 +37,17 @@ Solvers comparison benchmark: time vs n_components", where this time the number of examples is fixed, and the desired number of components varies. 
""" + # Author: Sylvain MARIE, Schneider Electric import time -import numpy as np import matplotlib.pyplot as plt - +import numpy as np from numpy.testing import assert_array_almost_equal -from sklearn.decomposition import KernelPCA -from sklearn.datasets import make_circles +from sklearn.datasets import make_circles +from sklearn.decomposition import KernelPCA print(__doc__) diff --git a/benchmarks/bench_lasso.py b/benchmarks/bench_lasso.py index 9a893545fbb28..9bae570505a75 100644 --- a/benchmarks/bench_lasso.py +++ b/benchmarks/bench_lasso.py @@ -11,8 +11,10 @@ In both cases, only 10% of the features are informative. """ + import gc from time import time + import numpy as np from sklearn.datasets import make_regression @@ -59,9 +61,10 @@ def compute_bench(alpha, n_samples, n_features, precompute): if __name__ == "__main__": - from sklearn.linear_model import Lasso, LassoLars import matplotlib.pyplot as plt + from sklearn.linear_model import Lasso, LassoLars + alpha = 0.01 # regularization parameter n_features = 10 diff --git a/benchmarks/bench_lof.py b/benchmarks/bench_lof.py index 31057e2e4067b..2c9732fab901f 100644 --- a/benchmarks/bench_lof.py +++ b/benchmarks/bench_lof.py @@ -18,11 +18,13 @@ """ from time import time -import numpy as np + import matplotlib.pyplot as plt +import numpy as np + +from sklearn.datasets import fetch_covtype, fetch_kddcup99, fetch_openml +from sklearn.metrics import auc, roc_curve from sklearn.neighbors import LocalOutlierFactor -from sklearn.metrics import roc_curve, auc -from sklearn.datasets import fetch_kddcup99, fetch_covtype, fetch_openml from sklearn.preprocessing import LabelBinarizer print(__doc__) @@ -44,7 +46,7 @@ y = dataset.target if dataset_name == "shuttle": - dataset = fetch_openml("shuttle", as_frame=False, parser="pandas") + dataset = fetch_openml("shuttle", as_frame=False) X = dataset.data y = dataset.target.astype(np.int64) # we remove data with label 4 diff --git a/benchmarks/bench_mnist.py b/benchmarks/bench_mnist.py index 4bc28ea1a165d..334e69ed5a30a 100644 --- a/benchmarks/bench_mnist.py +++ b/benchmarks/bench_mnist.py @@ -30,26 +30,24 @@ # Arnaud Joly # License: BSD 3 clause +import argparse import os from time import time -import argparse + import numpy as np from joblib import Memory -from sklearn.datasets import fetch_openml -from sklearn.datasets import get_data_home -from sklearn.ensemble import ExtraTreesClassifier -from sklearn.ensemble import RandomForestClassifier +from sklearn.datasets import fetch_openml, get_data_home from sklearn.dummy import DummyClassifier -from sklearn.kernel_approximation import Nystroem -from sklearn.kernel_approximation import RBFSampler +from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier +from sklearn.kernel_approximation import Nystroem, RBFSampler +from sklearn.linear_model import LogisticRegression from sklearn.metrics import zero_one_loss +from sklearn.neural_network import MLPClassifier from sklearn.pipeline import make_pipeline from sklearn.svm import LinearSVC from sklearn.tree import DecisionTreeClassifier from sklearn.utils import check_array -from sklearn.linear_model import LogisticRegression -from sklearn.neural_network import MLPClassifier # Memoize the data extraction and memory map the resulting # train / test splits in readonly mode @@ -62,7 +60,7 @@ def load_data(dtype=np.float32, order="F"): ###################################################################### # Load dataset print("Loading dataset...") - data = fetch_openml("mnist_784", as_frame=True, 
parser="pandas") + data = fetch_openml("mnist_784", as_frame=True) X = check_array(data["data"], dtype=dtype, order=order) y = data["target"] @@ -86,10 +84,10 @@ def load_data(dtype=np.float32, order="F"): "ExtraTrees": ExtraTreesClassifier(), "RandomForest": RandomForestClassifier(), "Nystroem-SVM": make_pipeline( - Nystroem(gamma=0.015, n_components=1000), LinearSVC(C=100, dual="auto") + Nystroem(gamma=0.015, n_components=1000), LinearSVC(C=100) ), "SampledRBF-SVM": make_pipeline( - RBFSampler(gamma=0.015, n_components=1000), LinearSVC(C=100, dual="auto") + RBFSampler(gamma=0.015, n_components=1000), LinearSVC(C=100) ), "LogisticRegression-SAG": LogisticRegression(solver="sag", tol=1e-1, C=1e4), "LogisticRegression-SAGA": LogisticRegression(solver="saga", tol=1e-1, C=1e4), diff --git a/benchmarks/bench_multilabel_metrics.py b/benchmarks/bench_multilabel_metrics.py index 2a87b388e91a2..1b8449a24da51 100755 --- a/benchmarks/bench_multilabel_metrics.py +++ b/benchmarks/bench_multilabel_metrics.py @@ -3,26 +3,25 @@ A comparison of multilabel target formats and metrics over them """ -from timeit import timeit -from functools import partial -import itertools import argparse +import itertools import sys +from functools import partial +from timeit import timeit import matplotlib.pyplot as plt -import scipy.sparse as sp import numpy as np +import scipy.sparse as sp from sklearn.datasets import make_multilabel_classification from sklearn.metrics import ( - f1_score, accuracy_score, + f1_score, hamming_loss, jaccard_similarity_score, ) from sklearn.utils._testing import ignore_warnings - METRICS = { "f1": partial(f1_score, average="micro"), "f1-by-sample": partial(f1_score, average="samples"), diff --git a/benchmarks/bench_online_ocsvm.py b/benchmarks/bench_online_ocsvm.py index 37af2fdd76562..9f92150e079dd 100644 --- a/benchmarks/bench_online_ocsvm.py +++ b/benchmarks/bench_online_ocsvm.py @@ -15,21 +15,20 @@ """ from time import time -import numpy as np +import matplotlib +import matplotlib.pyplot as plt +import numpy as np from scipy.interpolate import interp1d -from sklearn.metrics import roc_curve, auc -from sklearn.datasets import fetch_kddcup99, fetch_covtype -from sklearn.preprocessing import LabelBinarizer, StandardScaler -from sklearn.pipeline import make_pipeline -from sklearn.utils import shuffle +from sklearn.datasets import fetch_covtype, fetch_kddcup99 from sklearn.kernel_approximation import Nystroem -from sklearn.svm import OneClassSVM from sklearn.linear_model import SGDOneClassSVM - -import matplotlib.pyplot as plt -import matplotlib +from sklearn.metrics import auc, roc_curve +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import LabelBinarizer, StandardScaler +from sklearn.svm import OneClassSVM +from sklearn.utils import shuffle font = {"weight": "normal", "size": 15} diff --git a/benchmarks/bench_pca_solvers.py b/benchmarks/bench_pca_solvers.py new file mode 100644 index 0000000000000..337af3a42e900 --- /dev/null +++ b/benchmarks/bench_pca_solvers.py @@ -0,0 +1,165 @@ +# %% +# +# This benchmark compares the speed of PCA solvers on datasets of different +# sizes in order to determine the best solver to select by default via the +# "auto" heuristic. +# +# Note: we do not control for the accuracy of the solvers: we assume that all +# solvers yield transformed data with similar explained variance. This +# assumption is generally true, except for the randomized solver that might +# require more power iterations. 
+#
+# We generate synthetic data with dimensions that are useful to plot:
+# - time vs n_samples for a fixed n_features and,
+# - time vs n_features for a fixed n_samples.
+import itertools
+from math import log10
+from time import perf_counter
+
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+
+from sklearn import config_context
+from sklearn.decomposition import PCA
+
+REF_DIMS = [100, 1000, 10_000]
+data_shapes = []
+for ref_dim in REF_DIMS:
+    data_shapes.extend([(ref_dim, 10**i) for i in range(1, 8 - int(log10(ref_dim)))])
+    data_shapes.extend(
+        [(ref_dim, 3 * 10**i) for i in range(1, 8 - int(log10(ref_dim)))]
+    )
+    data_shapes.extend([(10**i, ref_dim) for i in range(1, 8 - int(log10(ref_dim)))])
+    data_shapes.extend(
+        [(3 * 10**i, ref_dim) for i in range(1, 8 - int(log10(ref_dim)))]
+    )
+
+# Remove duplicates:
+data_shapes = sorted(set(data_shapes))
+
+print("Generating test datasets...")
+rng = np.random.default_rng(0)
+datasets = [rng.normal(size=shape) for shape in data_shapes]
+
+
+# %%
+def measure_one(data, n_components, solver, method_name="fit"):
+    print(
+        f"Benchmarking {solver=!r}, {n_components=}, {method_name=!r} on data with"
+        f" shape {data.shape}"
+    )
+    pca = PCA(n_components=n_components, svd_solver=solver, random_state=0)
+    timings = []
+    elapsed = 0
+    method = getattr(pca, method_name)
+    with config_context(assume_finite=True):
+        while elapsed < 0.5:
+            tic = perf_counter()
+            method(data)
+            duration = perf_counter() - tic
+            timings.append(duration)
+            elapsed += duration
+    return np.median(timings)
+
+
+SOLVERS = ["full", "covariance_eigh", "arpack", "randomized", "auto"]
+measurements = []
+for data, n_components, method_name in itertools.product(
+    datasets, [2, 50], ["fit", "fit_transform"]
+):
+    if n_components >= min(data.shape):
+        continue
+    for solver in SOLVERS:
+        if solver == "covariance_eigh" and data.shape[1] > 5000:
+            # Too much memory and too slow.
+            continue
+        if solver in ["arpack", "full"] and log10(data.size) > 7:
+            # Too slow, in particular for the full solver.
+ continue + time = measure_one(data, n_components, solver, method_name=method_name) + measurements.append( + { + "n_components": n_components, + "n_samples": data.shape[0], + "n_features": data.shape[1], + "time": time, + "solver": solver, + "method_name": method_name, + } + ) +measurements = pd.DataFrame(measurements) +measurements.to_csv("bench_pca_solvers.csv", index=False) + +# %% +all_method_names = measurements["method_name"].unique() +all_n_components = measurements["n_components"].unique() + +for method_name in all_method_names: + fig, axes = plt.subplots( + figsize=(16, 16), + nrows=len(REF_DIMS), + ncols=len(all_n_components), + sharey=True, + constrained_layout=True, + ) + fig.suptitle(f"Benchmarks for PCA.{method_name}, varying n_samples", fontsize=16) + + for row_idx, ref_dim in enumerate(REF_DIMS): + for n_components, ax in zip(all_n_components, axes[row_idx]): + for solver in SOLVERS: + if solver == "auto": + style_kwargs = dict(linewidth=2, color="black", style="--") + else: + style_kwargs = dict(style="o-") + ax.set( + title=f"n_components={n_components}, n_features={ref_dim}", + ylabel="time (s)", + ) + measurements.query( + "n_components == @n_components and n_features == @ref_dim" + " and solver == @solver and method_name == @method_name" + ).plot.line( + x="n_samples", + y="time", + label=solver, + logx=True, + logy=True, + ax=ax, + **style_kwargs, + ) +# %% +for method_name in all_method_names: + fig, axes = plt.subplots( + figsize=(16, 16), + nrows=len(REF_DIMS), + ncols=len(all_n_components), + sharey=True, + ) + fig.suptitle(f"Benchmarks for PCA.{method_name}, varying n_features", fontsize=16) + + for row_idx, ref_dim in enumerate(REF_DIMS): + for n_components, ax in zip(all_n_components, axes[row_idx]): + for solver in SOLVERS: + if solver == "auto": + style_kwargs = dict(linewidth=2, color="black", style="--") + else: + style_kwargs = dict(style="o-") + ax.set( + title=f"n_components={n_components}, n_samples={ref_dim}", + ylabel="time (s)", + ) + measurements.query( + "n_components == @n_components and n_samples == @ref_dim " + " and solver == @solver and method_name == @method_name" + ).plot.line( + x="n_features", + y="time", + label=solver, + logx=True, + logy=True, + ax=ax, + **style_kwargs, + ) + +# %% diff --git a/benchmarks/bench_plot_incremental_pca.py b/benchmarks/bench_plot_incremental_pca.py index 0f42e4b630f1d..49b87c8c7060a 100644 --- a/benchmarks/bench_plot_incremental_pca.py +++ b/benchmarks/bench_plot_incremental_pca.py @@ -7,13 +7,15 @@ """ -import numpy as np import gc -from time import time from collections import defaultdict +from time import time + import matplotlib.pyplot as plt +import numpy as np + from sklearn.datasets import fetch_lfw_people -from sklearn.decomposition import IncrementalPCA, PCA +from sklearn.decomposition import PCA, IncrementalPCA def plot_results(X, y, label): diff --git a/benchmarks/bench_plot_lasso_path.py b/benchmarks/bench_plot_lasso_path.py index c372ee07117fc..3b46e447401cb 100644 --- a/benchmarks/bench_plot_lasso_path.py +++ b/benchmarks/bench_plot_lasso_path.py @@ -2,16 +2,16 @@ The input data is mostly low rank but is a fat infinite tail. 
""" -from collections import defaultdict + import gc import sys +from collections import defaultdict from time import time import numpy as np -from sklearn.linear_model import lars_path, lars_path_gram -from sklearn.linear_model import lasso_path from sklearn.datasets import make_regression +from sklearn.linear_model import lars_path, lars_path_gram, lasso_path def compute_bench(samples_range, features_range): diff --git a/benchmarks/bench_plot_neighbors.py b/benchmarks/bench_plot_neighbors.py index c6e5541eda6f3..2cedb19fb23c4 100644 --- a/benchmarks/bench_plot_neighbors.py +++ b/benchmarks/bench_plot_neighbors.py @@ -1,13 +1,14 @@ """ Plot the scaling of the nearest neighbors algorithms with k, D, and N """ + from time import time -import numpy as np import matplotlib.pyplot as plt +import numpy as np from matplotlib import ticker -from sklearn import neighbors, datasets +from sklearn import datasets, neighbors def get_data(N, D, dataset="dense"): diff --git a/benchmarks/bench_plot_nmf.py b/benchmarks/bench_plot_nmf.py index 78d6ad875cc34..f05ede117191b 100644 --- a/benchmarks/bench_plot_nmf.py +++ b/benchmarks/bench_plot_nmf.py @@ -1,33 +1,31 @@ """ Benchmarks of Non-Negative Matrix Factorization """ + # Authors: Tom Dupre la Tour (benchmark) # Chih-Jen Linn (original projected gradient NMF implementation) # Anthony Di Franco (projected gradient, Python and NumPy port) # License: BSD 3 clause -from time import time +import numbers import sys import warnings -import numbers +from time import time -import numpy as np import matplotlib.pyplot as plt -from joblib import Memory +import numpy as np import pandas +from joblib import Memory -from sklearn.utils._testing import ignore_warnings -from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.decomposition import NMF -from sklearn.decomposition._nmf import _initialize_nmf -from sklearn.decomposition._nmf import _beta_divergence -from sklearn.decomposition._nmf import _check_init +from sklearn.decomposition._nmf import _beta_divergence, _check_init, _initialize_nmf from sklearn.exceptions import ConvergenceWarning -from sklearn.utils.extmath import safe_sparse_dot, squared_norm +from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.utils import check_array +from sklearn.utils._testing import ignore_warnings +from sklearn.utils.extmath import safe_sparse_dot, squared_norm from sklearn.utils.validation import check_is_fitted, check_non_negative - mem = Memory(cachedir=".", verbose=0) ################### @@ -41,7 +39,7 @@ def _norm(x): """Dot product-based Euclidean norm implementation - See: http://fseoane.net/blog/2011/computing-the-vector-norm/ + See: https://fa.bianp.net/blog/2011/computing-the-vector-norm/ """ return np.sqrt(squared_norm(x)) @@ -261,8 +259,7 @@ def _fit_transform(self, X, y=None, W=None, H=None, update_H=True): if not isinstance(self.max_iter, numbers.Integral) or self.max_iter < 0: raise ValueError( "Maximum number of iterations must be a positive " - "integer; got (max_iter=%r)" - % self.max_iter + "integer; got (max_iter=%r)" % self.max_iter ) if not isinstance(self.tol, numbers.Number) or self.tol < 0: raise ValueError( @@ -308,8 +305,7 @@ def _fit_transform(self, X, y=None, W=None, H=None, update_H=True): if n_iter == self.max_iter and self.tol > 0: warnings.warn( "Maximum number of iteration %d reached. Increase it" - " to improve convergence." - % self.max_iter, + " to improve convergence." 
% self.max_iter, ConvergenceWarning, ) diff --git a/benchmarks/bench_plot_omp_lars.py b/benchmarks/bench_plot_omp_lars.py index a800b3ebe2ba9..8a4bc9b1a34fe 100644 --- a/benchmarks/bench_plot_omp_lars.py +++ b/benchmarks/bench_plot_omp_lars.py @@ -3,14 +3,15 @@ The input data is mostly low rank but is a fat infinite tail. """ + import gc import sys from time import time import numpy as np -from sklearn.linear_model import lars_path, lars_path_gram, orthogonal_mp from sklearn.datasets import make_sparse_coded_signal +from sklearn.linear_model import lars_path, lars_path_gram, orthogonal_mp def compute_bench(samples_range, features_range): diff --git a/benchmarks/bench_plot_parallel_pairwise.py b/benchmarks/bench_plot_parallel_pairwise.py index a41e3fab20589..ca12972f9be6c 100644 --- a/benchmarks/bench_plot_parallel_pairwise.py +++ b/benchmarks/bench_plot_parallel_pairwise.py @@ -4,9 +4,8 @@ import matplotlib.pyplot as plt +from sklearn.metrics.pairwise import pairwise_distances, pairwise_kernels from sklearn.utils import check_random_state -from sklearn.metrics.pairwise import pairwise_distances -from sklearn.metrics.pairwise import pairwise_kernels def plot(func): diff --git a/benchmarks/bench_plot_polynomial_kernel_approximation.py b/benchmarks/bench_plot_polynomial_kernel_approximation.py index b21589263a49f..a80455e21c255 100644 --- a/benchmarks/bench_plot_polynomial_kernel_approximation.py +++ b/benchmarks/bench_plot_polynomial_kernel_approximation.py @@ -30,33 +30,34 @@ [1] Pham, N., & Pagh, R. (2013, August). Fast and scalable polynomial kernels via explicit feature maps. In Proceedings of the 19th ACM SIGKDD international conference on Knowledge discovery and data mining (pp. 239-247) -(http://chbrown.github.io/kdd-2013-usb/kdd/p239.pdf) +(https://chbrown.github.io/kdd-2013-usb/kdd/p239.pdf) [2] Charikar, M., Chen, K., & Farach-Colton, M. (2002, July). Finding frequent items in data streams. In International Colloquium on Automata, Languages, and Programming (pp. 693-703). Springer, Berlin, Heidelberg. 
-(http://www.vldb.org/pvldb/1/1454225.pdf) +(https://people.cs.rutgers.edu/~farach/pubs/FrequentStream.pdf) """ + # Author: Daniel Lopez-Sanchez # License: BSD 3 clause # Load data manipulation functions -from sklearn.datasets import load_digits -from sklearn.model_selection import train_test_split +# Will use this for timing results +from time import time # Some common libraries import matplotlib.pyplot as plt import numpy as np -# Will use this for timing results -from time import time - -# Import SVM classifiers and feature map approximation algorithms -from sklearn.svm import LinearSVC, SVC +from sklearn.datasets import load_digits from sklearn.kernel_approximation import Nystroem, PolynomialCountSketch +from sklearn.model_selection import train_test_split from sklearn.pipeline import Pipeline +# Import SVM classifiers and feature map approximation algorithms +from sklearn.svm import SVC, LinearSVC + # Split data in train and test sets X, y = load_digits()["data"], load_digits()["target"] X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7) diff --git a/benchmarks/bench_plot_randomized_svd.py b/benchmarks/bench_plot_randomized_svd.py index 2020096a21b88..6bb5618b3633f 100644 --- a/benchmarks/bench_plot_randomized_svd.py +++ b/benchmarks/bench_plot_randomized_svd.py @@ -65,28 +65,29 @@ # Author: Giorgio Patrini -import numpy as np -import scipy as sp -import matplotlib.pyplot as plt - import gc +import os.path import pickle -from time import time from collections import defaultdict -import os.path +from time import time + +import matplotlib.pyplot as plt +import numpy as np +import scipy as sp -from sklearn.utils._arpack import _init_arpack_v0 -from sklearn.utils import gen_batches -from sklearn.utils.validation import check_random_state -from sklearn.utils.extmath import randomized_svd -from sklearn.datasets import make_low_rank_matrix, make_sparse_uncorrelated from sklearn.datasets import ( - fetch_lfw_people, - fetch_openml, fetch_20newsgroups_vectorized, + fetch_lfw_people, fetch_olivetti_faces, + fetch_openml, fetch_rcv1, + make_low_rank_matrix, + make_sparse_uncorrelated, ) +from sklearn.utils import gen_batches +from sklearn.utils._arpack import _init_arpack_v0 +from sklearn.utils.extmath import randomized_svd +from sklearn.utils.validation import check_random_state try: import fbpca @@ -191,7 +192,7 @@ def get_data(dataset_name): del row del col else: - X = fetch_openml(dataset_name, parser="auto").data + X = fetch_openml(dataset_name).data return X diff --git a/benchmarks/bench_plot_svd.py b/benchmarks/bench_plot_svd.py index fc370d1073be1..ed99d1c44e2fd 100644 --- a/benchmarks/bench_plot_svd.py +++ b/benchmarks/bench_plot_svd.py @@ -2,14 +2,16 @@ The data is mostly low rank but is a fat infinite tail. 
""" + import gc -from time import time -import numpy as np from collections import defaultdict +from time import time +import numpy as np from scipy.linalg import svd -from sklearn.utils.extmath import randomized_svd + from sklearn.datasets import make_low_rank_matrix +from sklearn.utils.extmath import randomized_svd def compute_bench(samples_range, features_range, n_iter=3, rank=50): diff --git a/benchmarks/bench_plot_ward.py b/benchmarks/bench_plot_ward.py index 696e833eede20..fe5cee201dff4 100644 --- a/benchmarks/bench_plot_ward.py +++ b/benchmarks/bench_plot_ward.py @@ -4,9 +4,9 @@ import time +import matplotlib.pyplot as plt import numpy as np from scipy.cluster import hierarchy -import matplotlib.pyplot as plt from sklearn.cluster import AgglomerativeClustering diff --git a/benchmarks/bench_random_projections.py b/benchmarks/bench_random_projections.py index 89a4550944f3f..6551de690994b 100644 --- a/benchmarks/bench_random_projections.py +++ b/benchmarks/bench_random_projections.py @@ -6,19 +6,20 @@ Benchmarks for random projections. """ + +import collections import gc -import sys import optparse +import sys from datetime import datetime -import collections import numpy as np import scipy.sparse as sp from sklearn import clone from sklearn.random_projection import ( - SparseRandomProjection, GaussianRandomProjection, + SparseRandomProjection, johnson_lindenstrauss_min_dim, ) diff --git a/benchmarks/bench_rcv1_logreg_convergence.py b/benchmarks/bench_rcv1_logreg_convergence.py index 2254ab81f30a4..166c6c2f5f9d1 100644 --- a/benchmarks/bench_rcv1_logreg_convergence.py +++ b/benchmarks/bench_rcv1_logreg_convergence.py @@ -3,14 +3,15 @@ # # License: BSD 3 clause -import matplotlib.pyplot as plt -from joblib import Memory -import numpy as np import gc import time -from sklearn.linear_model import LogisticRegression, SGDClassifier +import matplotlib.pyplot as plt +import numpy as np +from joblib import Memory + from sklearn.datasets import fetch_rcv1 +from sklearn.linear_model import LogisticRegression, SGDClassifier from sklearn.linear_model._sag import get_auto_step_size try: diff --git a/benchmarks/bench_saga.py b/benchmarks/bench_saga.py index 340549ef240e1..97d4ba7b4b75b 100644 --- a/benchmarks/bench_saga.py +++ b/benchmarks/bench_saga.py @@ -3,25 +3,27 @@ Benchmarks of sklearn SAGA vs lightning SAGA vs Liblinear. Shows the gain in using multinomial logistic regression in term of learning time. 
""" + import json -import time import os +import time -from sklearn.utils.parallel import delayed, Parallel import matplotlib.pyplot as plt import numpy as np from sklearn.datasets import ( + fetch_20newsgroups_vectorized, fetch_rcv1, - load_iris, load_digits, - fetch_20newsgroups_vectorized, + load_iris, ) from sklearn.linear_model import LogisticRegression from sklearn.metrics import log_loss from sklearn.model_selection import train_test_split +from sklearn.multiclass import OneVsRestClassifier from sklearn.preprocessing import LabelBinarizer, LabelEncoder from sklearn.utils.extmath import safe_sparse_dot, softmax +from sklearn.utils.parallel import Parallel, delayed def fit_single( @@ -94,7 +96,6 @@ def fit_single( else: lr = LogisticRegression( solver=solver, - multi_class=multi_class, C=C, penalty=penalty, fit_intercept=False, @@ -102,6 +103,8 @@ def fit_single( max_iter=this_max_iter, random_state=42, ) + if multi_class == "ovr": + lr = OneVsRestClassifier(lr) # Makes cpu cache even for all fit calls X_train.max() @@ -117,10 +120,12 @@ def fit_single( except NotImplementedError: # Lightning predict_proba is not implemented for n_classes > 2 y_pred = _predict_proba(lr, X) + if isinstance(lr, OneVsRestClassifier): + coef = np.concatenate([est.coef_ for est in lr.estimators_]) + else: + coef = lr.coef_ score = log_loss(y, y_pred, normalize=False) / n_samples - score += 0.5 * alpha * np.sum(lr.coef_**2) + beta * np.sum( - np.abs(lr.coef_) - ) + score += 0.5 * alpha * np.sum(coef**2) + beta * np.sum(np.abs(coef)) scores.append(score) train_score, test_score = tuple(scores) @@ -134,6 +139,7 @@ def fit_single( def _predict_proba(lr, X): + """Predict proba for lightning for n_classes >=3.""" pred = safe_sparse_dot(X, lr.coef_.T) if hasattr(lr, "intercept_"): pred += lr.intercept_ diff --git a/benchmarks/bench_sample_without_replacement.py b/benchmarks/bench_sample_without_replacement.py index 10baad5a8495f..39cf1a11ffed6 100644 --- a/benchmarks/bench_sample_without_replacement.py +++ b/benchmarks/bench_sample_without_replacement.py @@ -2,15 +2,16 @@ Benchmarks for sampling without replacement of integer. 
""" + import gc -import sys +import operator import optparse +import random +import sys from datetime import datetime -import operator import matplotlib.pyplot as plt import numpy as np -import random from sklearn.utils.random import sample_without_replacement diff --git a/benchmarks/bench_sgd_regression.py b/benchmarks/bench_sgd_regression.py index 47dd9e9fc758b..4b1b902795feb 100644 --- a/benchmarks/bench_sgd_regression.py +++ b/benchmarks/bench_sgd_regression.py @@ -1,16 +1,15 @@ # Author: Peter Prettenhofer # License: BSD 3 clause -import numpy as np -import matplotlib.pyplot as plt - import gc - from time import time -from sklearn.linear_model import Ridge, SGDRegressor, ElasticNet -from sklearn.metrics import mean_squared_error +import matplotlib.pyplot as plt +import numpy as np + from sklearn.datasets import make_regression +from sklearn.linear_model import ElasticNet, Ridge, SGDRegressor +from sklearn.metrics import mean_squared_error """ Benchmark for SGD regression diff --git a/benchmarks/bench_sparsify.py b/benchmarks/bench_sparsify.py index f1aa482b8b732..1832ca40c6ddb 100644 --- a/benchmarks/bench_sparsify.py +++ b/benchmarks/bench_sparsify.py @@ -43,8 +43,9 @@ 60 300 381409 1271.4 97.1 clf.predict(X_test_sparse) """ -from scipy.sparse import csr_matrix import numpy as np +from scipy.sparse import csr_matrix + from sklearn.linear_model import SGDRegressor from sklearn.metrics import r2_score diff --git a/benchmarks/bench_text_vectorizers.py b/benchmarks/bench_text_vectorizers.py index 6d75d57658500..2eab7071544f9 100644 --- a/benchmarks/bench_text_vectorizers.py +++ b/benchmarks/bench_text_vectorizers.py @@ -8,8 +8,9 @@ * psutil (optional, but recommended) """ -import timeit + import itertools +import timeit import numpy as np import pandas as pd @@ -18,8 +19,8 @@ from sklearn.datasets import fetch_20newsgroups from sklearn.feature_extraction.text import ( CountVectorizer, - TfidfVectorizer, HashingVectorizer, + TfidfVectorizer, ) n_repeat = 3 diff --git a/benchmarks/bench_tree.py b/benchmarks/bench_tree.py index c23ef627e237e..c522bcb39e994 100644 --- a/benchmarks/bench_tree.py +++ b/benchmarks/bench_tree.py @@ -13,11 +13,13 @@ training set, classify a sample and plot the time taken as a function of the number of dimensions. 
""" -import numpy as np -import matplotlib.pyplot as plt + import gc from datetime import datetime +import matplotlib.pyplot as plt +import numpy as np + # to store the results scikit_classifier_results = [] scikit_regressor_results = [] diff --git a/benchmarks/bench_tsne_mnist.py b/benchmarks/bench_tsne_mnist.py index e399e891cb94e..813fffcf29141 100644 --- a/benchmarks/bench_tsne_mnist.py +++ b/benchmarks/bench_tsne_mnist.py @@ -7,18 +7,19 @@ # License: BSD 3 clause +import argparse +import json import os import os.path as op from time import time + import numpy as np -import json -import argparse from joblib import Memory from sklearn.datasets import fetch_openml +from sklearn.decomposition import PCA from sklearn.manifold import TSNE from sklearn.neighbors import NearestNeighbors -from sklearn.decomposition import PCA from sklearn.utils import check_array from sklearn.utils import shuffle as _shuffle from sklearn.utils._openmp_helpers import _openmp_effective_n_threads @@ -35,7 +36,7 @@ def load_data(dtype=np.float32, order="C", shuffle=True, seed=0): """Load the data, then cache and memmap the train/test split""" print("Loading dataset...") - data = fetch_openml("mnist_784", as_frame=True, parser="pandas") + data = fetch_openml("mnist_784", as_frame=True) X = check_array(data["data"], dtype=dtype, order=order) y = data["target"] @@ -129,7 +130,8 @@ def sanitize(filename): try: from bhtsne.bhtsne import run_bh_tsne except ImportError as e: - raise ImportError("""\ + raise ImportError( + """\ If you want comparison with the reference implementation, build the binary from source (https://github.com/lvdmaaten/bhtsne) in the folder benchmarks/bhtsne and add an empty `__init__.py` file in the folder: @@ -139,7 +141,8 @@ def sanitize(filename): $ g++ sptree.cpp tsne.cpp tsne_main.cpp -o bh_tsne -O2 $ touch __init__.py $ cd .. 
-""") from e +""" + ) from e def bhtsne(X): """Wrapper for the reference lvdmaaten/bhtsne implementation.""" diff --git a/benchmarks/plot_tsne_mnist.py b/benchmarks/plot_tsne_mnist.py index d32e3dd769d6a..fff71eed0a26c 100644 --- a/benchmarks/plot_tsne_mnist.py +++ b/benchmarks/plot_tsne_mnist.py @@ -1,9 +1,8 @@ -import matplotlib.pyplot as plt -import numpy as np -import os.path as op - import argparse +import os.path as op +import matplotlib.pyplot as plt +import numpy as np LOG_DIR = "mnist_tsne_output" diff --git a/build_tools/azure/debian_atlas_32bit_lock.txt b/build_tools/azure/debian_atlas_32bit_lock.txt index 1a8c4eca7c291..9da822ecd9cb8 100644 --- a/build_tools/azure/debian_atlas_32bit_lock.txt +++ b/build_tools/azure/debian_atlas_32bit_lock.txt @@ -4,29 +4,42 @@ # # pip-compile --output-file=build_tools/azure/debian_atlas_32bit_lock.txt build_tools/azure/debian_atlas_32bit_requirements.txt # -attrs==23.1.0 +attrs==23.2.0 # via pytest -coverage==7.2.7 +coverage==7.5.3 # via pytest-cov -cython==0.29.35 +cython==3.0.10 # via -r build_tools/azure/debian_atlas_32bit_requirements.txt iniconfig==2.0.0 # via pytest -joblib==1.1.1 +joblib==1.2.0 # via -r build_tools/azure/debian_atlas_32bit_requirements.txt -packaging==23.1 - # via pytest -pluggy==1.0.0 +meson==1.4.1 + # via meson-python +meson-python==0.16.0 + # via -r build_tools/azure/debian_atlas_32bit_requirements.txt +ninja==1.11.1.1 + # via -r build_tools/azure/debian_atlas_32bit_requirements.txt +packaging==24.1 + # via + # meson-python + # pyproject-metadata + # pytest +pluggy==1.5.0 # via pytest py==1.11.0 # via pytest +pyproject-metadata==0.8.0 + # via meson-python pytest==7.1.2 # via # -r build_tools/azure/debian_atlas_32bit_requirements.txt # pytest-cov pytest-cov==2.9.0 # via -r build_tools/azure/debian_atlas_32bit_requirements.txt -threadpoolctl==2.2.0 +threadpoolctl==3.1.0 # via -r build_tools/azure/debian_atlas_32bit_requirements.txt tomli==2.0.1 - # via pytest + # via + # meson-python + # pytest diff --git a/build_tools/azure/debian_atlas_32bit_requirements.txt b/build_tools/azure/debian_atlas_32bit_requirements.txt index 83baf09b14093..615193a71fc6b 100644 --- a/build_tools/azure/debian_atlas_32bit_requirements.txt +++ b/build_tools/azure/debian_atlas_32bit_requirements.txt @@ -1,8 +1,10 @@ # DO NOT EDIT: this file is generated from the specification found in the # following script to centralize the configuration for CI builds: # build_tools/update_environments_and_lock_files.py -cython -joblib==1.1.1 # min -threadpoolctl==2.2.0 +cython==3.0.10 # min +joblib==1.2.0 # min +threadpoolctl==3.1.0 pytest==7.1.2 # min pytest-cov==2.9.0 # min +ninja +meson-python diff --git a/build_tools/azure/get_commit_message.py b/build_tools/azure/get_commit_message.py index 239da5b8c4498..0b1246b8d2724 100644 --- a/build_tools/azure/get_commit_message.py +++ b/build_tools/azure/get_commit_message.py @@ -1,6 +1,6 @@ +import argparse import os import subprocess -import argparse def get_commit_message(): diff --git a/build_tools/azure/install.sh b/build_tools/azure/install.sh index 5238cd1121d2e..9a6a526fafa46 100755 --- a/build_tools/azure/install.sh +++ b/build_tools/azure/install.sh @@ -47,11 +47,19 @@ pre_python_environment_install() { } +check_packages_dev_version() { + for package in $@; do + package_version=$(python -c "import $package; print($package.__version__)") + if ! 
[[ $package_version =~ "dev" ]]; then + echo "$package is not a development version: $package_version" + exit 1 + fi + done +} + python_environment_install_and_activate() { if [[ "$DISTRIB" == "conda"* ]]; then - conda update -n base conda -y - conda install -c conda-forge "$(get_dep conda-lock min)" -y - conda-lock install --name $VIRTUALENV $LOCK_FILE + create_conda_environment_from_lock_file $VIRTUALENV $LOCK_FILE source activate $VIRTUALENV elif [[ "$DISTRIB" == "ubuntu" || "$DISTRIB" == "debian-32" ]]; then @@ -67,8 +75,12 @@ python_environment_install_and_activate() { if [[ "$DISTRIB" == "conda-pip-scipy-dev" ]]; then echo "Installing development dependency wheels" - dev_anaconda_url=https://pypi.anaconda.org/scipy-wheels-nightly/simple - pip install --pre --upgrade --timeout=60 --extra-index $dev_anaconda_url numpy pandas scipy + dev_anaconda_url=https://pypi.anaconda.org/scientific-python-nightly-wheels/simple + dev_packages="numpy scipy pandas" + pip install --pre --upgrade --timeout=60 --extra-index $dev_anaconda_url $dev_packages + + check_packages_dev_version $dev_packages + echo "Installing Cython from latest sources" pip install https://github.com/cython/cython/archive/master.zip echo "Installing joblib from latest sources" @@ -109,19 +121,26 @@ scikit_learn_install() { export LDFLAGS="$LDFLAGS -Wl,--sysroot=/" fi - # TODO use a specific variable for this rather than using a particular build ... - if [[ "$DISTRIB" == "conda-pip-latest" ]]; then + if [[ "$BUILD_WITH_SETUPTOOLS" == "true" ]]; then + python setup.py develop + elif [[ "$PIP_BUILD_ISOLATION" == "true" ]]; then # Check that pip can automatically build scikit-learn with the build # dependencies specified in pyproject.toml using an isolated build # environment: - pip install --verbose --editable . + pip install --verbose . else + if [[ "$UNAMESTR" == "MINGW64"* ]]; then + # Needed on Windows CI to compile with Visual Studio compiler + # otherwise Meson detects a MINGW64 platform and use MINGW64 + # toolchain + ADDITIONAL_PIP_OPTIONS='-Csetup-args=--vsenv' + fi # Use the pre-installed build dependencies and build directly in the # current environment. - python setup.py develop + pip install --verbose --no-build-isolation --editable . 
$ADDITIONAL_PIP_OPTIONS fi - ccache -s + ccache -s || echo "ccache not installed, skipping ccache statistics" } main() { diff --git a/build_tools/azure/install_pyodide.sh b/build_tools/azure/install_pyodide.sh index 8bcfe45ef4152..58d0348a53202 100644 --- a/build_tools/azure/install_pyodide.sh +++ b/build_tools/azure/install_pyodide.sh @@ -15,8 +15,6 @@ pyodide build ls -ltrh dist -pyodide venv pyodide-venv -source pyodide-venv/bin/activate - -pip install dist/*.whl -pip list +# The Pyodide js library is needed by build_tools/azure/test_script_pyodide.sh +# to run tests inside Pyodide +npm install pyodide@$PYODIDE_VERSION diff --git a/build_tools/azure/install_win.sh b/build_tools/azure/install_win.sh deleted file mode 100755 index ab559a1878971..0000000000000 --- a/build_tools/azure/install_win.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash - -set -e -set -x - -# defines the get_dep and show_installed_libraries functions -source build_tools/shared.sh - -if [[ "$DISTRIB" == "conda" ]]; then - conda install -c conda-forge "$(get_dep conda-lock min)" -y - conda-lock install --name $VIRTUALENV $LOCK_FILE - source activate $VIRTUALENV -else - python -m venv $VIRTUALENV - source $VIRTUALENV/Scripts/activate - pip install -r $LOCK_FILE -fi - -show_installed_libraries - -# Build scikit-learn -python setup.py bdist_wheel - -# Install the generated wheel package to test it -pip install --pre --no-index --find-links dist scikit-learn diff --git a/build_tools/azure/posix-docker.yml b/build_tools/azure/posix-docker.yml index af776c4c62f14..b00ca66c378ca 100644 --- a/build_tools/azure/posix-docker.yml +++ b/build_tools/azure/posix-docker.yml @@ -22,7 +22,6 @@ jobs: # Set in azure-pipelines.yml DISTRIB: '' DOCKER_CONTAINER: '' - SHOW_SHORT_SUMMARY: 'false' CREATE_ISSUE_ON_TRACKER: 'true' CCACHE_DIR: $(Pipeline.Workspace)/ccache CCACHE_COMPRESS: '1' diff --git a/build_tools/azure/posix.yml b/build_tools/azure/posix.yml index 2ee03daafd288..35e5165d22c83 100644 --- a/build_tools/azure/posix.yml +++ b/build_tools/azure/posix.yml @@ -22,7 +22,6 @@ jobs: PYTEST_XDIST_VERSION: 'latest' COVERAGE: 'true' CREATE_ISSUE_ON_TRACKER: 'true' - SHOW_SHORT_SUMMARY: 'false' strategy: matrix: ${{ insert }}: ${{ parameters.matrix }} diff --git a/build_tools/azure/py38_conda_defaults_openblas_linux-64_conda.lock b/build_tools/azure/py38_conda_defaults_openblas_linux-64_conda.lock deleted file mode 100644 index 3a15776662079..0000000000000 --- a/build_tools/azure/py38_conda_defaults_openblas_linux-64_conda.lock +++ /dev/null @@ -1,99 +0,0 @@ -# Generated by conda-lock. 
-# platform: linux-64 -# input_hash: 79255228ac886c1c3fdbcda6a5d6e899b5ab035d633fa540a755b9ba633c2a2c -@EXPLICIT -https://repo.anaconda.com/pkgs/main/linux-64/_libgcc_mutex-0.1-main.conda#c3473ff8bdb3d124ed5ff11ec380d6f9 -https://repo.anaconda.com/pkgs/main/linux-64/blas-1.0-openblas.conda#9ddfcaef10d79366c90128f5dc444be8 -https://repo.anaconda.com/pkgs/main/linux-64/ca-certificates-2023.01.10-h06a4308_0.conda#7704989a2ccf6c1f5a50c985509841c4 -https://repo.anaconda.com/pkgs/main/linux-64/ld_impl_linux-64-2.38-h1181459_1.conda#68eedfd9c06f2b0e6888d8db345b7f5b -https://repo.anaconda.com/pkgs/main/linux-64/libgfortran4-7.5.0-ha8ba4b0_17.conda#e3883581cbf0a98672250c3e80d292bf -https://repo.anaconda.com/pkgs/main/linux-64/libgfortran-ng-7.5.0-ha8ba4b0_17.conda#ecb35c8952579d5c8dc56c6e076ba948 -https://repo.anaconda.com/pkgs/main/linux-64/libgomp-11.2.0-h1234567_1.conda#b372c0eea9b60732fdae4b817a63c8cd -https://repo.anaconda.com/pkgs/main/linux-64/libstdcxx-ng-11.2.0-h1234567_1.conda#57623d10a70e09e1d048c2b2b6f4e2dd -https://repo.anaconda.com/pkgs/main/linux-64/_openmp_mutex-5.1-1_gnu.conda#71d281e9c2192cb3fa425655a8defb85 -https://repo.anaconda.com/pkgs/main/linux-64/libgcc-ng-11.2.0-h1234567_1.conda#a87728dabf3151fb9cfa990bd2eb0464 -https://repo.anaconda.com/pkgs/main/linux-64/expat-2.4.9-h6a678d5_0.conda#3a6139fbcd96384855f0e6037502bf28 -https://repo.anaconda.com/pkgs/main/linux-64/giflib-5.2.1-h5eee18b_3.conda#aa7d64adb3cd8a75d398167f8c29afc3 -https://repo.anaconda.com/pkgs/main/linux-64/icu-58.2-he6710b0_3.conda#48cc14d5ad1a9bcd8dac17211a8deb8b -https://repo.anaconda.com/pkgs/main/linux-64/jpeg-9e-h5eee18b_1.conda#ac373800fda872108412d1ccfe3fa572 -https://repo.anaconda.com/pkgs/main/linux-64/lerc-3.0-h295c915_0.conda#b97309770412f10bed8d9448f6f98f87 -https://repo.anaconda.com/pkgs/main/linux-64/libdeflate-1.17-h5eee18b_0.conda#b4891fa07ca4cad1c53a0d0e539482da -https://repo.anaconda.com/pkgs/main/linux-64/libffi-3.4.4-h6a678d5_0.conda#06e288f9250abef59b9a367d151fc339 -https://repo.anaconda.com/pkgs/main/linux-64/libopenblas-0.3.18-hf726d26_0.conda#10422bb3b9b022e27798fc368cda69ba -https://repo.anaconda.com/pkgs/main/linux-64/libuuid-1.41.5-h5eee18b_0.conda#4a6a2354414c9080327274aa514e5299 -https://repo.anaconda.com/pkgs/main/linux-64/libwebp-base-1.2.4-h5eee18b_1.conda#a65a20c48061ecf2a6f4f02eae9f2366 -https://repo.anaconda.com/pkgs/main/linux-64/libxcb-1.15-h7f8727e_0.conda#ada518dcadd6aaee9aae47ba9a671553 -https://repo.anaconda.com/pkgs/main/linux-64/lz4-c-1.9.4-h6a678d5_0.conda#53915e9402180a7f22ea619c41089520 -https://repo.anaconda.com/pkgs/main/linux-64/ncurses-6.4-h6a678d5_0.conda#5558eec6e2191741a92f832ea826251c -https://repo.anaconda.com/pkgs/main/linux-64/nspr-4.35-h6a678d5_0.conda#208fff5d60133bcff6998a70c9f5203b -https://repo.anaconda.com/pkgs/main/linux-64/openssl-1.1.1t-h7f8727e_0.conda#0410db682c02665511bd4203ade48a32 -https://repo.anaconda.com/pkgs/main/linux-64/pcre-8.45-h295c915_0.conda#b32ccc24d1d9808618c1e898da60f68d -https://repo.anaconda.com/pkgs/main/linux-64/xz-5.4.2-h5eee18b_0.conda#bcd31de48a0dcb44bc5b99675800c5cc -https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.13-h5eee18b_0.conda#333e31fbfbb5057c92fa845ad6adef93 -https://repo.anaconda.com/pkgs/main/linux-64/ccache-3.7.9-hfe4627d_0.conda#bef6fc681c273bb7bd0c67d1a591365e -https://repo.anaconda.com/pkgs/main/linux-64/glib-2.69.1-he621ea3_2.conda#51cf1899782b3f3744aedd143fbc07f3 -https://repo.anaconda.com/pkgs/main/linux-64/libedit-3.1.20221030-h5eee18b_0.conda#7c724a17739aceaf9d1633ff06962137 
-https://repo.anaconda.com/pkgs/main/linux-64/libevent-2.1.12-h8f2d780_0.conda#8de03cd4b6ee0ddeb0571a5199db5637 -https://repo.anaconda.com/pkgs/main/linux-64/libllvm14-14.0.6-hdb19cb5_3.conda#aefea2b45cf32f12b4f1ffaa70aa3201 -https://repo.anaconda.com/pkgs/main/linux-64/libpng-1.6.39-h5eee18b_0.conda#f6aee38184512eb05b06c2e94d39ab22 -https://repo.anaconda.com/pkgs/main/linux-64/libxml2-2.10.3-hcbfbd50_0.conda#95357588631b66da8f97ddbfbdf2e4e1 -https://repo.anaconda.com/pkgs/main/linux-64/readline-8.2-h5eee18b_0.conda#be42180685cce6e6b0329201d9f48efb -https://repo.anaconda.com/pkgs/main/linux-64/tk-8.6.12-h1ccaba5_0.conda#fa10ff4aa631fa4aa090a6234d7770b9 -https://repo.anaconda.com/pkgs/main/linux-64/zstd-1.5.5-hc292b87_0.conda#0f59d57dc21f585f4c282d60dfb46505 -https://repo.anaconda.com/pkgs/main/linux-64/dbus-1.13.18-hb2f20db_0.conda#6a6a6f1391f807847404344489ef6cf4 -https://repo.anaconda.com/pkgs/main/linux-64/freetype-2.12.1-h4a9f257_0.conda#bdc7b5952e9c5dca01bc2f4ccef2f974 -https://repo.anaconda.com/pkgs/main/linux-64/gstreamer-1.14.1-h5eee18b_1.conda#f2f26e6f869b5d87f41bd059fae47c3e -https://repo.anaconda.com/pkgs/main/linux-64/krb5-1.19.4-h568e23c_0.conda#649816c5e24c76bd06e74a0eb671a82e -https://repo.anaconda.com/pkgs/main/linux-64/libclang13-14.0.6-default_he11475f_1.conda#44890feda1cf51639d9c94afbacce011 -https://repo.anaconda.com/pkgs/main/linux-64/libtiff-4.5.0-h6a678d5_2.conda#b3391ee6956636eb8ef159c1c454e3da -https://repo.anaconda.com/pkgs/main/linux-64/libxkbcommon-1.0.1-h5eee18b_1.conda#888b2e8f1bbf21017c503826e2d24b50 -https://repo.anaconda.com/pkgs/main/linux-64/libxslt-1.1.37-h2085143_0.conda#680f9676bf55bdafd276eaa12fbb0f28 -https://repo.anaconda.com/pkgs/main/linux-64/sqlite-3.41.2-h5eee18b_0.conda#c7086c9ceb6cfe1c4c729a774a2d88a5 -https://repo.anaconda.com/pkgs/main/linux-64/fontconfig-2.14.1-h4c34cd2_2.conda#f0b472f5b544f8d57beb09ed4a2932e1 -https://repo.anaconda.com/pkgs/main/linux-64/gst-plugins-base-1.14.1-h6a678d5_1.conda#afd9cbe949d670d24cc0a007aaec1fe1 -https://repo.anaconda.com/pkgs/main/linux-64/lcms2-2.12-h3be6417_0.conda#719db47afba9f6586eecb5eacac70bff -https://repo.anaconda.com/pkgs/main/linux-64/libclang-14.0.6-default_hc6dbbc7_1.conda#8f12583c4027b2861cff470f6b8837c4 -https://repo.anaconda.com/pkgs/main/linux-64/libpq-12.9-h16c4e8d_3.conda#0f127be216a734916faf456bb21404e9 -https://repo.anaconda.com/pkgs/main/linux-64/libwebp-1.2.4-h11a3e52_1.conda#9f9153b30e58e9ce896f74634622cbf1 -https://repo.anaconda.com/pkgs/main/linux-64/nss-3.89.1-h6a678d5_0.conda#4d9d28fc3a0ca4916f281d2f5429ac50 -https://repo.anaconda.com/pkgs/main/linux-64/python-3.8.16-h7a1cb2a_3.conda#c11c0992727585f5f991760f5b18c968 -https://repo.anaconda.com/pkgs/main/linux-64/attrs-22.1.0-py38h06a4308_0.conda#51beb64c6f06b5a69529df7ecaccc3f9 -https://repo.anaconda.com/pkgs/main/noarch/cycler-0.11.0-pyhd3eb1b0_0.conda#f5e365d2cdb66d547eb8c3ab93843aab -https://repo.anaconda.com/pkgs/main/linux-64/cython-0.29.33-py38h6a678d5_0.conda#eb105388ba8bcf5ce82cf4cd5deeb5f9 -https://repo.anaconda.com/pkgs/main/linux-64/exceptiongroup-1.0.4-py38h06a4308_0.conda#db954e73dca6076c64a1004d71b45784 -https://repo.anaconda.com/pkgs/main/noarch/execnet-1.9.0-pyhd3eb1b0_0.conda#f895937671af67cebb8af617494b3513 -https://repo.anaconda.com/pkgs/main/noarch/iniconfig-1.1.1-pyhd3eb1b0_0.tar.bz2#e40edff2c5708f342cef43c7f280c507 -https://repo.anaconda.com/pkgs/main/linux-64/joblib-1.2.0-py38h06a4308_0.conda#ee7f1f50ae15650057e5d5301900ae34 
-https://repo.anaconda.com/pkgs/main/linux-64/kiwisolver-1.4.4-py38h6a678d5_0.conda#7424aa335d22974192800ec19a68486e -https://repo.anaconda.com/pkgs/main/linux-64/numpy-base-1.17.3-py38h2f8d375_0.conda#40edbb76ecacefb1e6ab639b514822b1 -https://repo.anaconda.com/pkgs/main/linux-64/packaging-23.0-py38h06a4308_0.conda#87dd3a3af0b6c6f5bbb99b7f205c2612 -https://repo.anaconda.com/pkgs/main/linux-64/pillow-9.4.0-py38h6a678d5_0.conda#8afd1f4f8b23a1c44fca4975253b17f7 -https://repo.anaconda.com/pkgs/main/linux-64/pluggy-1.0.0-py38h06a4308_1.conda#87bb1d3f6cf3e409a1dac38cee99918e -https://repo.anaconda.com/pkgs/main/linux-64/ply-3.11-py38_0.conda#d6a69c576c6e4d19e3074eaae3d149f2 -https://repo.anaconda.com/pkgs/main/noarch/py-1.11.0-pyhd3eb1b0_0.conda#7205a898ed2abbf6e9b903dff6abe08e -https://repo.anaconda.com/pkgs/main/linux-64/pyparsing-3.0.9-py38h06a4308_0.conda#becbbf51d2b05de228eed968e20f963d -https://repo.anaconda.com/pkgs/main/linux-64/pytz-2022.7-py38h06a4308_0.conda#19c9f6a24d5c6f779c645d00f646666b -https://repo.anaconda.com/pkgs/main/linux-64/qt-main-5.15.2-h8373d8f_8.conda#fd275fd09d648f31bfdb27aebb239eeb -https://repo.anaconda.com/pkgs/main/linux-64/setuptools-67.8.0-py38h06a4308_0.conda#629ffd3b3738163d536a6c06e0b14164 -https://repo.anaconda.com/pkgs/main/noarch/six-1.16.0-pyhd3eb1b0_1.conda#34586824d411d36af2fa40e799c172d0 -https://repo.anaconda.com/pkgs/main/noarch/threadpoolctl-2.2.0-pyh0d69192_0.conda#bbfdbae4934150b902f97daaf287efe2 -https://repo.anaconda.com/pkgs/main/noarch/toml-0.10.2-pyhd3eb1b0_0.conda#cda05f5f6d8509529d1a2743288d197a -https://repo.anaconda.com/pkgs/main/linux-64/tomli-2.0.1-py38h06a4308_0.conda#791cce9de9913e9587b0a85cd8419123 -https://repo.anaconda.com/pkgs/main/linux-64/tornado-6.2-py38h5eee18b_0.conda#db2f7ebc500d97a4af6889dfd0d03dbc -https://repo.anaconda.com/pkgs/main/linux-64/coverage-7.2.2-py38h5eee18b_0.conda#a05c1732d4e67102d2aa8d7e56de778b -https://repo.anaconda.com/pkgs/main/linux-64/numpy-1.17.3-py38h7e8d029_0.conda#5f2b196b515f8fe6b37e3d224650577d -https://repo.anaconda.com/pkgs/main/linux-64/pytest-7.3.1-py38h06a4308_0.conda#456f5c7532523cc7bd098e0a87a199dc -https://repo.anaconda.com/pkgs/main/noarch/python-dateutil-2.8.2-pyhd3eb1b0_0.conda#211ee00320b08a1ac9fea6677649f6c9 -https://repo.anaconda.com/pkgs/main/linux-64/qt-webengine-5.15.9-hbbf29b9_6.conda#9f2b3a9673e955f7ecc9e814d9afc9f5 -https://repo.anaconda.com/pkgs/main/linux-64/sip-6.6.2-py38h6a678d5_0.conda#cb3f0d10f7f79870945f4dbbe0000f92 -https://repo.anaconda.com/pkgs/main/linux-64/matplotlib-base-3.1.3-py38hef1b27d_0.conda#a7ad7d097c25b7beeb76f370d51687a1 -https://repo.anaconda.com/pkgs/main/linux-64/pandas-1.2.4-py38ha9443f7_0.conda#5bd3fd807a294f387feabc65821b75d0 -https://repo.anaconda.com/pkgs/main/linux-64/pyqt5-sip-12.11.0-py38h6a678d5_1.conda#7bc403c7d55f1465e922964d293d2186 -https://repo.anaconda.com/pkgs/main/linux-64/pytest-cov-4.0.0-py38h06a4308_0.conda#54035e39255f285f98ca1141b7f098e7 -https://repo.anaconda.com/pkgs/main/noarch/pytest-forked-1.3.0-pyhd3eb1b0_0.tar.bz2#07970bffdc78f417d7f8f1c7e620f5c4 -https://repo.anaconda.com/pkgs/main/linux-64/qtwebkit-5.212-h3fafdc1_5.conda#e811bbc0456e3d3a02cab199492153ee -https://repo.anaconda.com/pkgs/main/linux-64/scipy-1.5.0-py38habc2bb6_0.conda#a27a97fc2377ab74cbd33ce22d3c3353 -https://repo.anaconda.com/pkgs/main/linux-64/pyamg-4.2.3-py38h79cecc1_0.conda#6e7f4f94000b244396de8bf4e6ae8dc4 -https://repo.anaconda.com/pkgs/main/linux-64/pyqt-5.15.7-py38h6a678d5_1.conda#62232dc285be8e7e85ae9596d89b3b95 
-https://repo.anaconda.com/pkgs/main/noarch/pytest-xdist-2.5.0-pyhd3eb1b0_0.conda#d15cdc4207bcf8ca920822597f1d138d -https://repo.anaconda.com/pkgs/main/linux-64/matplotlib-3.1.3-py38_0.conda#70d5f6df438d469dc78f082389ada23d diff --git a/build_tools/azure/py38_conda_forge_mkl_win-64_conda.lock b/build_tools/azure/py38_conda_forge_mkl_win-64_conda.lock deleted file mode 100644 index 939830bc2a0a0..0000000000000 --- a/build_tools/azure/py38_conda_forge_mkl_win-64_conda.lock +++ /dev/null @@ -1,126 +0,0 @@ -# Generated by conda-lock. -# platform: win-64 -# input_hash: e3af9571d95aff7d02e118db6e2ccbce90cd3cf3c663b4ed8a5e8c3fef5b1318 -@EXPLICIT -https://conda.anaconda.org/conda-forge/win-64/ca-certificates-2023.5.7-h56e8100_0.conda#604212634bd8c4d6f20d44b946e8eedb -https://conda.anaconda.org/conda-forge/win-64/intel-openmp-2023.1.0-h57928b3_46319.conda#dbc4636f419722fbf3ab6501377228ba -https://conda.anaconda.org/conda-forge/win-64/mkl-include-2022.1.0-h6a75c08_874.tar.bz2#414f6ab96ad71e7a95bd00d990fa3473 -https://conda.anaconda.org/conda-forge/win-64/msys2-conda-epoch-20160418-1.tar.bz2#b0309b72560df66f71a9d5e34a5efdfa -https://conda.anaconda.org/conda-forge/win-64/python_abi-3.8-3_cp38.conda#c6df946723dadd4a5830a8ff8c6b9a20 -https://conda.anaconda.org/conda-forge/win-64/ucrt-10.0.22621.0-h57928b3_0.tar.bz2#72608f6cd3e5898229c3ea16deb1ac43 -https://conda.anaconda.org/conda-forge/win-64/m2w64-gmp-6.1.0-2.tar.bz2#53a1c73e1e3d185516d7e3af177596d9 -https://conda.anaconda.org/conda-forge/win-64/m2w64-libwinpthread-git-5.0.0.4634.697f757-2.tar.bz2#774130a326dee16f1ceb05cc687ee4f0 -https://conda.anaconda.org/conda-forge/win-64/vc14_runtime-14.34.31931-h5081d32_16.conda#22125178654c6a8a393f9743d585704b -https://conda.anaconda.org/conda-forge/win-64/m2w64-gcc-libs-core-5.3.0-7.tar.bz2#4289d80fb4d272f1f3b56cfe87ac90bd -https://conda.anaconda.org/conda-forge/win-64/vc-14.3-hb25d44b_16.conda#ea326b37e3bd6d2616988e09f3a9396c -https://conda.anaconda.org/conda-forge/win-64/vs2015_runtime-14.34.31931-hed1258a_16.conda#0374eae69b6dbfb27c3dc27167109eb4 -https://conda.anaconda.org/conda-forge/win-64/bzip2-1.0.8-h8ffe710_4.tar.bz2#7c03c66026944073040cb19a4f3ec3c9 -https://conda.anaconda.org/conda-forge/win-64/icu-72.1-h63175ca_0.conda#a108731562663d787066bd17c9595114 -https://conda.anaconda.org/conda-forge/win-64/lerc-4.0.0-h63175ca_0.tar.bz2#1900cb3cab5055833cfddb0ba233b074 -https://conda.anaconda.org/conda-forge/win-64/libbrotlicommon-1.0.9-hcfcfb64_8.tar.bz2#e8078e37208cd7d3e1eb5053f370ded8 -https://conda.anaconda.org/conda-forge/win-64/libdeflate-1.18-hcfcfb64_0.conda#493acc14c556ef6f1d13ba00b099c679 -https://conda.anaconda.org/conda-forge/win-64/libffi-3.4.2-h8ffe710_5.tar.bz2#2c96d1b6915b408893f9472569dee135 -https://conda.anaconda.org/conda-forge/win-64/libiconv-1.17-h8ffe710_0.tar.bz2#050119977a86e4856f0416e2edcf81bb -https://conda.anaconda.org/conda-forge/win-64/libjpeg-turbo-2.1.5.1-hcfcfb64_0.conda#f2fad2ae9f1365e343e4329fdb1e9d63 -https://conda.anaconda.org/conda-forge/win-64/libogg-1.3.4-h8ffe710_1.tar.bz2#04286d905a0dcb7f7d4a12bdfe02516d -https://conda.anaconda.org/conda-forge/win-64/libsqlite-3.42.0-hcfcfb64_0.conda#9a71d93deb99cc09d8939d5235b5909a -https://conda.anaconda.org/conda-forge/win-64/libwebp-base-1.3.0-hcfcfb64_0.conda#381a3645c51cbf478872899b16490318 -https://conda.anaconda.org/conda-forge/win-64/libzlib-1.2.13-hcfcfb64_4.tar.bz2#0cc5c5cc64ee1637f37f8540a175854c -https://conda.anaconda.org/conda-forge/win-64/m2w64-gcc-libgfortran-5.3.0-6.tar.bz2#066552ac6b907ec6d72c0ddab29050dc 
-https://conda.anaconda.org/conda-forge/win-64/openssl-3.1.1-hcfcfb64_1.conda#1d913a5de46c6b2f7e4cfbd26b106b8b -https://conda.anaconda.org/conda-forge/win-64/pthreads-win32-2.9.1-hfa6e2cd_3.tar.bz2#e2da8758d7d51ff6aa78a14dfb9dbed4 -https://conda.anaconda.org/conda-forge/win-64/tk-8.6.12-h8ffe710_0.tar.bz2#c69a5047cc9291ae40afd4a1ad6f0c0f -https://conda.anaconda.org/conda-forge/win-64/xz-5.2.6-h8d14728_0.tar.bz2#515d77642eaa3639413c6b1bc3f94219 -https://conda.anaconda.org/conda-forge/win-64/gettext-0.21.1-h5728263_0.tar.bz2#299d4fd6798a45337042ff5a48219e5f -https://conda.anaconda.org/conda-forge/win-64/krb5-1.20.1-heb0366b_0.conda#a07b05ee8f451ab15698397185efe989 -https://conda.anaconda.org/conda-forge/win-64/libbrotlidec-1.0.9-hcfcfb64_8.tar.bz2#99839d9d81f33afa173c0fa82a702038 -https://conda.anaconda.org/conda-forge/win-64/libbrotlienc-1.0.9-hcfcfb64_8.tar.bz2#88e62627120c20289bf8982b15e0a6a1 -https://conda.anaconda.org/conda-forge/win-64/libclang13-15.0.7-default_h77d9078_2.conda#c2e1def32a19610ac26db453501760b6 -https://conda.anaconda.org/conda-forge/win-64/libpng-1.6.39-h19919ed_0.conda#ab6febdb2dbd9c00803609079db4de71 -https://conda.anaconda.org/conda-forge/win-64/libvorbis-1.3.7-h0e60522_0.tar.bz2#e1a22282de0169c93e4ffe6ce6acc212 -https://conda.anaconda.org/conda-forge/win-64/libxml2-2.11.4-hc3477c8_0.conda#586627982a63815637f871a6360fe3f9 -https://conda.anaconda.org/conda-forge/win-64/m2w64-gcc-libs-5.3.0-7.tar.bz2#fe759119b8b3bfa720b8762c6fdc35de -https://conda.anaconda.org/conda-forge/win-64/pcre2-10.40-h17e33f8_0.tar.bz2#2519de0d9620dc2bc7e19caf6867136d -https://conda.anaconda.org/conda-forge/win-64/python-3.8.16-h4de0772_1_cpython.conda#461d9fc92cfde68f2ca7ef0988f6326a -https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.2-h12be248_6.conda#62826565682d013b3e2346aaf7bded0e -https://conda.anaconda.org/conda-forge/win-64/brotli-bin-1.0.9-hcfcfb64_8.tar.bz2#e18b70ed349d96086fd60a9c642b1b58 -https://conda.anaconda.org/conda-forge/noarch/certifi-2023.5.7-pyhd8ed1ab_0.conda#5d1b71c942b8421285934dad1d891ebc -https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.1.0-pyhd8ed1ab_0.conda#7fcff9f6f123696e940bda77bd4d6551 -https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 -https://conda.anaconda.org/conda-forge/noarch/cycler-0.11.0-pyhd8ed1ab_0.tar.bz2#a50559fad0affdbb33729a68669ca1cb -https://conda.anaconda.org/conda-forge/win-64/cython-0.29.35-py38hd3f51b4_0.conda#b4529ae0e6ffa88bd31dbfd25a733977 -https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.1.1-pyhd8ed1ab_0.conda#7312299d7a0ea4993159229b7d2dceb2 -https://conda.anaconda.org/conda-forge/noarch/execnet-1.9.0-pyhd8ed1ab_0.tar.bz2#0e521f7a5e60d508b121d38b04874fb2 -https://conda.anaconda.org/conda-forge/win-64/freetype-2.12.1-h546665d_1.conda#1b513009cd012591f3fdc9e03a74ec0a -https://conda.anaconda.org/conda-forge/noarch/idna-3.4-pyhd8ed1ab_0.tar.bz2#34272b248891bddccc64479f9a7fffed -https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5 -https://conda.anaconda.org/conda-forge/win-64/kiwisolver-1.4.4-py38hb1fd069_1.tar.bz2#1dcc50e3241f9e4e59713eec2653abd5 -https://conda.anaconda.org/conda-forge/win-64/libclang-15.0.7-default_h77d9078_2.conda#70188b1b3e0b1716405adab9050894d1 -https://conda.anaconda.org/conda-forge/win-64/libglib-2.76.3-he8f3873_0.conda#4695e6acaf4790170161048d56cb51fc 
-https://conda.anaconda.org/conda-forge/win-64/libhwloc-2.9.1-cpu_hadd60ae_5.conda#26867ad630a49c49fc123abfde634c7e -https://conda.anaconda.org/conda-forge/win-64/libtiff-4.5.0-h6c8260b_6.conda#12628df645fcf0f74922138858724831 -https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19 -https://conda.anaconda.org/conda-forge/noarch/packaging-23.1-pyhd8ed1ab_0.conda#91cda59e66e1e4afe9476f8ef98f5c30 -https://conda.anaconda.org/conda-forge/noarch/pluggy-1.0.0-pyhd8ed1ab_5.tar.bz2#7d301a0d25f424d96175f810935f0da9 -https://conda.anaconda.org/conda-forge/noarch/ply-3.11-py_1.tar.bz2#7205635cd71531943440fbfe3b6b5727 -https://conda.anaconda.org/conda-forge/win-64/pthread-stubs-0.4-hcd874cb_1001.tar.bz2#a1f820480193ea83582b13249a7e7bd9 -https://conda.anaconda.org/conda-forge/noarch/py-1.11.0-pyh6c4a22f_0.tar.bz2#b4613d7e7a493916d867842a6a148054 -https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.0.9-pyhd8ed1ab_0.tar.bz2#e8fbc1b54b25f4b08281467bc13b70cc -https://conda.anaconda.org/conda-forge/noarch/setuptools-67.7.2-pyhd8ed1ab_0.conda#3b68bc43ec6baa48f7354a446267eefe -https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 -https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.1.0-pyh8a188c0_0.tar.bz2#a2995ee828f65687ac5b1e71a2ab1e0c -https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095 -https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 -https://conda.anaconda.org/conda-forge/win-64/tornado-6.3.2-py38h91455d4_0.conda#3e625e06e8892112acb47695eaf22b47 -https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.6.3-pyha770c72_0.conda#4a3014a4d107d15475d106b751c4e352 -https://conda.anaconda.org/conda-forge/win-64/unicodedata2-15.0.0-py38h91455d4_0.tar.bz2#7a135e40d9f26c15419e5e82e1c436c0 -https://conda.anaconda.org/conda-forge/noarch/wheel-0.40.0-pyhd8ed1ab_0.conda#49bb0d9e60ce1db25e151780331bb5f3 -https://conda.anaconda.org/conda-forge/noarch/win_inet_pton-1.1.0-pyhd8ed1ab_6.tar.bz2#30878ecc4bd36e8deeea1e3c151b2e0b -https://conda.anaconda.org/conda-forge/win-64/xorg-libxau-1.0.11-hcd874cb_0.conda#c46ba8712093cb0114404ae8a7582e1a -https://conda.anaconda.org/conda-forge/win-64/xorg-libxdmcp-1.1.3-hcd874cb_0.tar.bz2#46878ebb6b9cbd8afcf8088d7ef00ece -https://conda.anaconda.org/conda-forge/noarch/zipp-3.15.0-pyhd8ed1ab_0.conda#13018819ca8f5b7cc675a8faf1f5fedf -https://conda.anaconda.org/conda-forge/win-64/brotli-1.0.9-hcfcfb64_8.tar.bz2#2e661f21e1741c11506bdc7226e6b0bc -https://conda.anaconda.org/conda-forge/win-64/coverage-7.2.7-py38h91455d4_0.conda#2fa3faef0a7b6a5da2bff0faddbfbc68 -https://conda.anaconda.org/conda-forge/win-64/glib-tools-2.76.3-h12be248_0.conda#3015483cb3ffa200d51aac3c691fcda0 -https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-6.6.0-pyha770c72_0.conda#f91a5d5175fb7ff2a91952ec7da59cb9 -https://conda.anaconda.org/conda-forge/noarch/importlib_resources-5.12.0-pyhd8ed1ab_0.conda#e5fd2260a231ee63b6969f4801082f2b -https://conda.anaconda.org/conda-forge/noarch/joblib-1.2.0-pyhd8ed1ab_0.tar.bz2#7583652522d71ad78ba536bba06940eb -https://conda.anaconda.org/conda-forge/win-64/lcms2-2.15-h3e3b177_1.conda#a76c36ad1b4b87f038d67890122d08ec -https://conda.anaconda.org/conda-forge/win-64/libxcb-1.15-hcd874cb_0.conda#090d91b69396f14afef450c285f9758c 
-https://conda.anaconda.org/conda-forge/win-64/openjpeg-2.5.0-ha2aaf27_2.conda#db0490689232e8e38c312281df6f31a2 -https://conda.anaconda.org/conda-forge/noarch/pip-23.1.2-pyhd8ed1ab_0.conda#7288da0d36821349cf1126e8670292df -https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyh0701188_6.tar.bz2#56cd9fe388baac0e90c7149cfac95b60 -https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.2-pyhd8ed1ab_0.tar.bz2#dd999d1cc9f79e67dbb855c8924c7984 -https://conda.anaconda.org/conda-forge/win-64/sip-6.7.9-py38hd3f51b4_0.conda#b963e96205cfc5e98bc852a8e9349e22 -https://conda.anaconda.org/conda-forge/win-64/tbb-2021.9.0-h91493d7_0.conda#6aa3f1becefeaa00a4d2a79b2a478aee -https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.6.3-hd8ed1ab_0.conda#3876f650ed7d0f95d70fa4b647621909 -https://conda.anaconda.org/conda-forge/win-64/fonttools-4.39.4-py38h91455d4_0.conda#9eb3fd3d1aed8bc15853dd978d9abcdb -https://conda.anaconda.org/conda-forge/win-64/glib-2.76.3-h12be248_0.conda#fa3f1af2dc70e0d00a755667a741fad3 -https://conda.anaconda.org/conda-forge/noarch/importlib-resources-5.12.0-pyhd8ed1ab_0.conda#3544c818f0720c89eb16ae6940ab440b -https://conda.anaconda.org/conda-forge/win-64/mkl-2022.1.0-h6a75c08_874.tar.bz2#2ff89a7337a9636029b4db9466e9f8e3 -https://conda.anaconda.org/conda-forge/win-64/pillow-9.5.0-py38ha7eb54a_1.conda#a7066629f65b5a301e76114e06a91096 -https://conda.anaconda.org/conda-forge/noarch/platformdirs-3.5.1-pyhd8ed1ab_0.conda#e2be672aece1f060adf7154f76531a35 -https://conda.anaconda.org/conda-forge/win-64/pyqt5-sip-12.11.0-py38hd3f51b4_3.conda#948a9d38ac004da975f9862194c25f68 -https://conda.anaconda.org/conda-forge/noarch/pytest-7.3.1-pyhd8ed1ab_0.conda#547c7de697ec99b494a28ddde185b5a4 -https://conda.anaconda.org/conda-forge/noarch/urllib3-2.0.2-pyhd8ed1ab_0.conda#81a763f3c64fe6d5f32e033b0325265d -https://conda.anaconda.org/conda-forge/win-64/gstreamer-1.22.3-h6b5321d_1.conda#00afb31665a8028ca2ff9af61fea64e1 -https://conda.anaconda.org/conda-forge/win-64/libblas-3.9.0-17_win64_mkl.conda#9e42ac6b256b96bfaa19f829c25940e8 -https://conda.anaconda.org/conda-forge/win-64/mkl-devel-2022.1.0-h57928b3_875.tar.bz2#6319a06307af296c1dfae93687c283b2 -https://conda.anaconda.org/conda-forge/noarch/pytest-cov-4.1.0-pyhd8ed1ab_0.conda#06eb685a3a0b146347a58dda979485da -https://conda.anaconda.org/conda-forge/noarch/pytest-forked-1.6.0-pyhd8ed1ab_0.conda#a46947638b6e005b63d2d6271da529b0 -https://conda.anaconda.org/conda-forge/noarch/requests-2.31.0-pyhd8ed1ab_0.conda#a30144e4156cdbb236f99ebb49828f8b -https://conda.anaconda.org/conda-forge/win-64/gst-plugins-base-1.22.3-h001b923_1.conda#bd6347f397891bf4eb264c652221507c -https://conda.anaconda.org/conda-forge/win-64/libcblas-3.9.0-17_win64_mkl.conda#768b2c3be666ecf9e62f939ea919f819 -https://conda.anaconda.org/conda-forge/win-64/liblapack-3.9.0-17_win64_mkl.conda#278121fe8f0d65d496998aa290f36322 -https://conda.anaconda.org/conda-forge/noarch/pooch-1.7.0-pyha770c72_3.conda#5936894aade8240c867d292aa0d980c6 -https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-2.5.0-pyhd8ed1ab_0.tar.bz2#1fdd1f3baccf0deb647385c677a1a48e -https://conda.anaconda.org/conda-forge/win-64/liblapacke-3.9.0-17_win64_mkl.conda#6c98bb1c41479063f089459dcdedcecb -https://conda.anaconda.org/conda-forge/win-64/numpy-1.24.3-py38h1d91fd2_0.conda#2768aa0aa44da206dc5fc3d1ba6ad857 -https://conda.anaconda.org/conda-forge/win-64/qt-main-5.15.8-h2c8576c_13.conda#b00e4814feb5fa92b864ef031130c2cf 
-https://conda.anaconda.org/conda-forge/win-64/blas-devel-3.9.0-17_win64_mkl.conda#bfcbcc96906ca944d944eb4ae340371a -https://conda.anaconda.org/conda-forge/win-64/contourpy-1.0.7-py38hb1fd069_0.conda#6b53200dddcec578cdd90cac146eeadd -https://conda.anaconda.org/conda-forge/win-64/pyqt-5.15.7-py38hd6c051e_3.conda#9b17c0bbf19c6e265c3967e33df8770a -https://conda.anaconda.org/conda-forge/win-64/scipy-1.10.1-py38h1aea9ed_3.conda#1ed766b46170f86ead2ae6b9b8151191 -https://conda.anaconda.org/conda-forge/win-64/blas-2.117-mkl.conda#a6b489be6ddbc3259df7cc8a440b8950 -https://conda.anaconda.org/conda-forge/win-64/matplotlib-base-3.7.1-py38h528a6c7_0.conda#0aebccad15d74ec7f1bc3d62497ad1a8 -https://conda.anaconda.org/conda-forge/win-64/matplotlib-3.7.1-py38haa244fe_0.conda#f41a8af387463a78ad87571c767d0d80 diff --git a/build_tools/azure/py38_conda_forge_openblas_ubuntu_2204_linux-64_conda.lock b/build_tools/azure/py38_conda_forge_openblas_ubuntu_2204_linux-64_conda.lock deleted file mode 100644 index 83b59e621f828..0000000000000 --- a/build_tools/azure/py38_conda_forge_openblas_ubuntu_2204_linux-64_conda.lock +++ /dev/null @@ -1,179 +0,0 @@ -# Generated by conda-lock. -# platform: linux-64 -# input_hash: d249329b78962bdba40d2f7d66c3a94b4caaced25b05b3bc95f39dda6c72aebe -@EXPLICIT -https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 -https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2023.5.7-hbcca054_0.conda#f5c65075fc34438d5b456c7f3f5ab695 -https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2#0c96522c6bdaed4b1566d11387caaf45 -https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2#34893075a5c9e55cdafac56607368fc6 -https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2#4d59c254e01d9cde7957100457e2d5fb -https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-hab24e00_0.tar.bz2#19410c3df09dfb12d1206132a1d357c5 -https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.40-h41732ed_0.conda#7aca3059a1729aa76c597603f10b0dd3 -https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-13.1.0-h15d22d2_0.conda#afb656a334c409dd9805508af1c89c7a -https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.1.0-hfd8a6a1_0.conda#067bcc23164642f4c226da631f2a2e1d -https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.8-3_cp38.conda#2f3f7af062b42d664117662612022204 -https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-0.tar.bz2#f766549260d6815b0c52253f1fb1bb29 -https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-13.1.0-h69a702a_0.conda#506dc07710dd5b0ba63cbf134897fc10 -https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2#fee5683a3f04bd15cbd8318b096a27ab -https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_kmp_llvm.tar.bz2#562b26ba2e19059551a811e72ab7f793 -https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.1.0-he5830b7_0.conda#cd93f779ff018dd85c7544c015c9db3c -https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.8-h166bdaf_0.tar.bz2#be733e69048951df1e4b4b7bb8c7666f -https://conda.anaconda.org/conda-forge/linux-64/attr-2.5.1-h166bdaf_1.tar.bz2#d9c69a24ad678ffce24c6543a0176b00 -https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h7f98852_4.tar.bz2#a1fd65c7ccbf10880423d82bca54eb54 -https://conda.anaconda.org/conda-forge/linux-64/gettext-0.21.1-h27087fc_0.tar.bz2#14947d8770185e5153fdd04d4673ed37 
-https://conda.anaconda.org/conda-forge/linux-64/graphite2-1.3.13-h58526e2_1001.tar.bz2#8c54672728e8ec6aa6db90cf2806d220 -https://conda.anaconda.org/conda-forge/linux-64/icu-72.1-hcb278e6_0.conda#7c8d20d847bb45f56bd941578fcfa146 -https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2#30186d27e2c9fa62b45fb1476b7200e3 -https://conda.anaconda.org/conda-forge/linux-64/lame-3.100-h166bdaf_1003.tar.bz2#a8832b479f93521a9e7b5b743803be51 -https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h27087fc_0.tar.bz2#76bbff344f0134279f225174e9064c8f -https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.0.9-h166bdaf_8.tar.bz2#9194c9bf9428035a05352d031462eae4 -https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.18-h0b41bf4_0.conda#6aa9c9de5542ecb07fdda9ca626252d8 -https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.5.0-hcb278e6_1.conda#6305a3dd2752c76335295da4e581f2fd -https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3 -https://conda.anaconda.org/conda-forge/linux-64/libhiredis-1.0.2-h2cc385e_0.tar.bz2#b34907d3a81a3cd8095ee83d174c074a -https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.17-h166bdaf_0.tar.bz2#b62b52da46c39ee2bc3c162ac7f1804d -https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-2.1.5.1-h0b41bf4_0.conda#1edd9e67bdb90d78cea97733ff6b54e6 -https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.0-h7f98852_0.tar.bz2#39b1328babf85c7c3a61636d9cd50206 -https://conda.anaconda.org/conda-forge/linux-64/libogg-1.3.4-h7f98852_1.tar.bz2#6e8cc2173440d77708196c5b93771680 -https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.23-pthreads_h80387f5_0.conda#9c5ea51ccb8ffae7d06c645869d24ce6 -https://conda.anaconda.org/conda-forge/linux-64/libopus-1.3.1-h7f98852_1.tar.bz2#15345e56d527b330e1cacbdf58676e8f -https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda#40b61aab5c7ba9ff276c41cfffe6b80b -https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.3.0-h0b41bf4_0.conda#0d4a7508d8c6c65314f2b9c1f56ad408 -https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-h166bdaf_4.tar.bz2#f3f9de449d32ca9b9c66a22863c96f41 -https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.9.4-hcb278e6_0.conda#318b08df404f9c9be5712aaa5a6f0bb0 -https://conda.anaconda.org/conda-forge/linux-64/mpg123-1.31.3-hcb278e6_0.conda#141a126675b6d1a4eabb111a4a353898 -https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.4-hcb278e6_0.conda#681105bccc2a3f7f1a837d47d39c9179 -https://conda.anaconda.org/conda-forge/linux-64/nspr-4.35-h27087fc_0.conda#da0ec11a6454ae19bff5b02ed881a2b1 -https://conda.anaconda.org/conda-forge/linux-64/openssl-3.1.1-hd590300_1.conda#2e1d7b458ac8f1e3ca4e18b77add6277 -https://conda.anaconda.org/conda-forge/linux-64/pixman-0.40.0-h36c2ea0_0.tar.bz2#660e72c82f2e75a6b3fe6a6e75c79f19 -https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-h36c2ea0_1001.tar.bz2#22dad4df6e8630e8dff2428f6f6a7036 -https://conda.anaconda.org/conda-forge/linux-64/xkeyboard-config-2.38-h0b41bf4_0.conda#9ac34337e5101a87e5d91da05d84aa48 -https://conda.anaconda.org/conda-forge/linux-64/xorg-kbproto-1.0.7-h7f98852_1002.tar.bz2#4b230e8381279d76131116660f5a241a -https://conda.anaconda.org/conda-forge/linux-64/xorg-libice-1.1.1-hd590300_0.conda#b462a33c0be1421532f28bfe8f4a7514 -https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.11-hd590300_0.conda#2c80dc38fface310c9bd81b17037fee5 
-https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdmcp-1.1.3-h7f98852_0.tar.bz2#be93aabceefa2fac576e971aef407908 -https://conda.anaconda.org/conda-forge/linux-64/xorg-renderproto-0.11.1-h7f98852_1002.tar.bz2#06feff3d2634e3097ce2fe681474b534 -https://conda.anaconda.org/conda-forge/linux-64/xorg-xextproto-7.3.0-h0b41bf4_1003.conda#bce9f945da8ad2ae9b1d7165a64d0f87 -https://conda.anaconda.org/conda-forge/linux-64/xorg-xf86vidmodeproto-2.3.1-h7f98852_1002.tar.bz2#3ceea9668625c18f19530de98b15d5b0 -https://conda.anaconda.org/conda-forge/linux-64/xorg-xproto-7.0.31-h7f98852_1007.tar.bz2#b4a4381d54784606820704f7b5f05a15 -https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0 -https://conda.anaconda.org/conda-forge/linux-64/expat-2.5.0-hcb278e6_1.conda#8b9b5aca60558d02ddaa09d599e55920 -https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-17_linux64_openblas.conda#57fb44770b1bc832fb2dbefa1bd502de -https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.0.9-h166bdaf_8.tar.bz2#4ae4d7795d33e02bd20f6b23d91caf82 -https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.0.9-h166bdaf_8.tar.bz2#04bac51ba35ea023dc48af73c1c88c25 -https://conda.anaconda.org/conda-forge/linux-64/libcap-2.67-he9d0100_0.conda#d05556c80caffff164d17bdea0105a1a -https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2.tar.bz2#4d331e44109e3f0e19b4cb8f9b82f3e1 -https://conda.anaconda.org/conda-forge/linux-64/libevent-2.1.12-hf998b51_1.conda#a1cfcc585f0c42bf8d5546bb1dfb668d -https://conda.anaconda.org/conda-forge/linux-64/libflac-1.4.2-h27087fc_0.tar.bz2#7daf72d8e2a8e848e11d63ed6d1026e0 -https://conda.anaconda.org/conda-forge/linux-64/libgpg-error-1.46-h620e276_0.conda#27e745f6f2e4b757e95dd7225fbe6bdb -https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.39-h753d276_0.conda#e1c890aebdebbfbf87e2c917187b4416 -https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.42.0-h2797004_0.conda#fdaae20a1cf7cd62130a0973190a31b7 -https://conda.anaconda.org/conda-forge/linux-64/libvorbis-1.3.7-h9c3ff4c_0.tar.bz2#309dec04b70a3cc0f1e84a4013683bc0 -https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.15-h0b41bf4_0.conda#33277193f5b92bad9fdd230eb700929c -https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.11.4-h0d562d8_0.conda#e46fad17d5fb57316b956f88dca765e4 -https://conda.anaconda.org/conda-forge/linux-64/mysql-common-8.0.32-hf1915f5_2.conda#cf4a8f520fdad3a63bb2bce74576cd2d -https://conda.anaconda.org/conda-forge/linux-64/openblas-0.3.23-pthreads_h855a84d_0.conda#ba8810202f8879562f01b4f9957c1ada -https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.40-hc3806b6_0.tar.bz2#69e2c796349cd9b273890bee0febfe1b -https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4 -https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.12-h27826a3_0.tar.bz2#5b8c42eb62e9fc961af70bdd6a26e168 -https://conda.anaconda.org/conda-forge/linux-64/xorg-libsm-1.2.4-h7391055_0.conda#93ee23f12bc2e684548181256edd2cf6 -https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.13-h166bdaf_4.tar.bz2#4b11e365c0275b808be78b30f904e295 -https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.2-h3eb15da_6.conda#6b63daed8feeca47be78f323e793d555 -https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.0.9-h166bdaf_8.tar.bz2#e5613f2bc717e9945840ff474419b8e4 -https://conda.anaconda.org/conda-forge/linux-64/ccache-4.8.1-h1fcd64f_0.conda#fd37a0c47d8b3667b73af0549037ce83 
-https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-hca18f0e_1.conda#e1232042de76d24539a436d37597eb06 -https://conda.anaconda.org/conda-forge/linux-64/krb5-1.20.1-h81ceb04_0.conda#89a41adce7106749573d883b2f657d78 -https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-17_linux64_openblas.conda#7ef0969b00fe3d6eef56a8151d3afb29 -https://conda.anaconda.org/conda-forge/linux-64/libgcrypt-1.10.1-h166bdaf_0.tar.bz2#f967fc95089cd247ceed56eda31de3a9 -https://conda.anaconda.org/conda-forge/linux-64/libglib-2.76.3-hebfc3b9_0.conda#a64f11b244b2c112cd3fa1cbe9493999 -https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-17_linux64_openblas.conda#a2103882c46492e26500fcb56c03de8b -https://conda.anaconda.org/conda-forge/linux-64/libllvm15-15.0.7-h5cf9203_2.conda#5c0a511fa7d223d8661fefcf77b2a877 -https://conda.anaconda.org/conda-forge/linux-64/libsndfile-1.2.0-hb75c966_0.conda#c648d19cd9c8625898d5d370414de7c7 -https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.5.0-ha587672_6.conda#4e5ee4b062c21519efbee7e2ae608748 -https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.5.0-h5d7e998_3.conda#c91ea308d7bf70b62ddda568478aa03b -https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-16.0.5-h4dfa4b3_0.conda#9441a97b74c692d969ff465ac6c0ccea -https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-8.0.32-hca2cd23_2.conda#20b4708cd04bdc8138d03314ddd97885 -https://conda.anaconda.org/conda-forge/linux-64/nss-3.89-he45b914_0.conda#2745719a58eeaab6657256a3f142f099 -https://conda.anaconda.org/conda-forge/linux-64/python-3.8.16-he550d4f_1_cpython.conda#9de84cccfbc5f8350a3667bb6ef6fc30 -https://conda.anaconda.org/conda-forge/linux-64/xcb-util-0.4.0-hd590300_1.conda#9bfac7ccd94d54fd21a0501296d60424 -https://conda.anaconda.org/conda-forge/linux-64/xcb-util-keysyms-0.4.0-h8ee46fc_1.conda#632413adcd8bc16b515cab87a2932913 -https://conda.anaconda.org/conda-forge/linux-64/xcb-util-renderutil-0.3.9-hd590300_1.conda#e995b155d938b6779da6ace6c6b13816 -https://conda.anaconda.org/conda-forge/linux-64/xcb-util-wm-0.4.1-h8ee46fc_1.conda#90108a432fb5c6150ccfee3f03388656 -https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.4-h8ee46fc_1.conda#52d09ea80a42c0466214609ef0a2d62d -https://conda.anaconda.org/conda-forge/linux-64/brotli-1.0.9-h166bdaf_8.tar.bz2#2ff08978892a3e8b954397c461f18418 -https://conda.anaconda.org/conda-forge/noarch/certifi-2023.5.7-pyhd8ed1ab_0.conda#5d1b71c942b8421285934dad1d891ebc -https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.1.0-pyhd8ed1ab_0.conda#7fcff9f6f123696e940bda77bd4d6551 -https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 -https://conda.anaconda.org/conda-forge/noarch/cycler-0.11.0-pyhd8ed1ab_0.tar.bz2#a50559fad0affdbb33729a68669ca1cb -https://conda.anaconda.org/conda-forge/linux-64/cython-0.29.35-py38h17151c0_0.conda#551ebaa88e71c13dbede1b60a80acf7b -https://conda.anaconda.org/conda-forge/linux-64/dbus-1.13.6-h5008d03_3.tar.bz2#ecfff944ba3960ecb334b9a2663d708d -https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.1.1-pyhd8ed1ab_0.conda#7312299d7a0ea4993159229b7d2dceb2 -https://conda.anaconda.org/conda-forge/noarch/execnet-1.9.0-pyhd8ed1ab_0.tar.bz2#0e521f7a5e60d508b121d38b04874fb2 -https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.14.2-h14ed4e7_0.conda#0f69b688f52ff6da70bccb7ff7001d1d -https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.76.3-hfc55251_0.conda#8951eedf3cdf94dd733c1b5eee1f4880 
-https://conda.anaconda.org/conda-forge/noarch/idna-3.4-pyhd8ed1ab_0.tar.bz2#34272b248891bddccc64479f9a7fffed -https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5 -https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.4-py38h43d8883_1.tar.bz2#41ca56d5cac7bfc7eb4fcdbee878eb84 -https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.15-haa2dc70_1.conda#980d8aca0bc23ca73fa8caa3e7c84c28 -https://conda.anaconda.org/conda-forge/linux-64/libclang13-15.0.7-default_h9986a30_2.conda#907344cee64101d44d806bbe0fccb01d -https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h36d4200_3.conda#c9f4416a34bc91e0eb029f912c68f81f -https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.9.0-17_linux64_openblas.conda#949709aa6ee6a2dcdb3de6dd99147d17 -https://conda.anaconda.org/conda-forge/linux-64/libpq-15.3-hbcd7760_1.conda#8afb2a97d256ffde95b91a6283bc598c -https://conda.anaconda.org/conda-forge/linux-64/libsystemd0-253-h8c4010b_1.conda#9176b1e2cb8beca37a7510b0e801e38f -https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19 -https://conda.anaconda.org/conda-forge/linux-64/numpy-1.24.3-py38h59b608b_0.conda#5836e4ab0399136ede58446a4776b2ff -https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.0-hfec8fc6_2.conda#5ce6a42505c6e9e6151c54c3ec8d68ea -https://conda.anaconda.org/conda-forge/noarch/packaging-23.1-pyhd8ed1ab_0.conda#91cda59e66e1e4afe9476f8ef98f5c30 -https://conda.anaconda.org/conda-forge/noarch/pluggy-1.0.0-pyhd8ed1ab_5.tar.bz2#7d301a0d25f424d96175f810935f0da9 -https://conda.anaconda.org/conda-forge/noarch/ply-3.11-py_1.tar.bz2#7205635cd71531943440fbfe3b6b5727 -https://conda.anaconda.org/conda-forge/noarch/py-1.11.0-pyh6c4a22f_0.tar.bz2#b4613d7e7a493916d867842a6a148054 -https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.0.9-pyhd8ed1ab_0.tar.bz2#e8fbc1b54b25f4b08281467bc13b70cc -https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha2e5f31_6.tar.bz2#2a7de29fb590ca14b5243c4c812c8025 -https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2023.3-pyhd8ed1ab_0.conda#2590495f608a63625e165915fb4e2e34 -https://conda.anaconda.org/conda-forge/noarch/pytz-2023.3-pyhd8ed1ab_0.conda#d3076b483092a435832603243567bc31 -https://conda.anaconda.org/conda-forge/noarch/setuptools-67.7.2-pyhd8ed1ab_0.conda#3b68bc43ec6baa48f7354a446267eefe -https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 -https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.1.0-pyh8a188c0_0.tar.bz2#a2995ee828f65687ac5b1e71a2ab1e0c -https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095 -https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 -https://conda.anaconda.org/conda-forge/linux-64/tornado-6.3.2-py38h01eb140_0.conda#3db869202b0e523d606d13e81ca79ab6 -https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.6.3-pyha770c72_0.conda#4a3014a4d107d15475d106b751c4e352 -https://conda.anaconda.org/conda-forge/linux-64/unicodedata2-15.0.0-py38h0a891b7_0.tar.bz2#44421904760e9f5ae2035193e04360f0 -https://conda.anaconda.org/conda-forge/linux-64/xcb-util-image-0.4.0-h8ee46fc_1.conda#9d7bcddf49cbf727730af10e71022c73 -https://conda.anaconda.org/conda-forge/linux-64/xorg-libxext-1.3.4-h0b41bf4_2.conda#82b6df12252e6f32402b96dacc656fec 
-https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.10-h7f98852_1003.tar.bz2#f59c1242cc1dd93e72c2ee2b360979eb -https://conda.anaconda.org/conda-forge/noarch/zipp-3.15.0-pyhd8ed1ab_0.conda#13018819ca8f5b7cc675a8faf1f5fedf -https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-17_linux64_openblas.conda#fde382e41d77b65315fab79ab93a20ab -https://conda.anaconda.org/conda-forge/linux-64/cairo-1.16.0-hbbf8b49_1016.conda#c1dd96500b9b1a75e9e511931f415cbc -https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.0.7-py38hfbd4bf9_0.conda#638537863b298151635c05c762a997ab -https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.39.4-py38h01eb140_0.conda#8eb5a370d618aa8a65dee377153a3451 -https://conda.anaconda.org/conda-forge/linux-64/glib-2.76.3-hfc55251_0.conda#950e02f5665f5f4ff0437a6acba58798 -https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-6.6.0-pyha770c72_0.conda#f91a5d5175fb7ff2a91952ec7da59cb9 -https://conda.anaconda.org/conda-forge/noarch/importlib_resources-5.12.0-pyhd8ed1ab_0.conda#e5fd2260a231ee63b6969f4801082f2b -https://conda.anaconda.org/conda-forge/noarch/joblib-1.2.0-pyhd8ed1ab_0.tar.bz2#7583652522d71ad78ba536bba06940eb -https://conda.anaconda.org/conda-forge/linux-64/libclang-15.0.7-default_h7634d5b_2.conda#1a4fe5162abe4a19b5a9dedf158a0ff9 -https://conda.anaconda.org/conda-forge/linux-64/pillow-9.5.0-py38h885162f_1.conda#0eec8a20a17f17ec9e0b6839be466866 -https://conda.anaconda.org/conda-forge/linux-64/pulseaudio-client-16.1-hb77b528_4.conda#8f349ca16d30950aa00870484d9d30c4 -https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.2-pyhd8ed1ab_0.tar.bz2#dd999d1cc9f79e67dbb855c8924c7984 -https://conda.anaconda.org/conda-forge/linux-64/sip-6.7.9-py38h17151c0_0.conda#6a54fd42b71a8b1c5f9c4a691270cdf1 -https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.6.3-hd8ed1ab_0.conda#3876f650ed7d0f95d70fa4b647621909 -https://conda.anaconda.org/conda-forge/noarch/urllib3-2.0.2-pyhd8ed1ab_0.conda#81a763f3c64fe6d5f32e033b0325265d -https://conda.anaconda.org/conda-forge/linux-64/blas-2.117-openblas.conda#54b4b02b897156056f3056f992261d0c -https://conda.anaconda.org/conda-forge/linux-64/gstreamer-1.22.3-h977cf35_1.conda#410ed3b168e5a139d12ebaf4143072cd -https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-7.3.0-hdb3a94d_0.conda#765bc76c0dfaf24ff9d8a2935b2510df -https://conda.anaconda.org/conda-forge/noarch/importlib-resources-5.12.0-pyhd8ed1ab_0.conda#3544c818f0720c89eb16ae6940ab440b -https://conda.anaconda.org/conda-forge/linux-64/pandas-2.0.2-py38h01efb38_0.conda#71066496987a1b50632526154e3d9711 -https://conda.anaconda.org/conda-forge/noarch/platformdirs-3.5.1-pyhd8ed1ab_0.conda#e2be672aece1f060adf7154f76531a35 -https://conda.anaconda.org/conda-forge/linux-64/pyqt5-sip-12.11.0-py38h8dc9893_3.conda#7bb0328b4a0f857aeb432426b9a5f908 -https://conda.anaconda.org/conda-forge/noarch/pytest-7.3.1-pyhd8ed1ab_0.conda#547c7de697ec99b494a28ddde185b5a4 -https://conda.anaconda.org/conda-forge/noarch/requests-2.31.0-pyhd8ed1ab_0.conda#a30144e4156cdbb236f99ebb49828f8b -https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.22.3-h938bd60_1.conda#1f317eb7f00db75f4112a07476345376 -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.7.1-py38hd6c3c57_0.conda#3b8ba76acae09fbd4b2247c4ee4c0324 -https://conda.anaconda.org/conda-forge/noarch/pooch-1.7.0-pyha770c72_3.conda#5936894aade8240c867d292aa0d980c6 -https://conda.anaconda.org/conda-forge/noarch/pytest-forked-1.6.0-pyhd8ed1ab_0.conda#a46947638b6e005b63d2d6271da529b0 
-https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-2.5.0-pyhd8ed1ab_0.tar.bz2#1fdd1f3baccf0deb647385c677a1a48e -https://conda.anaconda.org/conda-forge/linux-64/qt-main-5.15.8-h01ceb2d_13.conda#99ca83a166224f46a62c9545b8d66401 -https://conda.anaconda.org/conda-forge/linux-64/scipy-1.10.1-py38h59b608b_3.conda#2f2a57462fcfbc67dfdbb0de6f7484c2 -https://conda.anaconda.org/conda-forge/linux-64/pyamg-5.0.0-py38h757e2ef_0.conda#b935895fb7ba4717f07688f2b1f4f567 -https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.15.7-py38ha0d8c90_3.conda#e965dc172d67920d058ac2b3a0e27565 -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.7.1-py38h578d9bd_0.conda#50ff9e0a3dd459a0ca365741072bf9a2 diff --git a/build_tools/azure/pylatest_conda_forge_mkl_linux-64_conda.lock b/build_tools/azure/pylatest_conda_forge_mkl_linux-64_conda.lock index 673981be3e05e..09249ad17160b 100644 --- a/build_tools/azure/pylatest_conda_forge_mkl_linux-64_conda.lock +++ b/build_tools/azure/pylatest_conda_forge_mkl_linux-64_conda.lock @@ -1,55 +1,66 @@ # Generated by conda-lock. # platform: linux-64 -# input_hash: 56e8dae95dcae13cac7ca1898bda12f1408bcea8a1aeb587ced409672f398a4b +# input_hash: 50fed47bc507d9ee3dbf5ff7a2247cb88944928bd5797e534ebdf8ece2d858ec @EXPLICIT https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 -https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2023.5.7-hbcca054_0.conda#f5c65075fc34438d5b456c7f3f5ab695 +https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2024.6.2-hbcca054_0.conda#847c3c2905cc467cea52c24f9cfa8080 https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2#0c96522c6bdaed4b1566d11387caaf45 https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2#34893075a5c9e55cdafac56607368fc6 https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2#4d59c254e01d9cde7957100457e2d5fb -https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-hab24e00_0.tar.bz2#19410c3df09dfb12d1206132a1d357c5 -https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.40-h41732ed_0.conda#7aca3059a1729aa76c597603f10b0dd3 -https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-13.1.0-h15d22d2_0.conda#afb656a334c409dd9805508af1c89c7a -https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.1.0-hfd8a6a1_0.conda#067bcc23164642f4c226da631f2a2e1d -https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.11-3_cp311.conda#c2e2630ddb68cf52eec74dc7dfab20b5 -https://conda.anaconda.org/conda-forge/noarch/tzdata-2023c-h71feb2d_0.conda#939e3e74d8be4dac89ce83b20de2492a +https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_2.conda#cbbe59391138ea5ad3658c76912e147f +https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.40-hf3520f5_3.conda#7c1062eaa78dec4ea8a9a988dbda6045 +https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.2.0-hc0a3c3a_7.conda#53ebd4c833fa01cb2c6353e99f905406 +https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.11-4_cp311.conda#d786502c97404c94d7d58d258a445a65 +https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda#161081fc7cec0bfda0d86d7cb595f8d8 https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-0.tar.bz2#f766549260d6815b0c52253f1fb1bb29 -https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-13.1.0-h69a702a_0.conda#506dc07710dd5b0ba63cbf134897fc10 
https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2#fee5683a3f04bd15cbd8318b096a27ab https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_kmp_llvm.tar.bz2#562b26ba2e19059551a811e72ab7f793 -https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.1.0-he5830b7_0.conda#cd93f779ff018dd85c7544c015c9db3c -https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.8-h166bdaf_0.tar.bz2#be733e69048951df1e4b4b7bb8c7666f +https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.2.0-h77fa898_7.conda#72ec1b1b04c4d15d4204ece1ecea5978 +https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.11-hd590300_1.conda#0bb492cca54017ea314b809b1ee3a176 https://conda.anaconda.org/conda-forge/linux-64/attr-2.5.1-h166bdaf_1.tar.bz2#d9c69a24ad678ffce24c6543a0176b00 -https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h7f98852_4.tar.bz2#a1fd65c7ccbf10880423d82bca54eb54 -https://conda.anaconda.org/conda-forge/linux-64/cudatoolkit-11.8.0-h37601d7_11.conda#9d166760c8cfa83e2fc989928312da3d -https://conda.anaconda.org/conda-forge/linux-64/gettext-0.21.1-h27087fc_0.tar.bz2#14947d8770185e5153fdd04d4673ed37 -https://conda.anaconda.org/conda-forge/linux-64/graphite2-1.3.13-h58526e2_1001.tar.bz2#8c54672728e8ec6aa6db90cf2806d220 -https://conda.anaconda.org/conda-forge/linux-64/icu-72.1-hcb278e6_0.conda#7c8d20d847bb45f56bd941578fcfa146 +https://conda.anaconda.org/conda-forge/linux-64/aws-c-common-0.9.0-hd590300_0.conda#71b89db63b5b504e7afc8ad901172e1e +https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hd590300_5.conda#69b8b6202a07720f448be700e300ccf4 +https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.28.1-hd590300_0.conda#dcde58ff9a1f30b0037a2315d1846d1f +https://conda.anaconda.org/conda-forge/linux-64/gettext-tools-0.22.5-h59595ed_2.conda#985f2f453fb72408d6b6f1be0f324033 +https://conda.anaconda.org/conda-forge/linux-64/gflags-2.2.2-he1b5a44_1004.tar.bz2#cddaf2c63ea4a5901cf09524c490ecdc +https://conda.anaconda.org/conda-forge/linux-64/graphite2-1.3.13-h59595ed_1003.conda#f87c7b7c2cb45f323ffbce941c78ab7c +https://conda.anaconda.org/conda-forge/linux-64/icu-73.2-h59595ed_0.conda#cc47e1facc155f91abd89b11e48e72ff https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2#30186d27e2c9fa62b45fb1476b7200e3 https://conda.anaconda.org/conda-forge/linux-64/lame-3.100-h166bdaf_1003.tar.bz2#a8832b479f93521a9e7b5b743803be51 https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h27087fc_0.tar.bz2#76bbff344f0134279f225174e9064c8f -https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.0.9-h166bdaf_8.tar.bz2#9194c9bf9428035a05352d031462eae4 -https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.18-h0b41bf4_0.conda#6aa9c9de5542ecb07fdda9ca626252d8 -https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.5.0-hcb278e6_1.conda#6305a3dd2752c76335295da4e581f2fd +https://conda.anaconda.org/conda-forge/linux-64/libabseil-20230125.3-cxx17_h59595ed_0.conda#d1db1b8be7c3a8983dcbbbfe4f0765de +https://conda.anaconda.org/conda-forge/linux-64/libasprintf-0.22.5-h661eb56_2.conda#dd197c968bf9760bba0031888d431ede +https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.0.9-h166bdaf_9.conda#61641e239f96eae2b8492dc7e755828c +https://conda.anaconda.org/conda-forge/linux-64/libcrc32c-1.1.2-h9c3ff4c_0.tar.bz2#c965a5aa0d5c1c37ffc62dff36e28400 +https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.20-hd590300_0.conda#8e88f9389f1165d7c0936fe40d9a9a79 
+https://conda.anaconda.org/conda-forge/linux-64/libev-4.33-hd590300_2.conda#172bf1cd1ff8629f2b1179945ed45055 +https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.6.2-h59595ed_0.conda#e7ba12deb7020dd080c6c70e7b6f6a3d https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3 -https://conda.anaconda.org/conda-forge/linux-64/libhiredis-1.0.2-h2cc385e_0.tar.bz2#b34907d3a81a3cd8095ee83d174c074a -https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.17-h166bdaf_0.tar.bz2#b62b52da46c39ee2bc3c162ac7f1804d -https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-2.1.5.1-h0b41bf4_0.conda#1edd9e67bdb90d78cea97733ff6b54e6 -https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.0-h7f98852_0.tar.bz2#39b1328babf85c7c3a61636d9cd50206 +https://conda.anaconda.org/conda-forge/linux-64/libgettextpo-0.22.5-h59595ed_2.conda#172bcc51059416e7ce99e7b528cede83 +https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-13.2.0-hca663fb_7.conda#c0bd771f09a326fdcd95a60b617795bf +https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.17-hd590300_2.conda#d66573916ffcf376178462f1b61c941e +https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.0.0-hd590300_1.conda#ea25936bb4080d843790b586850f82b8 +https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hd590300_0.conda#30fd6e37fe21f86f4bd26d6ee73eeec7 +https://conda.anaconda.org/conda-forge/linux-64/libnuma-2.0.18-h4ab18f5_2.conda#a263760479dbc7bc1f3df12707bd90dc https://conda.anaconda.org/conda-forge/linux-64/libogg-1.3.4-h7f98852_1.tar.bz2#6e8cc2173440d77708196c5b93771680 https://conda.anaconda.org/conda-forge/linux-64/libopus-1.3.1-h7f98852_1.tar.bz2#15345e56d527b330e1cacbdf58676e8f +https://conda.anaconda.org/conda-forge/linux-64/libutf8proc-2.8.0-h166bdaf_0.tar.bz2#ede4266dc02e875fe1ea77b25dd43747 https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda#40b61aab5c7ba9ff276c41cfffe6b80b -https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.3.0-h0b41bf4_0.conda#0d4a7508d8c6c65314f2b9c1f56ad408 -https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-h166bdaf_4.tar.bz2#f3f9de449d32ca9b9c66a22863c96f41 +https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.4.0-hd590300_0.conda#b26e8aa824079e1be0294e7152ca4559 +https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda#5aa797f8787fe7a17d1b0821485b5adc +https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-h4ab18f5_1.conda#57d7dc60e9325e3de37ff8dffd18e814 https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.9.4-hcb278e6_0.conda#318b08df404f9c9be5712aaa5a6f0bb0 -https://conda.anaconda.org/conda-forge/linux-64/mpg123-1.31.3-hcb278e6_0.conda#141a126675b6d1a4eabb111a4a353898 -https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.4-hcb278e6_0.conda#681105bccc2a3f7f1a837d47d39c9179 +https://conda.anaconda.org/conda-forge/linux-64/mpg123-1.32.6-h59595ed_0.conda#9160cdeb523a1b20cf8d2a0bf821f45d +https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h59595ed_0.conda#fcea371545eda051b6deafb24889fc69 +https://conda.anaconda.org/conda-forge/linux-64/ninja-1.12.1-h297d8ca_0.conda#3aa1c7e292afeff25a0091ddd7c69b72 https://conda.anaconda.org/conda-forge/linux-64/nspr-4.35-h27087fc_0.conda#da0ec11a6454ae19bff5b02ed881a2b1 -https://conda.anaconda.org/conda-forge/linux-64/openssl-3.1.1-hd590300_1.conda#2e1d7b458ac8f1e3ca4e18b77add6277 -https://conda.anaconda.org/conda-forge/linux-64/pixman-0.40.0-h36c2ea0_0.tar.bz2#660e72c82f2e75a6b3fe6a6e75c79f19 
+https://conda.anaconda.org/conda-forge/linux-64/openssl-3.3.1-h4ab18f5_0.conda#a41fa0e391cc9e0d6b78ac69ca047a6c +https://conda.anaconda.org/conda-forge/linux-64/pixman-0.43.2-h59595ed_0.conda#71004cbf7924e19c02746ccde9fd7123 https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-h36c2ea0_1001.tar.bz2#22dad4df6e8630e8dff2428f6f6a7036 +https://conda.anaconda.org/conda-forge/linux-64/rdma-core-28.9-h59595ed_1.conda#aeffb7c06b5f65e55e6c637408dc4100 +https://conda.anaconda.org/conda-forge/linux-64/re2-2023.03.02-h8c504da_0.conda#206f8fa808748f6e90599c3368a1114e https://conda.anaconda.org/conda-forge/linux-64/sleef-3.5.1-h9b69904_2.tar.bz2#6e016cf4c525d04a7bd038cee53ad3fd -https://conda.anaconda.org/conda-forge/linux-64/xkeyboard-config-2.38-h0b41bf4_0.conda#9ac34337e5101a87e5d91da05d84aa48 +https://conda.anaconda.org/conda-forge/linux-64/snappy-1.1.10-hdb0a2a9_1.conda#78b8b85bdf1f42b8a2b3cb577d8742d1 https://conda.anaconda.org/conda-forge/linux-64/xorg-kbproto-1.0.7-h7f98852_1002.tar.bz2#4b230e8381279d76131116660f5a241a https://conda.anaconda.org/conda-forge/linux-64/xorg-libice-1.1.1-hd590300_0.conda#b462a33c0be1421532f28bfe8f4a7514 https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.11-hd590300_0.conda#2c80dc38fface310c9bd81b17037fee5 @@ -59,126 +70,152 @@ https://conda.anaconda.org/conda-forge/linux-64/xorg-xextproto-7.3.0-h0b41bf4_10 https://conda.anaconda.org/conda-forge/linux-64/xorg-xf86vidmodeproto-2.3.1-h7f98852_1002.tar.bz2#3ceea9668625c18f19530de98b15d5b0 https://conda.anaconda.org/conda-forge/linux-64/xorg-xproto-7.0.31-h7f98852_1007.tar.bz2#b4a4381d54784606820704f7b5f05a15 https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0 -https://conda.anaconda.org/conda-forge/linux-64/expat-2.5.0-hcb278e6_1.conda#8b9b5aca60558d02ddaa09d599e55920 -https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.0.9-h166bdaf_8.tar.bz2#4ae4d7795d33e02bd20f6b23d91caf82 -https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.0.9-h166bdaf_8.tar.bz2#04bac51ba35ea023dc48af73c1c88c25 -https://conda.anaconda.org/conda-forge/linux-64/libcap-2.67-he9d0100_0.conda#d05556c80caffff164d17bdea0105a1a +https://conda.anaconda.org/conda-forge/linux-64/aws-c-cal-0.6.1-hc309b26_1.conda#cc09293a2c2b7fd77aff284f370c12c0 +https://conda.anaconda.org/conda-forge/linux-64/aws-c-compression-0.2.17-h4d4d85c_2.conda#9ca99452635fe03eb5fa937f5ae604b0 +https://conda.anaconda.org/conda-forge/linux-64/aws-c-sdkutils-0.1.12-h4d4d85c_1.conda#eba092fc6de212a01de0065f38fe8bbb +https://conda.anaconda.org/conda-forge/linux-64/aws-checksums-0.1.17-h4d4d85c_1.conda#30f9df85ce23cd14faa9a4dfa50cca2b +https://conda.anaconda.org/conda-forge/linux-64/expat-2.6.2-h59595ed_0.conda#53fb86322bdb89496d7579fe3f02fd61 +https://conda.anaconda.org/conda-forge/linux-64/glog-0.6.0-h6f12383_0.tar.bz2#b31f3565cb84435407594e548a2fb7b2 +https://conda.anaconda.org/conda-forge/linux-64/libasprintf-devel-0.22.5-h661eb56_2.conda#02e41ab5834dcdcc8590cf29d9526f50 +https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.0.9-h166bdaf_9.conda#081aa22f4581c08e4372b0b6c2f8478e +https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.0.9-h166bdaf_9.conda#1f0a03af852a9659ed2bf08f2f1704fd +https://conda.anaconda.org/conda-forge/linux-64/libcap-2.69-h0f662aa_0.conda#25cb5999faa414e5ccb2c1388f62d3d5 https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2.tar.bz2#4d331e44109e3f0e19b4cb8f9b82f3e1 
https://conda.anaconda.org/conda-forge/linux-64/libevent-2.1.12-hf998b51_1.conda#a1cfcc585f0c42bf8d5546bb1dfb668d -https://conda.anaconda.org/conda-forge/linux-64/libflac-1.4.2-h27087fc_0.tar.bz2#7daf72d8e2a8e848e11d63ed6d1026e0 -https://conda.anaconda.org/conda-forge/linux-64/libgpg-error-1.46-h620e276_0.conda#27e745f6f2e4b757e95dd7225fbe6bdb -https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.39-h753d276_0.conda#e1c890aebdebbfbf87e2c917187b4416 -https://conda.anaconda.org/conda-forge/linux-64/libprotobuf-3.21.12-h3eb15da_0.conda#4b36c68184c6c85d88c6e595a32a1ede -https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.42.0-h2797004_0.conda#fdaae20a1cf7cd62130a0973190a31b7 +https://conda.anaconda.org/conda-forge/linux-64/libgettextpo-devel-0.22.5-h59595ed_2.conda#b63d9b6da3653179a278077f0de20014 +https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-13.2.0-h69a702a_7.conda#1b84f26d9f4f6026e179e7805d5a15cd +https://conda.anaconda.org/conda-forge/linux-64/libnghttp2-1.58.0-h47da74e_1.conda#700ac6ea6d53d5510591c4344d5c989a +https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.43-h2797004_0.conda#009981dd9cfcaa4dbfa25ffaed86bcae +https://conda.anaconda.org/conda-forge/linux-64/libprotobuf-3.21.12-hfc55251_2.conda#e3a7d4ba09b8dc939b98fef55f539220 +https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.45.3-h2797004_0.conda#b3316cbe90249da4f8e84cd66e1cc55b +https://conda.anaconda.org/conda-forge/linux-64/libssh2-1.11.0-h0841786_0.conda#1f5a58e686b13bcfde88b93f547d23fe https://conda.anaconda.org/conda-forge/linux-64/libvorbis-1.3.7-h9c3ff4c_0.tar.bz2#309dec04b70a3cc0f1e84a4013683bc0 https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.15-h0b41bf4_0.conda#33277193f5b92bad9fdd230eb700929c -https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.11.4-h0d562d8_0.conda#e46fad17d5fb57316b956f88dca765e4 -https://conda.anaconda.org/conda-forge/linux-64/mysql-common-8.0.32-hf1915f5_2.conda#cf4a8f520fdad3a63bb2bce74576cd2d -https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.40-hc3806b6_0.tar.bz2#69e2c796349cd9b273890bee0febfe1b +https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.12.7-hc051c1a_1.conda#340278ded8b0dc3a73f3660bbb0adbc6 +https://conda.anaconda.org/conda-forge/linux-64/mysql-common-8.3.0-hf1915f5_4.conda#784a4df6676c581ca624fbe460703a6d +https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.43-hcad00b1_0.conda#8292dea9e022d9610a11fce5e0896ed8 https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4 -https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.12-h27826a3_0.tar.bz2#5b8c42eb62e9fc961af70bdd6a26e168 +https://conda.anaconda.org/conda-forge/linux-64/s2n-1.3.49-h06160fa_0.conda#1d78349eb26366ecc034a4afe70a8534 +https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda#d453b98d9c83e71da0741bb0ff4d76bc +https://conda.anaconda.org/conda-forge/linux-64/ucx-1.14.1-h64cca9d_5.conda#39aa3b356d10d7e5add0c540945a0944 https://conda.anaconda.org/conda-forge/linux-64/xorg-libsm-1.2.4-h7391055_0.conda#93ee23f12bc2e684548181256edd2cf6 -https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.13-h166bdaf_4.tar.bz2#4b11e365c0275b808be78b30f904e295 -https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.2-h3eb15da_6.conda#6b63daed8feeca47be78f323e793d555 -https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.0.9-h166bdaf_8.tar.bz2#e5613f2bc717e9945840ff474419b8e4 
-https://conda.anaconda.org/conda-forge/linux-64/ccache-4.8.1-h1fcd64f_0.conda#fd37a0c47d8b3667b73af0549037ce83 -https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-hca18f0e_1.conda#e1232042de76d24539a436d37597eb06 -https://conda.anaconda.org/conda-forge/linux-64/krb5-1.20.1-h81ceb04_0.conda#89a41adce7106749573d883b2f657d78 -https://conda.anaconda.org/conda-forge/linux-64/libgcrypt-1.10.1-h166bdaf_0.tar.bz2#f967fc95089cd247ceed56eda31de3a9 -https://conda.anaconda.org/conda-forge/linux-64/libglib-2.76.3-hebfc3b9_0.conda#a64f11b244b2c112cd3fa1cbe9493999 -https://conda.anaconda.org/conda-forge/linux-64/libhwloc-2.9.1-cuda112_haf10fcf_5.conda#b8996ffa972161676ba6972af4c41384 -https://conda.anaconda.org/conda-forge/linux-64/libllvm15-15.0.7-h5cf9203_2.conda#5c0a511fa7d223d8661fefcf77b2a877 -https://conda.anaconda.org/conda-forge/linux-64/libsndfile-1.2.0-hb75c966_0.conda#c648d19cd9c8625898d5d370414de7c7 -https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.5.0-ha587672_6.conda#4e5ee4b062c21519efbee7e2ae608748 -https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.5.0-h5d7e998_3.conda#c91ea308d7bf70b62ddda568478aa03b -https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-16.0.5-h4dfa4b3_0.conda#9441a97b74c692d969ff465ac6c0ccea -https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-8.0.32-hca2cd23_2.conda#20b4708cd04bdc8138d03314ddd97885 -https://conda.anaconda.org/conda-forge/linux-64/nss-3.89-he45b914_0.conda#2745719a58eeaab6657256a3f142f099 -https://conda.anaconda.org/conda-forge/linux-64/python-3.11.3-h2755cc3_0_cpython.conda#37005ea5f68df6a8a381b70cf4d4a160 +https://conda.anaconda.org/conda-forge/linux-64/zlib-1.3.1-h4ab18f5_1.conda#9653f1bf3766164d0e65fa723cabbc54 +https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.6-ha6fb4c9_0.conda#4d056880988120e29d75bfff282e0f45 +https://conda.anaconda.org/conda-forge/linux-64/aws-c-io-0.13.32-he9a53bd_1.conda#8a24e5820f4a0ffd2ed9c4722cd5d7ca +https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.0.9-h166bdaf_9.conda#d47dee1856d9cb955b8076eeff304a5b +https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-h267a509_2.conda#9ae35c3d96db2c94ce0cef86efdfa2cb +https://conda.anaconda.org/conda-forge/linux-64/gettext-0.22.5-h59595ed_2.conda#219ba82e95d7614cf7140d2a4afc0926 +https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.2-h659d440_0.conda#cd95826dbd331ed1be26bdf401432844 +https://conda.anaconda.org/conda-forge/linux-64/libglib-2.80.2-hf974151_0.conda#72724f6a78ecb15559396966226d5838 +https://conda.anaconda.org/conda-forge/linux-64/libgrpc-1.54.3-hb20ce57_0.conda#7af7c59ab24db007dfd82e0a3a343f66 +https://conda.anaconda.org/conda-forge/linux-64/libhiredis-1.0.2-h2cc385e_0.tar.bz2#b34907d3a81a3cd8095ee83d174c074a +https://conda.anaconda.org/conda-forge/linux-64/libhwloc-2.10.0-default_h5622ce7_1001.conda#fc2d5b79c2d3f8568fbab31db7ae02f3 +https://conda.anaconda.org/conda-forge/linux-64/libllvm15-15.0.7-hb3ce162_4.conda#8a35df3cbc0c8b12cc8af9473ae75eef +https://conda.anaconda.org/conda-forge/linux-64/libllvm18-18.1.7-hb77312f_0.conda#bc0ea7e1f75a9b1c8467597fbbd9f86b +https://conda.anaconda.org/conda-forge/linux-64/libthrift-0.18.1-h8fd135c_2.conda#bbf65f7688512872f063810623b755dc +https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.6.0-h1dd3fc0_3.conda#66f03896ffbe1a110ffda05c7a856504 +https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-18.1.7-ha31de31_0.conda#7234f31acd176e402e91e03feba90f7d 
+https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-8.3.0-hca2cd23_4.conda#1b50eebe2a738a3146c154d2eceaa8b6 +https://conda.anaconda.org/conda-forge/linux-64/nss-3.100-hca3bf56_0.conda#949c4a82290ee58b3c970cef4bcfd4ad +https://conda.anaconda.org/conda-forge/linux-64/orc-1.9.0-h2f23424_1.conda#9571eb3eb0f7fe8b59956a7786babbcd +https://conda.anaconda.org/conda-forge/linux-64/python-3.11.9-hb806964_0_cpython.conda#ac68acfa8b558ed406c75e98d3428d7b https://conda.anaconda.org/conda-forge/linux-64/xcb-util-0.4.0-hd590300_1.conda#9bfac7ccd94d54fd21a0501296d60424 https://conda.anaconda.org/conda-forge/linux-64/xcb-util-keysyms-0.4.0-h8ee46fc_1.conda#632413adcd8bc16b515cab87a2932913 https://conda.anaconda.org/conda-forge/linux-64/xcb-util-renderutil-0.3.9-hd590300_1.conda#e995b155d938b6779da6ace6c6b13816 https://conda.anaconda.org/conda-forge/linux-64/xcb-util-wm-0.4.1-h8ee46fc_1.conda#90108a432fb5c6150ccfee3f03388656 -https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.4-h8ee46fc_1.conda#52d09ea80a42c0466214609ef0a2d62d -https://conda.anaconda.org/conda-forge/noarch/array-api-compat-1.2-pyhd8ed1ab_0.conda#3d34f2f6987f8d098ab00198c170a77e -https://conda.anaconda.org/conda-forge/linux-64/brotli-1.0.9-h166bdaf_8.tar.bz2#2ff08978892a3e8b954397c461f18418 -https://conda.anaconda.org/conda-forge/noarch/certifi-2023.5.7-pyhd8ed1ab_0.conda#5d1b71c942b8421285934dad1d891ebc -https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.1.0-pyhd8ed1ab_0.conda#7fcff9f6f123696e940bda77bd4d6551 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.9-h8ee46fc_0.conda#077b6e8ad6a3ddb741fce2496dd01bec +https://conda.anaconda.org/conda-forge/noarch/array-api-compat-1.7.1-pyhd8ed1ab_0.conda#8791d81c38f676a7c08c76546800bf70 +https://conda.anaconda.org/conda-forge/linux-64/aws-c-event-stream-0.3.1-h2e3709c_4.conda#2cf21b1cbc1c096a28ffa2892257a2c1 +https://conda.anaconda.org/conda-forge/linux-64/aws-c-http-0.7.11-h00aa349_4.conda#cb932dff7328ff620ce8059c9968b095 +https://conda.anaconda.org/conda-forge/linux-64/brotli-1.0.9-h166bdaf_9.conda#4601544b4982ba1861fa9b9c607b2c06 +https://conda.anaconda.org/conda-forge/linux-64/ccache-4.9.1-h1fcd64f_0.conda#3620f564bcf28c3524951b6f64f5c5ac +https://conda.anaconda.org/conda-forge/noarch/certifi-2024.2.2-pyhd8ed1ab_0.conda#0876280e409658fc6f9e75d035960333 https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 -https://conda.anaconda.org/conda-forge/noarch/cycler-0.11.0-pyhd8ed1ab_0.tar.bz2#a50559fad0affdbb33729a68669ca1cb -https://conda.anaconda.org/conda-forge/linux-64/cython-0.29.35-py311hb755f60_0.conda#17f4738a1ca6155a63d2a0cbd3e4a8b1 +https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhd8ed1ab_0.conda#5cd86562580f274031ede6aa6aa24441 +https://conda.anaconda.org/conda-forge/linux-64/cython-3.0.10-py311hb755f60_0.conda#f3a8a500a2e743ff92f418f0eaf9bf71 https://conda.anaconda.org/conda-forge/linux-64/dbus-1.13.6-h5008d03_3.tar.bz2#ecfff944ba3960ecb334b9a2663d708d -https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.1.1-pyhd8ed1ab_0.conda#7312299d7a0ea4993159229b7d2dceb2 -https://conda.anaconda.org/conda-forge/noarch/execnet-1.9.0-pyhd8ed1ab_0.tar.bz2#0e521f7a5e60d508b121d38b04874fb2 +https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.0-pyhd8ed1ab_2.conda#8d652ea2ee8eaee02ed8dc820bc794aa +https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_0.conda#15dda3cdbf330abfe9f555d22f66db46 
https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.14.2-h14ed4e7_0.conda#0f69b688f52ff6da70bccb7ff7001d1d -https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.76.3-hfc55251_0.conda#8951eedf3cdf94dd733c1b5eee1f4880 -https://conda.anaconda.org/conda-forge/noarch/idna-3.4-pyhd8ed1ab_0.tar.bz2#34272b248891bddccc64479f9a7fffed +https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.80.2-hb6ce0ca_0.conda#a965aeaf060289528a3fbe09326edae2 https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5 -https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.4-py311h4dd048b_1.tar.bz2#46d451f575392c01dc193069bd89766d -https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.15-haa2dc70_1.conda#980d8aca0bc23ca73fa8caa3e7c84c28 -https://conda.anaconda.org/conda-forge/linux-64/libclang13-15.0.7-default_h9986a30_2.conda#907344cee64101d44d806bbe0fccb01d -https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h36d4200_3.conda#c9f4416a34bc91e0eb029f912c68f81f -https://conda.anaconda.org/conda-forge/linux-64/libpq-15.3-hbcd7760_1.conda#8afb2a97d256ffde95b91a6283bc598c -https://conda.anaconda.org/conda-forge/linux-64/libsystemd0-253-h8c4010b_1.conda#9176b1e2cb8beca37a7510b0e801e38f +https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.5-py311h9547e67_1.conda#2c65bdf442b0d37aad080c8a4e0d452f +https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.16-hb7c19ff_0.conda#51bb7010fc86f70eee639b4bb7a894f5 +https://conda.anaconda.org/conda-forge/linux-64/libclang-cpp15-15.0.7-default_h127d8a8_5.conda#d0a9633b53cdc319b8a1a532ae7822b8 +https://conda.anaconda.org/conda-forge/linux-64/libclang13-18.1.7-default_h087397f_0.conda#536526073c2e7f9056fdce8584da779e +https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h4637d8d_4.conda#d4529f4dff3057982a7617c7ac58fde3 +https://conda.anaconda.org/conda-forge/linux-64/libcurl-8.8.0-hca28451_0.conda#f21c27f076a07907e70c49bb57bd0f20 +https://conda.anaconda.org/conda-forge/linux-64/libflac-1.4.3-h59595ed_0.conda#ee48bf17cc83a00f59ca1494d5646869 +https://conda.anaconda.org/conda-forge/linux-64/libgpg-error-1.49-h4f305b6_0.conda#dfcfd72c7a430d3616763ecfbefe4ca9 +https://conda.anaconda.org/conda-forge/linux-64/libpq-16.3-ha72fbe1_0.conda#bac737ae28b79cfbafd515258d97d29e https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19 -https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.0-hfec8fc6_2.conda#5ce6a42505c6e9e6151c54c3ec8d68ea -https://conda.anaconda.org/conda-forge/noarch/packaging-23.1-pyhd8ed1ab_0.conda#91cda59e66e1e4afe9476f8ef98f5c30 -https://conda.anaconda.org/conda-forge/noarch/pluggy-1.0.0-pyhd8ed1ab_5.tar.bz2#7d301a0d25f424d96175f810935f0da9 -https://conda.anaconda.org/conda-forge/noarch/ply-3.11-py_1.tar.bz2#7205635cd71531943440fbfe3b6b5727 -https://conda.anaconda.org/conda-forge/noarch/py-1.11.0-pyh6c4a22f_0.tar.bz2#b4613d7e7a493916d867842a6a148054 -https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.0.9-pyhd8ed1ab_0.tar.bz2#e8fbc1b54b25f4b08281467bc13b70cc -https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha2e5f31_6.tar.bz2#2a7de29fb590ca14b5243c4c812c8025 -https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2023.3-pyhd8ed1ab_0.conda#2590495f608a63625e165915fb4e2e34 -https://conda.anaconda.org/conda-forge/noarch/pytz-2023.3-pyhd8ed1ab_0.conda#d3076b483092a435832603243567bc31 
-https://conda.anaconda.org/conda-forge/noarch/setuptools-67.7.2-pyhd8ed1ab_0.conda#3b68bc43ec6baa48f7354a446267eefe +https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.2-h488ebb8_0.conda#7f2e286780f072ed750df46dc2631138 +https://conda.anaconda.org/conda-forge/noarch/packaging-24.0-pyhd8ed1ab_0.conda#248f521b64ce055e7feae3105e7abeb8 +https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_0.conda#d3483c8fc2dc2cc3f5cf43e26d60cabf +https://conda.anaconda.org/conda-forge/noarch/ply-3.11-pyhd8ed1ab_2.conda#18c6deb6f9602e32446398203c8f0e91 +https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.2-pyhd8ed1ab_0.conda#b9a4dacf97241704529131a0dfc0494f +https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2024.1-pyhd8ed1ab_0.conda#98206ea9954216ee7540f0c773f2104d +https://conda.anaconda.org/conda-forge/noarch/pytz-2024.1-pyhd8ed1ab_0.conda#3eeeeb9e4827ace8c0c1419c85d590ad +https://conda.anaconda.org/conda-forge/noarch/setuptools-70.0.0-pyhd8ed1ab_0.conda#c8ddb4f34a208df4dd42509a0f6a1c89 https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 -https://conda.anaconda.org/conda-forge/linux-64/tbb-2021.9.0-hf52228f_0.conda#f495e42d3d2020b025705625edf35490 -https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.1.0-pyh8a188c0_0.tar.bz2#a2995ee828f65687ac5b1e71a2ab1e0c +https://conda.anaconda.org/conda-forge/linux-64/tbb-2021.12.0-h297d8ca_1.conda#3ff978d8994f591818a506640c6a7071 +https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.5.0-pyhc1e730c_0.conda#df68d78237980a159bd7149f33c0e8fd https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095 https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 -https://conda.anaconda.org/conda-forge/linux-64/tornado-6.3.2-py311h459d7ec_0.conda#12b1c374ee90a1aa11ea921858394dc8 -https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.6.3-pyha770c72_0.conda#4a3014a4d107d15475d106b751c4e352 +https://conda.anaconda.org/conda-forge/linux-64/tornado-6.4.1-py311h331c9d8_0.conda#e29e451c96bf8e81a5760b7565c6ed2c +https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.12.2-pyha770c72_0.conda#ebe6952715e1d5eb567eeebf25250fa7 +https://conda.anaconda.org/conda-forge/noarch/wheel-0.43.0-pyhd8ed1ab_1.conda#0b5293a157c2b5cd513dd1b03d8d3aae https://conda.anaconda.org/conda-forge/linux-64/xcb-util-image-0.4.0-h8ee46fc_1.conda#9d7bcddf49cbf727730af10e71022c73 +https://conda.anaconda.org/conda-forge/linux-64/xkeyboard-config-2.42-h4ab18f5_0.conda#b193af204da1bfb8c13882d131a14bd2 https://conda.anaconda.org/conda-forge/linux-64/xorg-libxext-1.3.4-h0b41bf4_2.conda#82b6df12252e6f32402b96dacc656fec -https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.10-h7f98852_1003.tar.bz2#f59c1242cc1dd93e72c2ee2b360979eb -https://conda.anaconda.org/conda-forge/noarch/zipp-3.15.0-pyhd8ed1ab_0.conda#13018819ca8f5b7cc675a8faf1f5fedf -https://conda.anaconda.org/conda-forge/linux-64/cairo-1.16.0-hbbf8b49_1016.conda#c1dd96500b9b1a75e9e511931f415cbc -https://conda.anaconda.org/conda-forge/linux-64/coverage-7.2.7-py311h459d7ec_0.conda#3c2c65575c28b23afc5e4ff721a2fc9f -https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.39.4-py311h459d7ec_0.conda#ddd2cd004e10bc7a1e042283326cbf91 -https://conda.anaconda.org/conda-forge/linux-64/glib-2.76.3-hfc55251_0.conda#950e02f5665f5f4ff0437a6acba58798 
-https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-6.6.0-pyha770c72_0.conda#f91a5d5175fb7ff2a91952ec7da59cb9 -https://conda.anaconda.org/conda-forge/noarch/joblib-1.2.0-pyhd8ed1ab_0.tar.bz2#7583652522d71ad78ba536bba06940eb -https://conda.anaconda.org/conda-forge/linux-64/libclang-15.0.7-default_h7634d5b_2.conda#1a4fe5162abe4a19b5a9dedf158a0ff9 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.11-hd590300_0.conda#ed67c36f215b310412b2af935bf3e530 +https://conda.anaconda.org/conda-forge/linux-64/aws-c-auth-0.7.3-h28f7589_1.conda#97503d3e565004697f1651753aa95b9e +https://conda.anaconda.org/conda-forge/linux-64/aws-c-mqtt-0.9.3-hb447be9_1.conda#c520669eb0be9269a5f0d8ef62531882 +https://conda.anaconda.org/conda-forge/linux-64/cairo-1.18.0-h3faef2a_0.conda#f907bb958910dc404647326ca80c263e +https://conda.anaconda.org/conda-forge/linux-64/coverage-7.5.3-py311h331c9d8_0.conda#543dd05fd661e4e9c9deb3b37093d6a2 +https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.53.0-py311h331c9d8_0.conda#2daef6c4ce74840c8d7a431498be83e9 +https://conda.anaconda.org/conda-forge/linux-64/glib-2.80.2-hf974151_0.conda#d427988dc3dbd0a4c136f52db356cc6a +https://conda.anaconda.org/conda-forge/noarch/joblib-1.4.2-pyhd8ed1ab_0.conda#25df261d4523d9f9783bcdb7208d872f +https://conda.anaconda.org/conda-forge/linux-64/libgcrypt-1.10.3-hd590300_0.conda#32d16ad533c59bb0a3c5ffaf16110829 +https://conda.anaconda.org/conda-forge/linux-64/libgoogle-cloud-2.12.0-hac9eb74_1.conda#0dee716254497604762957076ac76540 +https://conda.anaconda.org/conda-forge/linux-64/libsndfile-1.2.2-hc60ed4a_1.conda#ef1910918dd895516a769ed36b5b3a4e +https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.7.0-h662e7e4_0.conda#b32c0da42b1f24a98577bb3d7fc0b995 +https://conda.anaconda.org/conda-forge/noarch/meson-1.4.0-pyhd8ed1ab_0.conda#52a0660cfa40b45bf254ecc3374cb2e0 https://conda.anaconda.org/conda-forge/linux-64/mkl-2022.2.1-h84fe81f_16997.conda#a7ce56d5757f5b57e7daabe703ade5bb -https://conda.anaconda.org/conda-forge/linux-64/pillow-9.5.0-py311h0b84326_1.conda#6be2190fdbf26a6c1d3356a54d955237 -https://conda.anaconda.org/conda-forge/linux-64/pulseaudio-client-16.1-hb77b528_4.conda#8f349ca16d30950aa00870484d9d30c4 -https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.2-pyhd8ed1ab_0.tar.bz2#dd999d1cc9f79e67dbb855c8924c7984 -https://conda.anaconda.org/conda-forge/linux-64/sip-6.7.9-py311hb755f60_0.conda#2b5430f2f1651f460c852e1fdd549184 -https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.6.3-hd8ed1ab_0.conda#3876f650ed7d0f95d70fa4b647621909 -https://conda.anaconda.org/conda-forge/noarch/urllib3-2.0.2-pyhd8ed1ab_0.conda#81a763f3c64fe6d5f32e033b0325265d +https://conda.anaconda.org/conda-forge/linux-64/pillow-10.3.0-py311h18e6fac_0.conda#6c520a9d36c9d7270988c7a6c360d6d4 +https://conda.anaconda.org/conda-forge/noarch/pip-24.0-pyhd8ed1ab_0.conda#f586ac1e56c8638b64f9c8122a7b8a67 +https://conda.anaconda.org/conda-forge/noarch/pyproject-metadata-0.8.0-pyhd8ed1ab_0.conda#573fe09d7bd0cd4bcc210d8369b5ca47 +https://conda.anaconda.org/conda-forge/noarch/pytest-8.2.2-pyhd8ed1ab_0.conda#0f3f49c22c7ef3a1195fa61dad3c43be +https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0-pyhd8ed1ab_0.conda#2cf4264fffb9e6eff6031c5b6884d61c +https://conda.anaconda.org/conda-forge/linux-64/sip-6.7.12-py311hb755f60_0.conda#02336abab4cb5dd794010ef53c54bd09 +https://conda.anaconda.org/conda-forge/linux-64/aws-c-s3-0.3.14-hf3aad02_1.conda#a968ffa7e9fe0c257628033d393e512f 
https://conda.anaconda.org/conda-forge/linux-64/blas-1.0-mkl.tar.bz2#349aef876b1d8c9dccae01de20d5b385 -https://conda.anaconda.org/conda-forge/linux-64/gstreamer-1.22.3-h977cf35_1.conda#410ed3b168e5a139d12ebaf4143072cd -https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-7.3.0-hdb3a94d_0.conda#765bc76c0dfaf24ff9d8a2935b2510df +https://conda.anaconda.org/conda-forge/linux-64/gstreamer-1.24.4-haf2f30d_0.conda#926c2c7ee7a0b48d6d70783a33f7bc80 +https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-8.5.0-hfac3d4d_0.conda#f5126317dd0ce0ba26945e411ecc6960 https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-16_linux64_mkl.tar.bz2#85f61af03fd291dae33150ffe89dc09a -https://conda.anaconda.org/conda-forge/noarch/platformdirs-3.5.1-pyhd8ed1ab_0.conda#e2be672aece1f060adf7154f76531a35 -https://conda.anaconda.org/conda-forge/linux-64/pyqt5-sip-12.11.0-py311hcafe171_3.conda#0d79df2a96f6572fed2883374400b235 -https://conda.anaconda.org/conda-forge/noarch/pytest-7.3.1-pyhd8ed1ab_0.conda#547c7de697ec99b494a28ddde185b5a4 -https://conda.anaconda.org/conda-forge/noarch/requests-2.31.0-pyhd8ed1ab_0.conda#a30144e4156cdbb236f99ebb49828f8b -https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.22.3-h938bd60_1.conda#1f317eb7f00db75f4112a07476345376 +https://conda.anaconda.org/conda-forge/linux-64/libsystemd0-255-h3516f8a_1.conda#3366af27f0b593544a6cd453c7932ac5 +https://conda.anaconda.org/conda-forge/noarch/meson-python-0.16.0-pyh0c530f3_0.conda#e16f0dbf502da873be9f9adb0dc52547 +https://conda.anaconda.org/conda-forge/linux-64/pyqt5-sip-12.12.2-py311hb755f60_5.conda#e4d262cc3600e70b505a6761d29f6207 +https://conda.anaconda.org/conda-forge/noarch/pytest-cov-5.0.0-pyhd8ed1ab_0.conda#c54c0107057d67ddf077751339ec2c63 +https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.5.0-pyhd8ed1ab_0.conda#d5f595da2daead898ca958ac62f0307b +https://conda.anaconda.org/conda-forge/linux-64/aws-crt-cpp-0.21.0-hb942446_5.conda#07d92ed5403ad7b5c66ffd7d5b8f7e57 +https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.24.4-h9ad1361_0.conda#147cce520ec59367549fd0d96d404213 https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-16_linux64_mkl.tar.bz2#361bf757b95488de76c4f123805742d3 https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-16_linux64_mkl.tar.bz2#a2f166748917d6d6e4707841ca1f519e -https://conda.anaconda.org/conda-forge/noarch/pooch-1.7.0-pyha770c72_3.conda#5936894aade8240c867d292aa0d980c6 -https://conda.anaconda.org/conda-forge/noarch/pytest-cov-4.1.0-pyhd8ed1ab_0.conda#06eb685a3a0b146347a58dda979485da -https://conda.anaconda.org/conda-forge/noarch/pytest-forked-1.6.0-pyhd8ed1ab_0.conda#a46947638b6e005b63d2d6271da529b0 -https://conda.anaconda.org/conda-forge/linux-64/numpy-1.24.3-py311h64a7726_0.conda#f1d507e1a5f1151845f7818ceb02ba9f -https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-2.5.0-pyhd8ed1ab_0.tar.bz2#1fdd1f3baccf0deb647385c677a1a48e -https://conda.anaconda.org/conda-forge/linux-64/qt-main-5.15.8-h01ceb2d_13.conda#99ca83a166224f46a62c9545b8d66401 -https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.0.7-py311ha3edf6b_0.conda#e7548e7f58965a2fe97a95950a5fedc6 -https://conda.anaconda.org/conda-forge/linux-64/pandas-2.0.2-py311h320fe9a_0.conda#509769b430266dc5c2f6a3eab0f23164 -https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.15.7-py311ha74522f_3.conda#ad6dd0bed0cdf5f2d4eb2b989d6253b3 +https://conda.anaconda.org/conda-forge/linux-64/pulseaudio-client-17.0-hb77b528_0.conda#07f45f1be1c25345faddb8db0de8039b 
+https://conda.anaconda.org/conda-forge/linux-64/aws-sdk-cpp-1.10.57-h85b1a90_19.conda#0605d3d60857fc07bd6a11e878fe0f08 +https://conda.anaconda.org/conda-forge/linux-64/numpy-1.26.4-py311h64a7726_0.conda#a502d7aad449a1206efb366d6a12c52d +https://conda.anaconda.org/conda-forge/linux-64/qt-main-5.15.8-hc9dc06e_21.conda#b325046180590c868ce0dbf267b82eb8 +https://conda.anaconda.org/conda-forge/noarch/array-api-strict-1.1.1-pyhd8ed1ab_0.conda#941bbcd64d1a7b44aeb497f468fc85b4 +https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.2.1-py311h9547e67_0.conda#74ad0ae64f1ef565e27eda87fa749e84 +https://conda.anaconda.org/conda-forge/linux-64/libarrow-12.0.1-hb87d912_8_cpu.conda#3f3b11398fe79b578e3c44dd00a44e4a +https://conda.anaconda.org/conda-forge/linux-64/pandas-2.2.2-py311h14de704_1.conda#84e2dd379d4edec4dd6382861486104d +https://conda.anaconda.org/conda-forge/linux-64/polars-0.20.31-py311h00856b1_0.conda#4f1cc2c95c25fe838acabfa8dc0d48ff +https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.15.9-py311hf0fb5b6_5.conda#ec7e45bc76d9d0b69a74a2075932b8e8 https://conda.anaconda.org/conda-forge/linux-64/pytorch-1.13.1-cpu_py311h410fd25_1.conda#ddd2fadddf89e3dc3d541a2537fce010 -https://conda.anaconda.org/conda-forge/linux-64/scipy-1.10.1-py311h64a7726_3.conda#a01a3a7428e770db5a0c8c7ab5fce7f7 -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.7.1-py311h8597a09_0.conda#70c3b734ffe82c16b6d121aaa11929a8 -https://conda.anaconda.org/conda-forge/linux-64/pyamg-5.0.0-py311hcb41070_0.conda#af2d6818c526791fb81686c554ab262b +https://conda.anaconda.org/conda-forge/linux-64/scipy-1.13.1-py311h517d4fd_0.conda#764b0e055f59dbd7d114d32b8c6e55e6 +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.8.4-py311ha4ca890_2.conda#0848e2084cbb57014f232f48568561af +https://conda.anaconda.org/conda-forge/linux-64/pyamg-5.1.0-py311h5510f57_1.conda#734865cccfb0a27b433ea31bd178d0e3 +https://conda.anaconda.org/conda-forge/linux-64/pyarrow-12.0.1-py311h39c9aba_8_cpu.conda#587370a25bb2c50cce90909ce20d38b8 https://conda.anaconda.org/conda-forge/linux-64/pytorch-cpu-1.13.1-cpu_py311hdb170b5_1.conda#a805d5f103e493f207613283d8acbbe1 -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.7.1-py311h38be061_0.conda#8fd462c8bcbba5a3affcb2d04e387476 +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.8.4-py311h38be061_2.conda#7667100b9559c1b7a40c728cd72dabdf diff --git a/build_tools/azure/pylatest_conda_forge_mkl_linux-64_environment.yml b/build_tools/azure/pylatest_conda_forge_mkl_linux-64_environment.yml index 2eb5ebde3445e..30a2fe1d1812a 100644 --- a/build_tools/azure/pylatest_conda_forge_mkl_linux-64_environment.yml +++ b/build_tools/azure/pylatest_conda_forge_mkl_linux-64_environment.yml @@ -15,12 +15,17 @@ dependencies: - pandas - pyamg - pytest - - pytest-xdist=2.5.0 + - pytest-xdist - pillow - - setuptools + - pip + - ninja + - meson-python - pytest-cov - coverage - ccache - pytorch=1.13 - pytorch-cpu + - polars + - pyarrow - array-api-compat + - array-api-strict diff --git a/build_tools/azure/pylatest_conda_forge_mkl_no_coverage_environment.yml b/build_tools/azure/pylatest_conda_forge_mkl_no_coverage_environment.yml deleted file mode 100644 index 02392a4e05aa8..0000000000000 --- a/build_tools/azure/pylatest_conda_forge_mkl_no_coverage_environment.yml +++ /dev/null @@ -1,21 +0,0 @@ -# DO NOT EDIT: this file is generated from the specification found in the -# following script to centralize the configuration for CI builds: -# build_tools/update_environments_and_lock_files.py -channels: - 
- conda-forge -dependencies: - - python - - numpy - - blas[build=mkl] - - scipy - - cython - - joblib - - threadpoolctl - - matplotlib - - pandas - - pyamg - - pytest - - pytest-xdist=2.5.0 - - pillow - - setuptools - - ccache diff --git a/build_tools/azure/pylatest_conda_forge_mkl_no_coverage_linux-64_conda.lock b/build_tools/azure/pylatest_conda_forge_mkl_no_coverage_linux-64_conda.lock deleted file mode 100644 index e2252fa80607f..0000000000000 --- a/build_tools/azure/pylatest_conda_forge_mkl_no_coverage_linux-64_conda.lock +++ /dev/null @@ -1,181 +0,0 @@ -# Generated by conda-lock. -# platform: linux-64 -# input_hash: 28f25ea7bcf22e93278ac96747ca9700ada47330f6e3ed927edb73ab4a4c153e -@EXPLICIT -https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 -https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2023.5.7-hbcca054_0.conda#f5c65075fc34438d5b456c7f3f5ab695 -https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2#0c96522c6bdaed4b1566d11387caaf45 -https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2#34893075a5c9e55cdafac56607368fc6 -https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2#4d59c254e01d9cde7957100457e2d5fb -https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-hab24e00_0.tar.bz2#19410c3df09dfb12d1206132a1d357c5 -https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.40-h41732ed_0.conda#7aca3059a1729aa76c597603f10b0dd3 -https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-13.1.0-h15d22d2_0.conda#afb656a334c409dd9805508af1c89c7a -https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.1.0-hfd8a6a1_0.conda#067bcc23164642f4c226da631f2a2e1d -https://conda.anaconda.org/conda-forge/linux-64/mkl-include-2022.1.0-h84fe81f_915.tar.bz2#2dcd1acca05c11410d4494d7fc7dfa2a -https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.11-3_cp311.conda#c2e2630ddb68cf52eec74dc7dfab20b5 -https://conda.anaconda.org/conda-forge/noarch/tzdata-2023c-h71feb2d_0.conda#939e3e74d8be4dac89ce83b20de2492a -https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-0.tar.bz2#f766549260d6815b0c52253f1fb1bb29 -https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-13.1.0-h69a702a_0.conda#506dc07710dd5b0ba63cbf134897fc10 -https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2#fee5683a3f04bd15cbd8318b096a27ab -https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_kmp_llvm.tar.bz2#562b26ba2e19059551a811e72ab7f793 -https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.1.0-he5830b7_0.conda#cd93f779ff018dd85c7544c015c9db3c -https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.8-h166bdaf_0.tar.bz2#be733e69048951df1e4b4b7bb8c7666f -https://conda.anaconda.org/conda-forge/linux-64/attr-2.5.1-h166bdaf_1.tar.bz2#d9c69a24ad678ffce24c6543a0176b00 -https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h7f98852_4.tar.bz2#a1fd65c7ccbf10880423d82bca54eb54 -https://conda.anaconda.org/conda-forge/linux-64/cudatoolkit-11.8.0-h37601d7_11.conda#9d166760c8cfa83e2fc989928312da3d -https://conda.anaconda.org/conda-forge/linux-64/gettext-0.21.1-h27087fc_0.tar.bz2#14947d8770185e5153fdd04d4673ed37 -https://conda.anaconda.org/conda-forge/linux-64/graphite2-1.3.13-h58526e2_1001.tar.bz2#8c54672728e8ec6aa6db90cf2806d220 -https://conda.anaconda.org/conda-forge/linux-64/icu-72.1-hcb278e6_0.conda#7c8d20d847bb45f56bd941578fcfa146 
-https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2#30186d27e2c9fa62b45fb1476b7200e3 -https://conda.anaconda.org/conda-forge/linux-64/lame-3.100-h166bdaf_1003.tar.bz2#a8832b479f93521a9e7b5b743803be51 -https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h27087fc_0.tar.bz2#76bbff344f0134279f225174e9064c8f -https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.0.9-h166bdaf_8.tar.bz2#9194c9bf9428035a05352d031462eae4 -https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.18-h0b41bf4_0.conda#6aa9c9de5542ecb07fdda9ca626252d8 -https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.5.0-hcb278e6_1.conda#6305a3dd2752c76335295da4e581f2fd -https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3 -https://conda.anaconda.org/conda-forge/linux-64/libhiredis-1.0.2-h2cc385e_0.tar.bz2#b34907d3a81a3cd8095ee83d174c074a -https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.17-h166bdaf_0.tar.bz2#b62b52da46c39ee2bc3c162ac7f1804d -https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-2.1.5.1-h0b41bf4_0.conda#1edd9e67bdb90d78cea97733ff6b54e6 -https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.0-h7f98852_0.tar.bz2#39b1328babf85c7c3a61636d9cd50206 -https://conda.anaconda.org/conda-forge/linux-64/libogg-1.3.4-h7f98852_1.tar.bz2#6e8cc2173440d77708196c5b93771680 -https://conda.anaconda.org/conda-forge/linux-64/libopus-1.3.1-h7f98852_1.tar.bz2#15345e56d527b330e1cacbdf58676e8f -https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda#40b61aab5c7ba9ff276c41cfffe6b80b -https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.3.0-h0b41bf4_0.conda#0d4a7508d8c6c65314f2b9c1f56ad408 -https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-h166bdaf_4.tar.bz2#f3f9de449d32ca9b9c66a22863c96f41 -https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.9.4-hcb278e6_0.conda#318b08df404f9c9be5712aaa5a6f0bb0 -https://conda.anaconda.org/conda-forge/linux-64/mpg123-1.31.3-hcb278e6_0.conda#141a126675b6d1a4eabb111a4a353898 -https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.4-hcb278e6_0.conda#681105bccc2a3f7f1a837d47d39c9179 -https://conda.anaconda.org/conda-forge/linux-64/nspr-4.35-h27087fc_0.conda#da0ec11a6454ae19bff5b02ed881a2b1 -https://conda.anaconda.org/conda-forge/linux-64/openssl-3.1.1-hd590300_1.conda#2e1d7b458ac8f1e3ca4e18b77add6277 -https://conda.anaconda.org/conda-forge/linux-64/pixman-0.40.0-h36c2ea0_0.tar.bz2#660e72c82f2e75a6b3fe6a6e75c79f19 -https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-h36c2ea0_1001.tar.bz2#22dad4df6e8630e8dff2428f6f6a7036 -https://conda.anaconda.org/conda-forge/linux-64/xkeyboard-config-2.38-h0b41bf4_0.conda#9ac34337e5101a87e5d91da05d84aa48 -https://conda.anaconda.org/conda-forge/linux-64/xorg-kbproto-1.0.7-h7f98852_1002.tar.bz2#4b230e8381279d76131116660f5a241a -https://conda.anaconda.org/conda-forge/linux-64/xorg-libice-1.1.1-hd590300_0.conda#b462a33c0be1421532f28bfe8f4a7514 -https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.11-hd590300_0.conda#2c80dc38fface310c9bd81b17037fee5 -https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdmcp-1.1.3-h7f98852_0.tar.bz2#be93aabceefa2fac576e971aef407908 -https://conda.anaconda.org/conda-forge/linux-64/xorg-renderproto-0.11.1-h7f98852_1002.tar.bz2#06feff3d2634e3097ce2fe681474b534 -https://conda.anaconda.org/conda-forge/linux-64/xorg-xextproto-7.3.0-h0b41bf4_1003.conda#bce9f945da8ad2ae9b1d7165a64d0f87 
-https://conda.anaconda.org/conda-forge/linux-64/xorg-xf86vidmodeproto-2.3.1-h7f98852_1002.tar.bz2#3ceea9668625c18f19530de98b15d5b0 -https://conda.anaconda.org/conda-forge/linux-64/xorg-xproto-7.0.31-h7f98852_1007.tar.bz2#b4a4381d54784606820704f7b5f05a15 -https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0 -https://conda.anaconda.org/conda-forge/linux-64/expat-2.5.0-hcb278e6_1.conda#8b9b5aca60558d02ddaa09d599e55920 -https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.0.9-h166bdaf_8.tar.bz2#4ae4d7795d33e02bd20f6b23d91caf82 -https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.0.9-h166bdaf_8.tar.bz2#04bac51ba35ea023dc48af73c1c88c25 -https://conda.anaconda.org/conda-forge/linux-64/libcap-2.67-he9d0100_0.conda#d05556c80caffff164d17bdea0105a1a -https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2.tar.bz2#4d331e44109e3f0e19b4cb8f9b82f3e1 -https://conda.anaconda.org/conda-forge/linux-64/libevent-2.1.12-hf998b51_1.conda#a1cfcc585f0c42bf8d5546bb1dfb668d -https://conda.anaconda.org/conda-forge/linux-64/libflac-1.4.2-h27087fc_0.tar.bz2#7daf72d8e2a8e848e11d63ed6d1026e0 -https://conda.anaconda.org/conda-forge/linux-64/libgpg-error-1.46-h620e276_0.conda#27e745f6f2e4b757e95dd7225fbe6bdb -https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.39-h753d276_0.conda#e1c890aebdebbfbf87e2c917187b4416 -https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.42.0-h2797004_0.conda#fdaae20a1cf7cd62130a0973190a31b7 -https://conda.anaconda.org/conda-forge/linux-64/libvorbis-1.3.7-h9c3ff4c_0.tar.bz2#309dec04b70a3cc0f1e84a4013683bc0 -https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.15-h0b41bf4_0.conda#33277193f5b92bad9fdd230eb700929c -https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.11.4-h0d562d8_0.conda#e46fad17d5fb57316b956f88dca765e4 -https://conda.anaconda.org/conda-forge/linux-64/mysql-common-8.0.32-hf1915f5_2.conda#cf4a8f520fdad3a63bb2bce74576cd2d -https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.40-hc3806b6_0.tar.bz2#69e2c796349cd9b273890bee0febfe1b -https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4 -https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.12-h27826a3_0.tar.bz2#5b8c42eb62e9fc961af70bdd6a26e168 -https://conda.anaconda.org/conda-forge/linux-64/xorg-libsm-1.2.4-h7391055_0.conda#93ee23f12bc2e684548181256edd2cf6 -https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.13-h166bdaf_4.tar.bz2#4b11e365c0275b808be78b30f904e295 -https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.2-h3eb15da_6.conda#6b63daed8feeca47be78f323e793d555 -https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.0.9-h166bdaf_8.tar.bz2#e5613f2bc717e9945840ff474419b8e4 -https://conda.anaconda.org/conda-forge/linux-64/ccache-4.8.1-h1fcd64f_0.conda#fd37a0c47d8b3667b73af0549037ce83 -https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-hca18f0e_1.conda#e1232042de76d24539a436d37597eb06 -https://conda.anaconda.org/conda-forge/linux-64/krb5-1.20.1-h81ceb04_0.conda#89a41adce7106749573d883b2f657d78 -https://conda.anaconda.org/conda-forge/linux-64/libgcrypt-1.10.1-h166bdaf_0.tar.bz2#f967fc95089cd247ceed56eda31de3a9 -https://conda.anaconda.org/conda-forge/linux-64/libglib-2.76.3-hebfc3b9_0.conda#a64f11b244b2c112cd3fa1cbe9493999 -https://conda.anaconda.org/conda-forge/linux-64/libhwloc-2.9.1-cuda112_haf10fcf_5.conda#b8996ffa972161676ba6972af4c41384 
-https://conda.anaconda.org/conda-forge/linux-64/libllvm15-15.0.7-h5cf9203_2.conda#5c0a511fa7d223d8661fefcf77b2a877 -https://conda.anaconda.org/conda-forge/linux-64/libsndfile-1.2.0-hb75c966_0.conda#c648d19cd9c8625898d5d370414de7c7 -https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.5.0-ha587672_6.conda#4e5ee4b062c21519efbee7e2ae608748 -https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.5.0-h5d7e998_3.conda#c91ea308d7bf70b62ddda568478aa03b -https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-16.0.5-h4dfa4b3_0.conda#9441a97b74c692d969ff465ac6c0ccea -https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-8.0.32-hca2cd23_2.conda#20b4708cd04bdc8138d03314ddd97885 -https://conda.anaconda.org/conda-forge/linux-64/nss-3.89-he45b914_0.conda#2745719a58eeaab6657256a3f142f099 -https://conda.anaconda.org/conda-forge/linux-64/python-3.11.3-h2755cc3_0_cpython.conda#37005ea5f68df6a8a381b70cf4d4a160 -https://conda.anaconda.org/conda-forge/linux-64/xcb-util-0.4.0-hd590300_1.conda#9bfac7ccd94d54fd21a0501296d60424 -https://conda.anaconda.org/conda-forge/linux-64/xcb-util-keysyms-0.4.0-h8ee46fc_1.conda#632413adcd8bc16b515cab87a2932913 -https://conda.anaconda.org/conda-forge/linux-64/xcb-util-renderutil-0.3.9-hd590300_1.conda#e995b155d938b6779da6ace6c6b13816 -https://conda.anaconda.org/conda-forge/linux-64/xcb-util-wm-0.4.1-h8ee46fc_1.conda#90108a432fb5c6150ccfee3f03388656 -https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.4-h8ee46fc_1.conda#52d09ea80a42c0466214609ef0a2d62d -https://conda.anaconda.org/conda-forge/linux-64/brotli-1.0.9-h166bdaf_8.tar.bz2#2ff08978892a3e8b954397c461f18418 -https://conda.anaconda.org/conda-forge/noarch/certifi-2023.5.7-pyhd8ed1ab_0.conda#5d1b71c942b8421285934dad1d891ebc -https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.1.0-pyhd8ed1ab_0.conda#7fcff9f6f123696e940bda77bd4d6551 -https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 -https://conda.anaconda.org/conda-forge/noarch/cycler-0.11.0-pyhd8ed1ab_0.tar.bz2#a50559fad0affdbb33729a68669ca1cb -https://conda.anaconda.org/conda-forge/linux-64/cython-0.29.35-py311hb755f60_0.conda#17f4738a1ca6155a63d2a0cbd3e4a8b1 -https://conda.anaconda.org/conda-forge/linux-64/dbus-1.13.6-h5008d03_3.tar.bz2#ecfff944ba3960ecb334b9a2663d708d -https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.1.1-pyhd8ed1ab_0.conda#7312299d7a0ea4993159229b7d2dceb2 -https://conda.anaconda.org/conda-forge/noarch/execnet-1.9.0-pyhd8ed1ab_0.tar.bz2#0e521f7a5e60d508b121d38b04874fb2 -https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.14.2-h14ed4e7_0.conda#0f69b688f52ff6da70bccb7ff7001d1d -https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.76.3-hfc55251_0.conda#8951eedf3cdf94dd733c1b5eee1f4880 -https://conda.anaconda.org/conda-forge/noarch/idna-3.4-pyhd8ed1ab_0.tar.bz2#34272b248891bddccc64479f9a7fffed -https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5 -https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.4-py311h4dd048b_1.tar.bz2#46d451f575392c01dc193069bd89766d -https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.15-haa2dc70_1.conda#980d8aca0bc23ca73fa8caa3e7c84c28 -https://conda.anaconda.org/conda-forge/linux-64/libclang13-15.0.7-default_h9986a30_2.conda#907344cee64101d44d806bbe0fccb01d -https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h36d4200_3.conda#c9f4416a34bc91e0eb029f912c68f81f 
-https://conda.anaconda.org/conda-forge/linux-64/libpq-15.3-hbcd7760_1.conda#8afb2a97d256ffde95b91a6283bc598c -https://conda.anaconda.org/conda-forge/linux-64/libsystemd0-253-h8c4010b_1.conda#9176b1e2cb8beca37a7510b0e801e38f -https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19 -https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.0-hfec8fc6_2.conda#5ce6a42505c6e9e6151c54c3ec8d68ea -https://conda.anaconda.org/conda-forge/noarch/packaging-23.1-pyhd8ed1ab_0.conda#91cda59e66e1e4afe9476f8ef98f5c30 -https://conda.anaconda.org/conda-forge/noarch/pluggy-1.0.0-pyhd8ed1ab_5.tar.bz2#7d301a0d25f424d96175f810935f0da9 -https://conda.anaconda.org/conda-forge/noarch/ply-3.11-py_1.tar.bz2#7205635cd71531943440fbfe3b6b5727 -https://conda.anaconda.org/conda-forge/noarch/py-1.11.0-pyh6c4a22f_0.tar.bz2#b4613d7e7a493916d867842a6a148054 -https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.0.9-pyhd8ed1ab_0.tar.bz2#e8fbc1b54b25f4b08281467bc13b70cc -https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha2e5f31_6.tar.bz2#2a7de29fb590ca14b5243c4c812c8025 -https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2023.3-pyhd8ed1ab_0.conda#2590495f608a63625e165915fb4e2e34 -https://conda.anaconda.org/conda-forge/noarch/pytz-2023.3-pyhd8ed1ab_0.conda#d3076b483092a435832603243567bc31 -https://conda.anaconda.org/conda-forge/noarch/setuptools-67.7.2-pyhd8ed1ab_0.conda#3b68bc43ec6baa48f7354a446267eefe -https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 -https://conda.anaconda.org/conda-forge/linux-64/tbb-2021.9.0-hf52228f_0.conda#f495e42d3d2020b025705625edf35490 -https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.1.0-pyh8a188c0_0.tar.bz2#a2995ee828f65687ac5b1e71a2ab1e0c -https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095 -https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 -https://conda.anaconda.org/conda-forge/linux-64/tornado-6.3.2-py311h459d7ec_0.conda#12b1c374ee90a1aa11ea921858394dc8 -https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.6.3-pyha770c72_0.conda#4a3014a4d107d15475d106b751c4e352 -https://conda.anaconda.org/conda-forge/linux-64/xcb-util-image-0.4.0-h8ee46fc_1.conda#9d7bcddf49cbf727730af10e71022c73 -https://conda.anaconda.org/conda-forge/linux-64/xorg-libxext-1.3.4-h0b41bf4_2.conda#82b6df12252e6f32402b96dacc656fec -https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.10-h7f98852_1003.tar.bz2#f59c1242cc1dd93e72c2ee2b360979eb -https://conda.anaconda.org/conda-forge/noarch/zipp-3.15.0-pyhd8ed1ab_0.conda#13018819ca8f5b7cc675a8faf1f5fedf -https://conda.anaconda.org/conda-forge/linux-64/cairo-1.16.0-hbbf8b49_1016.conda#c1dd96500b9b1a75e9e511931f415cbc -https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.39.4-py311h459d7ec_0.conda#ddd2cd004e10bc7a1e042283326cbf91 -https://conda.anaconda.org/conda-forge/linux-64/glib-2.76.3-hfc55251_0.conda#950e02f5665f5f4ff0437a6acba58798 -https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-6.6.0-pyha770c72_0.conda#f91a5d5175fb7ff2a91952ec7da59cb9 -https://conda.anaconda.org/conda-forge/noarch/joblib-1.2.0-pyhd8ed1ab_0.tar.bz2#7583652522d71ad78ba536bba06940eb -https://conda.anaconda.org/conda-forge/linux-64/libclang-15.0.7-default_h7634d5b_2.conda#1a4fe5162abe4a19b5a9dedf158a0ff9 
-https://conda.anaconda.org/conda-forge/linux-64/mkl-2022.1.0-h84fe81f_915.tar.bz2#b9c8f925797a93dbff45e1626b025a6b -https://conda.anaconda.org/conda-forge/linux-64/pillow-9.5.0-py311h0b84326_1.conda#6be2190fdbf26a6c1d3356a54d955237 -https://conda.anaconda.org/conda-forge/linux-64/pulseaudio-client-16.1-hb77b528_4.conda#8f349ca16d30950aa00870484d9d30c4 -https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.2-pyhd8ed1ab_0.tar.bz2#dd999d1cc9f79e67dbb855c8924c7984 -https://conda.anaconda.org/conda-forge/linux-64/sip-6.7.9-py311hb755f60_0.conda#2b5430f2f1651f460c852e1fdd549184 -https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.6.3-hd8ed1ab_0.conda#3876f650ed7d0f95d70fa4b647621909 -https://conda.anaconda.org/conda-forge/noarch/urllib3-2.0.2-pyhd8ed1ab_0.conda#81a763f3c64fe6d5f32e033b0325265d -https://conda.anaconda.org/conda-forge/linux-64/gstreamer-1.22.3-h977cf35_1.conda#410ed3b168e5a139d12ebaf4143072cd -https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-7.3.0-hdb3a94d_0.conda#765bc76c0dfaf24ff9d8a2935b2510df -https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-16_linux64_mkl.tar.bz2#85f61af03fd291dae33150ffe89dc09a -https://conda.anaconda.org/conda-forge/linux-64/mkl-devel-2022.1.0-ha770c72_916.tar.bz2#69ba49e445f87aea2cba343a71a35ca2 -https://conda.anaconda.org/conda-forge/noarch/platformdirs-3.5.1-pyhd8ed1ab_0.conda#e2be672aece1f060adf7154f76531a35 -https://conda.anaconda.org/conda-forge/linux-64/pyqt5-sip-12.11.0-py311hcafe171_3.conda#0d79df2a96f6572fed2883374400b235 -https://conda.anaconda.org/conda-forge/noarch/pytest-7.3.1-pyhd8ed1ab_0.conda#547c7de697ec99b494a28ddde185b5a4 -https://conda.anaconda.org/conda-forge/noarch/requests-2.31.0-pyhd8ed1ab_0.conda#a30144e4156cdbb236f99ebb49828f8b -https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.22.3-h938bd60_1.conda#1f317eb7f00db75f4112a07476345376 -https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-16_linux64_mkl.tar.bz2#361bf757b95488de76c4f123805742d3 -https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-16_linux64_mkl.tar.bz2#a2f166748917d6d6e4707841ca1f519e -https://conda.anaconda.org/conda-forge/noarch/pooch-1.7.0-pyha770c72_3.conda#5936894aade8240c867d292aa0d980c6 -https://conda.anaconda.org/conda-forge/noarch/pytest-forked-1.6.0-pyhd8ed1ab_0.conda#a46947638b6e005b63d2d6271da529b0 -https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.9.0-16_linux64_mkl.tar.bz2#44ccc4d4dca6a8d57fa17442bc64b5a1 -https://conda.anaconda.org/conda-forge/linux-64/numpy-1.24.3-py311h64a7726_0.conda#f1d507e1a5f1151845f7818ceb02ba9f -https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-2.5.0-pyhd8ed1ab_0.tar.bz2#1fdd1f3baccf0deb647385c677a1a48e -https://conda.anaconda.org/conda-forge/linux-64/qt-main-5.15.8-h01ceb2d_13.conda#99ca83a166224f46a62c9545b8d66401 -https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-16_linux64_mkl.tar.bz2#3f92c1c9e1c0e183462c5071aa02cae1 -https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.0.7-py311ha3edf6b_0.conda#e7548e7f58965a2fe97a95950a5fedc6 -https://conda.anaconda.org/conda-forge/linux-64/pandas-2.0.2-py311h320fe9a_0.conda#509769b430266dc5c2f6a3eab0f23164 -https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.15.7-py311ha74522f_3.conda#ad6dd0bed0cdf5f2d4eb2b989d6253b3 -https://conda.anaconda.org/conda-forge/linux-64/scipy-1.10.1-py311h64a7726_3.conda#a01a3a7428e770db5a0c8c7ab5fce7f7 -https://conda.anaconda.org/conda-forge/linux-64/blas-2.116-mkl.tar.bz2#c196a26abf6b4f132c88828ab7c2231c 
-https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.7.1-py311h8597a09_0.conda#70c3b734ffe82c16b6d121aaa11929a8 -https://conda.anaconda.org/conda-forge/linux-64/pyamg-5.0.0-py311hcb41070_0.conda#af2d6818c526791fb81686c554ab262b -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.7.1-py311h38be061_0.conda#8fd462c8bcbba5a3affcb2d04e387476 diff --git a/build_tools/azure/pylatest_conda_forge_mkl_osx-64_conda.lock b/build_tools/azure/pylatest_conda_forge_mkl_osx-64_conda.lock index eff7998346172..4b7200379d4fe 100644 --- a/build_tools/azure/pylatest_conda_forge_mkl_osx-64_conda.lock +++ b/build_tools/azure/pylatest_conda_forge_mkl_osx-64_conda.lock @@ -1,131 +1,129 @@ # Generated by conda-lock. # platform: osx-64 -# input_hash: b93f19a33e87617bd672a74b684ecbc39aba1924122ef1860af442118a396fbd +# input_hash: e7c2bc2b07721ef735f30d3b1cf0b2a780b5bf5c138d9d18ad174611bfbd32bf @EXPLICIT -https://conda.anaconda.org/conda-forge/osx-64/bzip2-1.0.8-h0d85af4_4.tar.bz2#37edc4e6304ca87316e160f5ca0bd1b5 -https://conda.anaconda.org/conda-forge/osx-64/ca-certificates-2023.5.7-h8857fd0_0.conda#b704e4b79ba0d887c4870b7b09d6a4df -https://conda.anaconda.org/conda-forge/osx-64/libbrotlicommon-1.0.9-hb7f2c08_8.tar.bz2#37157d273eaf3bc7d6862104161d9ec9 -https://conda.anaconda.org/conda-forge/osx-64/libcxx-16.0.5-hd57cbcb_0.conda#d34eed0a4fb993f0d934db6394ba23ef -https://conda.anaconda.org/conda-forge/osx-64/libdeflate-1.18-hac1461d_0.conda#3d131584456b277ce0871e6481fde49b -https://conda.anaconda.org/conda-forge/osx-64/libexpat-2.5.0-hf0c8a7f_1.conda#6c81cb022780ee33435cca0127dd43c9 +https://conda.anaconda.org/conda-forge/osx-64/bzip2-1.0.8-h10d778d_5.conda#6097a6ca9ada32699b5fc4312dd6ef18 +https://conda.anaconda.org/conda-forge/osx-64/ca-certificates-2024.6.2-h8857fd0_0.conda#3c23a8cab15ae51ebc9efdc229fccecf +https://conda.anaconda.org/conda-forge/osx-64/icu-73.2-hf5e326d_0.conda#5cc301d759ec03f28328428e28f65591 +https://conda.anaconda.org/conda-forge/osx-64/libbrotlicommon-1.1.0-h0dc2134_1.conda#9e6c31441c9aa24e41ace40d6151aab6 +https://conda.anaconda.org/conda-forge/osx-64/libdeflate-1.20-h49d49c5_0.conda#d46104f6a896a0bc6a1d37b88b2edf5c +https://conda.anaconda.org/conda-forge/osx-64/libexpat-2.6.2-h73e2aa4_0.conda#3d1d51c8f716d97c864d12f7af329526 https://conda.anaconda.org/conda-forge/osx-64/libffi-3.4.2-h0d85af4_5.tar.bz2#ccb34fb14960ad8b125962d3d79b31a9 -https://conda.anaconda.org/conda-forge/noarch/libgfortran-devel_osx-64-11.3.0-h824d247_31.conda#ea203ba0aca5cd594aa3b1a2b32e5978 -https://conda.anaconda.org/conda-forge/osx-64/libiconv-1.17-hac89ed1_0.tar.bz2#691d103d11180486154af49c037b7ed9 -https://conda.anaconda.org/conda-forge/osx-64/libjpeg-turbo-2.1.5.1-hb7f2c08_0.conda#d7309a152b9b79799063b8bb47e34a3a -https://conda.anaconda.org/conda-forge/osx-64/libwebp-base-1.3.0-hb7f2c08_0.conda#18981e4c840126d6118d8952485fea51 -https://conda.anaconda.org/conda-forge/osx-64/libzlib-1.2.13-hfd90126_4.tar.bz2#35eb3fce8d51ed3c1fd4122bad48250b -https://conda.anaconda.org/conda-forge/osx-64/llvm-openmp-16.0.5-hff08bdf_0.conda#af8df1a61e8137e3479b0f71d5bd0a49 -https://conda.anaconda.org/conda-forge/osx-64/mkl-include-2022.1.0-h6bab518_928.tar.bz2#67f8511a5eaf693a202486f74035b3f7 -https://conda.anaconda.org/conda-forge/osx-64/ncurses-6.4-hf0c8a7f_0.conda#c3dbae2411164d9b02c69090a9a91857 +https://conda.anaconda.org/conda-forge/noarch/libgfortran-devel_osx-64-12.3.0-h0b6f5ec_3.conda#39eeea5454333825d72202fae2d5e0b8 
+https://conda.anaconda.org/conda-forge/osx-64/libiconv-1.17-hd75f5a5_2.conda#6c3628d047e151efba7cf08c5e54d1ca +https://conda.anaconda.org/conda-forge/osx-64/libjpeg-turbo-3.0.0-h0dc2134_1.conda#72507f8e3961bc968af17435060b6dd6 +https://conda.anaconda.org/conda-forge/osx-64/libwebp-base-1.4.0-h10d778d_0.conda#b2c0047ea73819d992484faacbbe1c24 +https://conda.anaconda.org/conda-forge/osx-64/mkl-include-2023.2.0-h6bab518_50500.conda#835abb8ded5e26f23ea6996259c7972e +https://conda.anaconda.org/conda-forge/osx-64/ncurses-6.5-h5846eda_0.conda#02a888433d165c99bf09784a7b14d900 https://conda.anaconda.org/conda-forge/osx-64/pthread-stubs-0.4-hc929b4f_1001.tar.bz2#addd19059de62181cd11ae8f4ef26084 -https://conda.anaconda.org/conda-forge/osx-64/python_abi-3.11-3_cp311.conda#5e0a069a585445333868d2c6651c3b3f -https://conda.anaconda.org/conda-forge/noarch/tzdata-2023c-h71feb2d_0.conda#939e3e74d8be4dac89ce83b20de2492a +https://conda.anaconda.org/conda-forge/osx-64/python_abi-3.12-4_cp312.conda#87201ac4314b911b74197e588cca3639 +https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda#161081fc7cec0bfda0d86d7cb595f8d8 https://conda.anaconda.org/conda-forge/osx-64/xorg-libxau-1.0.11-h0dc2134_0.conda#9566b4c29274125b0266d0177b5eb97b https://conda.anaconda.org/conda-forge/osx-64/xorg-libxdmcp-1.1.3-h35c211d_0.tar.bz2#86ac76d6bf1cbb9621943eb3bd9ae36e https://conda.anaconda.org/conda-forge/osx-64/xz-5.2.6-h775f41a_0.tar.bz2#a72f9d4ea13d55d745ff1ed594747f10 -https://conda.anaconda.org/conda-forge/osx-64/gmp-6.2.1-h2e338ed_0.tar.bz2#dedc96914428dae572a39e69ee2a392f -https://conda.anaconda.org/conda-forge/osx-64/isl-0.25-hb486fe8_0.tar.bz2#45a9a46c78c0ea5c275b535f7923bde3 -https://conda.anaconda.org/conda-forge/osx-64/lerc-4.0.0-hb486fe8_0.tar.bz2#f9d6a4c82889d5ecedec1d90eb673c55 -https://conda.anaconda.org/conda-forge/osx-64/libbrotlidec-1.0.9-hb7f2c08_8.tar.bz2#7f952a036d9014b4dab96c6ea0f8c2a7 -https://conda.anaconda.org/conda-forge/osx-64/libbrotlienc-1.0.9-hb7f2c08_8.tar.bz2#b36a3bfe866d9127f25f286506982166 -https://conda.anaconda.org/conda-forge/osx-64/libgfortran5-12.2.0-he409387_31.conda#5a544130e584b1f204ac896ff071d5b3 -https://conda.anaconda.org/conda-forge/osx-64/libllvm14-14.0.6-hc8e404f_3.conda#a6433d7252b49c2195f8aa70ad898104 -https://conda.anaconda.org/conda-forge/osx-64/libpng-1.6.39-ha978bb4_0.conda#35e4928794c5391aec14ffdf1deaaee5 -https://conda.anaconda.org/conda-forge/osx-64/libsqlite-3.42.0-h58db7d2_0.conda#a7d3b44b7b0c9901ac7813b7a0462893 +https://conda.anaconda.org/conda-forge/osx-64/libbrotlidec-1.1.0-h0dc2134_1.conda#9ee0bab91b2ca579e10353738be36063 +https://conda.anaconda.org/conda-forge/osx-64/libbrotlienc-1.1.0-h0dc2134_1.conda#8a421fe09c6187f0eb5e2338a8a8be6d +https://conda.anaconda.org/conda-forge/osx-64/libcxx-17.0.6-h88467a6_0.conda#0fe355aecb8d24b8bc07c763209adbd9 https://conda.anaconda.org/conda-forge/osx-64/libxcb-1.15-hb7f2c08_0.conda#5513f57e0238c87c12dffedbcc9c1a4a -https://conda.anaconda.org/conda-forge/osx-64/openssl-3.1.1-h8a1eda9_1.conda#c7822d6ee74e34af1fd74365cfd18983 +https://conda.anaconda.org/conda-forge/osx-64/libzlib-1.3.1-h87427d6_1.conda#b7575b5aa92108dcc9aaab0f05f2dbce +https://conda.anaconda.org/conda-forge/osx-64/llvm-openmp-18.1.7-h15ab845_0.conda#57440310d92e93efd808c75fec50f94d +https://conda.anaconda.org/conda-forge/osx-64/openssl-3.3.1-h87427d6_0.conda#1bdad93ae01353340f194c5d879745db https://conda.anaconda.org/conda-forge/osx-64/readline-8.2-h9e318b2_1.conda#f17f77f2acf4d344734bda76829ce14e 
-https://conda.anaconda.org/conda-forge/osx-64/tapi-1100.0.11-h9ce4665_0.tar.bz2#f9ff42ccf809a21ba6f8607f8de36108 -https://conda.anaconda.org/conda-forge/osx-64/tbb-2021.9.0-hb8565cd_0.conda#6aedf8fdcdf5f2d7b4db21853a7d42ed -https://conda.anaconda.org/conda-forge/osx-64/tk-8.6.12-h5dbffcc_0.tar.bz2#8e9480d9c47061db2ed1b4ecce519a7f -https://conda.anaconda.org/conda-forge/osx-64/zlib-1.2.13-hfd90126_4.tar.bz2#be90e6223c74ea253080abae19b3bdb1 -https://conda.anaconda.org/conda-forge/osx-64/zstd-1.5.2-hbc0c0cd_6.conda#40a188783d3c425bdccc9ae9104acbb8 -https://conda.anaconda.org/conda-forge/osx-64/brotli-bin-1.0.9-hb7f2c08_8.tar.bz2#aac5ad0d8f747ef7f871508146df75d9 -https://conda.anaconda.org/conda-forge/osx-64/freetype-2.12.1-h3f81eb7_1.conda#852224ea3e8991a8342228eab274840e -https://conda.anaconda.org/conda-forge/osx-64/libclang-cpp14-14.0.6-default_hdb78580_1.conda#9a235664bf087994aa3acc1a60614964 -https://conda.anaconda.org/conda-forge/osx-64/libgfortran-5.0.0-11_3_0_h97931a8_31.conda#97451338600bd9c5b535eb224ef6c471 -https://conda.anaconda.org/conda-forge/osx-64/libtiff-4.5.0-hedf67fa_6.conda#800b810c1aa3eb4a08106698441871bb -https://conda.anaconda.org/conda-forge/osx-64/llvm-tools-14.0.6-hc8e404f_3.conda#3bebd091daab84c54f91205bb4d4a9c3 -https://conda.anaconda.org/conda-forge/osx-64/mkl-2022.1.0-h860c996_928.tar.bz2#98a4d58de0ba6e61ce46620b775c19ce -https://conda.anaconda.org/conda-forge/osx-64/mpfr-4.2.0-h4f9bd69_0.conda#f48a2f4515be334c5cfeed82517b96e0 -https://conda.anaconda.org/conda-forge/osx-64/python-3.11.3-h99528f9_0_cpython.conda#c3291f9411424fc587d53a2ea57fb075 +https://conda.anaconda.org/conda-forge/osx-64/brotli-bin-1.1.0-h0dc2134_1.conda#ece565c215adcc47fc1db4e651ee094b +https://conda.anaconda.org/conda-forge/osx-64/gmp-6.3.0-h73e2aa4_1.conda#92f8d748d95d97f92fc26cfac9bb5b6e +https://conda.anaconda.org/conda-forge/osx-64/isl-0.26-imath32_h2e86a7b_101.conda#d06222822a9144918333346f145b68c6 +https://conda.anaconda.org/conda-forge/osx-64/lerc-4.0.0-hb486fe8_0.tar.bz2#f9d6a4c82889d5ecedec1d90eb673c55 +https://conda.anaconda.org/conda-forge/osx-64/libgfortran5-13.2.0-h2873a65_3.conda#e4fb4d23ec2870ff3c40d10afe305aec +https://conda.anaconda.org/conda-forge/osx-64/libpng-1.6.43-h92b6c6a_0.conda#65dcddb15965c9de2c0365cb14910532 +https://conda.anaconda.org/conda-forge/osx-64/libsqlite-3.45.3-h92b6c6a_0.conda#68e462226209f35182ef66eda0f794ff +https://conda.anaconda.org/conda-forge/osx-64/libxml2-2.12.7-h3e169fe_1.conda#ddb63049aa7bd9f08f2cdc5a1c144d1a +https://conda.anaconda.org/conda-forge/osx-64/ninja-1.12.1-h3c5361c_0.conda#a0ebabd021c8191aeb82793fe43cfdcb https://conda.anaconda.org/conda-forge/osx-64/sigtool-0.1.3-h88f4db0_0.tar.bz2#fbfb84b9de9a6939cb165c02c69b1865 -https://conda.anaconda.org/conda-forge/osx-64/brotli-1.0.9-hb7f2c08_8.tar.bz2#55f612fe4a9b5f6ac76348b6de94aaeb -https://conda.anaconda.org/conda-forge/noarch/certifi-2023.5.7-pyhd8ed1ab_0.conda#5d1b71c942b8421285934dad1d891ebc -https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.1.0-pyhd8ed1ab_0.conda#7fcff9f6f123696e940bda77bd4d6551 -https://conda.anaconda.org/conda-forge/osx-64/clang-14-14.0.6-default_hdb78580_1.conda#ce19ccaee311132f299ffd0eec9c4581 +https://conda.anaconda.org/conda-forge/osx-64/tapi-1100.0.11-h9ce4665_0.tar.bz2#f9ff42ccf809a21ba6f8607f8de36108 +https://conda.anaconda.org/conda-forge/osx-64/tk-8.6.13-h1abcd95_1.conda#bf830ba5afc507c6232d4ef0fb1a882d +https://conda.anaconda.org/conda-forge/osx-64/zlib-1.3.1-h87427d6_1.conda#3ac9ef8975965f9698dbedd2a4cc5894 
+https://conda.anaconda.org/conda-forge/osx-64/zstd-1.5.6-h915ae27_0.conda#4cb2cd56f039b129bb0e491c1164167e +https://conda.anaconda.org/conda-forge/osx-64/brotli-1.1.0-h0dc2134_1.conda#9272dd3b19c4e8212f8542cefd5c3d67 +https://conda.anaconda.org/conda-forge/osx-64/freetype-2.12.1-h60636b9_2.conda#25152fce119320c980e5470e64834b50 +https://conda.anaconda.org/conda-forge/osx-64/libgfortran-5.0.0-13_2_0_h97931a8_3.conda#0b6e23a012ee7a9a5f6b244f5a92c1d5 +https://conda.anaconda.org/conda-forge/osx-64/libhwloc-2.10.0-default_h456cccd_1001.conda#d2dc768b14cdf226a30a8eab15641305 +https://conda.anaconda.org/conda-forge/osx-64/libllvm16-16.0.6-hbedff68_3.conda#8fd56c0adc07a37f93bd44aa61a97c90 +https://conda.anaconda.org/conda-forge/osx-64/libtiff-4.6.0-h129831d_3.conda#568593071d2e6cea7b5fc1f75bfa10ca +https://conda.anaconda.org/conda-forge/osx-64/mpfr-4.2.1-h4f6b447_1.conda#b90df08f0deb2f58631447c1462c92a7 +https://conda.anaconda.org/conda-forge/osx-64/python-3.12.3-h1411813_0_cpython.conda#df1448ec6cbf8eceb03d29003cf72ae6 +https://conda.anaconda.org/conda-forge/noarch/certifi-2024.2.2-pyhd8ed1ab_0.conda#0876280e409658fc6f9e75d035960333 https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 -https://conda.anaconda.org/conda-forge/noarch/cycler-0.11.0-pyhd8ed1ab_0.tar.bz2#a50559fad0affdbb33729a68669ca1cb -https://conda.anaconda.org/conda-forge/osx-64/cython-0.29.35-py311hdf8f085_0.conda#29e8e9b57704e153d6a5ffced82262da -https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.1.1-pyhd8ed1ab_0.conda#7312299d7a0ea4993159229b7d2dceb2 -https://conda.anaconda.org/conda-forge/noarch/execnet-1.9.0-pyhd8ed1ab_0.tar.bz2#0e521f7a5e60d508b121d38b04874fb2 -https://conda.anaconda.org/conda-forge/noarch/idna-3.4-pyhd8ed1ab_0.tar.bz2#34272b248891bddccc64479f9a7fffed +https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhd8ed1ab_0.conda#5cd86562580f274031ede6aa6aa24441 +https://conda.anaconda.org/conda-forge/osx-64/cython-3.0.10-py312hede676d_0.conda#3008aa88f0dc67e7144734b16e331ee4 +https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.0-pyhd8ed1ab_2.conda#8d652ea2ee8eaee02ed8dc820bc794aa +https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_0.conda#15dda3cdbf330abfe9f555d22f66db46 https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5 -https://conda.anaconda.org/conda-forge/osx-64/kiwisolver-1.4.4-py311hd2070f0_1.tar.bz2#5219e72a43e53e8f6af4fdf76a0f90ef -https://conda.anaconda.org/conda-forge/osx-64/lcms2-2.15-h2dcdeff_1.conda#f1df9b0c2d9fbe985e62f4b24773a9e4 -https://conda.anaconda.org/conda-forge/osx-64/ld64_osx-64-609-hfd63004_13.conda#58fcda6a84fb42f51c6c2d6d175b435d -https://conda.anaconda.org/conda-forge/osx-64/libblas-3.9.0-17_osx64_mkl.conda#e5d4b69958f8eb30b932828880b847f3 +https://conda.anaconda.org/conda-forge/osx-64/kiwisolver-1.4.5-py312h49ebfd2_1.conda#21f174a5cfb5964069c374171a979157 +https://conda.anaconda.org/conda-forge/osx-64/lcms2-2.16-ha2f27b4_0.conda#1442db8f03517834843666c422238c9b +https://conda.anaconda.org/conda-forge/osx-64/ld64_osx-64-711-ha20a434_0.conda#a8b41eb97c8a9d618243a79ba78fdc3c +https://conda.anaconda.org/conda-forge/osx-64/libclang-cpp16-16.0.6-default_h4c8afb6_7.conda#784816790fe438443354d13050fcd67d https://conda.anaconda.org/conda-forge/osx-64/libhiredis-1.0.2-h2beb688_0.tar.bz2#524282b2c46c9dedf051b3bc2ae05494 
-https://conda.anaconda.org/conda-forge/osx-64/mkl-devel-2022.1.0-h694c41f_929.tar.bz2#041ceef009fe6d29cbd2555907c23ab3 +https://conda.anaconda.org/conda-forge/osx-64/llvm-tools-16.0.6-hbedff68_3.conda#e9356b0807462e8f84c1384a8da539a5 https://conda.anaconda.org/conda-forge/osx-64/mpc-1.3.1-h81bd1dd_0.conda#c752c0eb6c250919559172c011e5f65b https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19 -https://conda.anaconda.org/conda-forge/osx-64/openjpeg-2.5.0-h13ac156_2.conda#299a29af9ac9f550ad459d655739280b -https://conda.anaconda.org/conda-forge/noarch/packaging-23.1-pyhd8ed1ab_0.conda#91cda59e66e1e4afe9476f8ef98f5c30 -https://conda.anaconda.org/conda-forge/noarch/pluggy-1.0.0-pyhd8ed1ab_5.tar.bz2#7d301a0d25f424d96175f810935f0da9 -https://conda.anaconda.org/conda-forge/noarch/py-1.11.0-pyh6c4a22f_0.tar.bz2#b4613d7e7a493916d867842a6a148054 -https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.0.9-pyhd8ed1ab_0.tar.bz2#e8fbc1b54b25f4b08281467bc13b70cc -https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha2e5f31_6.tar.bz2#2a7de29fb590ca14b5243c4c812c8025 -https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2023.3-pyhd8ed1ab_0.conda#2590495f608a63625e165915fb4e2e34 -https://conda.anaconda.org/conda-forge/noarch/pytz-2023.3-pyhd8ed1ab_0.conda#d3076b483092a435832603243567bc31 -https://conda.anaconda.org/conda-forge/noarch/setuptools-67.7.2-pyhd8ed1ab_0.conda#3b68bc43ec6baa48f7354a446267eefe +https://conda.anaconda.org/conda-forge/osx-64/openjpeg-2.5.2-h7310d3a_0.conda#05a14cc9d725dd74995927968d6547e3 +https://conda.anaconda.org/conda-forge/noarch/packaging-24.0-pyhd8ed1ab_0.conda#248f521b64ce055e7feae3105e7abeb8 +https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_0.conda#d3483c8fc2dc2cc3f5cf43e26d60cabf +https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.2-pyhd8ed1ab_0.conda#b9a4dacf97241704529131a0dfc0494f +https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2024.1-pyhd8ed1ab_0.conda#98206ea9954216ee7540f0c773f2104d +https://conda.anaconda.org/conda-forge/noarch/pytz-2024.1-pyhd8ed1ab_0.conda#3eeeeb9e4827ace8c0c1419c85d590ad +https://conda.anaconda.org/conda-forge/noarch/setuptools-70.0.0-pyhd8ed1ab_0.conda#c8ddb4f34a208df4dd42509a0f6a1c89 https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 -https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.1.0-pyh8a188c0_0.tar.bz2#a2995ee828f65687ac5b1e71a2ab1e0c +https://conda.anaconda.org/conda-forge/osx-64/tbb-2021.12.0-h3c5361c_1.conda#e23dd312f13ffe470cc4fdeaddc7a32e +https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.5.0-pyhc1e730c_0.conda#df68d78237980a159bd7149f33c0e8fd https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095 https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 -https://conda.anaconda.org/conda-forge/osx-64/tornado-6.3.2-py311h2725bcf_0.conda#276fe4341e39dcd9d9d33ca18140d2e7 -https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.6.3-pyha770c72_0.conda#4a3014a4d107d15475d106b751c4e352 -https://conda.anaconda.org/conda-forge/noarch/zipp-3.15.0-pyhd8ed1ab_0.conda#13018819ca8f5b7cc675a8faf1f5fedf -https://conda.anaconda.org/conda-forge/osx-64/ccache-4.8.1-h28e096f_0.conda#dcc8cc97fdab7a5fad9e1a6bbad9ed0e -https://conda.anaconda.org/conda-forge/osx-64/cctools_osx-64-973.0.1-hcc6d90d_13.conda#76e5fa849e2042cd657d9eec96095680 
-https://conda.anaconda.org/conda-forge/osx-64/clang-14.0.6-h694c41f_1.conda#1305da4c85c7eaa2e90fa14efc35f591 -https://conda.anaconda.org/conda-forge/osx-64/coverage-7.2.7-py311h2725bcf_0.conda#afba3a3f74c5f71ebd9f400871e8c4de -https://conda.anaconda.org/conda-forge/osx-64/fonttools-4.39.4-py311h2725bcf_0.conda#250388f6d2c5a20066a95cf872e22495 -https://conda.anaconda.org/conda-forge/osx-64/gfortran_impl_osx-64-11.3.0-h1f927f5_31.conda#926da9259d77f6a95d60c5a956425c2f -https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-6.6.0-pyha770c72_0.conda#f91a5d5175fb7ff2a91952ec7da59cb9 -https://conda.anaconda.org/conda-forge/noarch/joblib-1.2.0-pyhd8ed1ab_0.tar.bz2#7583652522d71ad78ba536bba06940eb -https://conda.anaconda.org/conda-forge/osx-64/ld64-609-hc6ad406_13.conda#5d7676eee44dfa3e48bf21700e044aa9 -https://conda.anaconda.org/conda-forge/osx-64/libcblas-3.9.0-17_osx64_mkl.conda#5adcad22978f80fa101047022e79d9eb -https://conda.anaconda.org/conda-forge/osx-64/liblapack-3.9.0-17_osx64_mkl.conda#5557060dea295fcbb224be17b3947d16 -https://conda.anaconda.org/conda-forge/osx-64/pillow-9.5.0-py311h7cb0e2d_1.conda#bf4feca7fd63e619c39ab32eac625edf -https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.2-pyhd8ed1ab_0.tar.bz2#dd999d1cc9f79e67dbb855c8924c7984 -https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.6.3-hd8ed1ab_0.conda#3876f650ed7d0f95d70fa4b647621909 -https://conda.anaconda.org/conda-forge/noarch/urllib3-2.0.2-pyhd8ed1ab_0.conda#81a763f3c64fe6d5f32e033b0325265d -https://conda.anaconda.org/conda-forge/osx-64/cctools-973.0.1-h76f1dac_13.conda#802cae917abdc5a7cdfa699ff02da42d -https://conda.anaconda.org/conda-forge/osx-64/clangxx-14.0.6-default_hdb78580_1.conda#cc2ac1c5c838cb0edd65258da7c38294 -https://conda.anaconda.org/conda-forge/osx-64/liblapacke-3.9.0-17_osx64_mkl.conda#678af3918e54ac46249290a05e7e69b1 -https://conda.anaconda.org/conda-forge/osx-64/numpy-1.24.3-py311hc44ba51_0.conda#6c4b3bbdc10013352324d4cc366edb17 -https://conda.anaconda.org/conda-forge/noarch/platformdirs-3.5.1-pyhd8ed1ab_0.conda#e2be672aece1f060adf7154f76531a35 -https://conda.anaconda.org/conda-forge/noarch/pytest-7.3.1-pyhd8ed1ab_0.conda#547c7de697ec99b494a28ddde185b5a4 -https://conda.anaconda.org/conda-forge/noarch/requests-2.31.0-pyhd8ed1ab_0.conda#a30144e4156cdbb236f99ebb49828f8b -https://conda.anaconda.org/conda-forge/osx-64/blas-devel-3.9.0-17_osx64_mkl.conda#b40b415e2be4d0d2a8d05d0f805240b7 -https://conda.anaconda.org/conda-forge/noarch/compiler-rt_osx-64-14.0.6-hab78ec2_0.tar.bz2#4fdde3f4ed31722a1c811723f5db82f0 -https://conda.anaconda.org/conda-forge/osx-64/contourpy-1.0.7-py311hd2070f0_0.conda#d78f75103409d2c7a8774c873821ae9a -https://conda.anaconda.org/conda-forge/osx-64/pandas-2.0.2-py311hab14417_0.conda#a490b12cf9ba39a6968000e93826c283 -https://conda.anaconda.org/conda-forge/noarch/pooch-1.7.0-pyha770c72_3.conda#5936894aade8240c867d292aa0d980c6 -https://conda.anaconda.org/conda-forge/noarch/pytest-cov-4.1.0-pyhd8ed1ab_0.conda#06eb685a3a0b146347a58dda979485da -https://conda.anaconda.org/conda-forge/noarch/pytest-forked-1.6.0-pyhd8ed1ab_0.conda#a46947638b6e005b63d2d6271da529b0 -https://conda.anaconda.org/conda-forge/osx-64/blas-2.117-mkl.conda#4c921079b5298ce08bb336fc025b96d7 -https://conda.anaconda.org/conda-forge/osx-64/compiler-rt-14.0.6-h613da45_0.tar.bz2#b44e0625319f9933e584dc3b96f5baf7 -https://conda.anaconda.org/conda-forge/osx-64/matplotlib-base-3.7.1-py311h2bf763f_0.conda#d67ac9c9b834ae77ff7b2c59f702803c 
-https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-2.5.0-pyhd8ed1ab_0.tar.bz2#1fdd1f3baccf0deb647385c677a1a48e -https://conda.anaconda.org/conda-forge/osx-64/scipy-1.10.1-py311h16c3c4d_3.conda#a3ba8e96a7511ef8c3b61d28a68da6ed -https://conda.anaconda.org/conda-forge/osx-64/clang_osx-64-14.0.6-h3113cd8_6.conda#1b191288877fac1564184b28ce07de84 -https://conda.anaconda.org/conda-forge/osx-64/matplotlib-3.7.1-py311h6eed73b_0.conda#c112be16f02d1c68de63ae3ec6fc7db4 -https://conda.anaconda.org/conda-forge/osx-64/pyamg-5.0.0-py311h349b758_0.conda#a6c92bfaa34aa9c3211ede51e683c43f -https://conda.anaconda.org/conda-forge/osx-64/c-compiler-1.5.2-hbf74d83_0.conda#c1413ef5a20d658923e12dd3b566d8f3 -https://conda.anaconda.org/conda-forge/osx-64/clangxx_osx-64-14.0.6-h6f97653_6.conda#3989d08f74e7d987e94d9003cea30080 -https://conda.anaconda.org/conda-forge/osx-64/gfortran_osx-64-11.3.0-h18f7dce_1.conda#4e066d81dd3b86556b723021980f4ed8 -https://conda.anaconda.org/conda-forge/osx-64/cxx-compiler-1.5.2-hb8565cd_0.conda#349ae14723b98f76ea0fcb8e532b2ead -https://conda.anaconda.org/conda-forge/osx-64/gfortran-12.2.0-h2c809b3_1.conda#4a5cb3bf02a98991321a1f8ec4d8c817 -https://conda.anaconda.org/conda-forge/osx-64/fortran-compiler-1.5.2-haad3a49_0.conda#649a324b13eb77c6d5e98d36ea0c59f4 -https://conda.anaconda.org/conda-forge/osx-64/compilers-1.5.2-h694c41f_0.conda#1fdd3bc173dad6e7a0439962c7764ab8 +https://conda.anaconda.org/conda-forge/osx-64/tornado-6.4.1-py312hbd25219_0.conda#5a40db69b327c71511248f8186965bd3 +https://conda.anaconda.org/conda-forge/noarch/wheel-0.43.0-pyhd8ed1ab_1.conda#0b5293a157c2b5cd513dd1b03d8d3aae +https://conda.anaconda.org/conda-forge/osx-64/ccache-4.9.1-h41adc32_0.conda#45aaf96b67840bd98a928de8679098fa +https://conda.anaconda.org/conda-forge/osx-64/cctools_osx-64-986-ha1c5b94_0.conda#a8951de2506df5649f5a3295fdfd9f2c +https://conda.anaconda.org/conda-forge/osx-64/clang-16-16.0.6-default_h4c8afb6_7.conda#c9da6a62b571cac3707db69610ed7bd3 +https://conda.anaconda.org/conda-forge/osx-64/coverage-7.5.3-py312hbd25219_0.conda#135eeb22a4da903e2d06c4323b459003 +https://conda.anaconda.org/conda-forge/osx-64/fonttools-4.53.0-py312hbd25219_0.conda#ce2e9b0279cbbae03017ec7be748b255 +https://conda.anaconda.org/conda-forge/osx-64/gfortran_impl_osx-64-12.3.0-hc328e78_3.conda#b3d751dc7073bbfdfa9d863e39b9685d +https://conda.anaconda.org/conda-forge/noarch/joblib-1.4.2-pyhd8ed1ab_0.conda#25df261d4523d9f9783bcdb7208d872f +https://conda.anaconda.org/conda-forge/osx-64/ld64-711-ha02d983_0.conda#3ae4930ec076735cce481e906f5192e0 +https://conda.anaconda.org/conda-forge/noarch/meson-1.4.0-pyhd8ed1ab_0.conda#52a0660cfa40b45bf254ecc3374cb2e0 +https://conda.anaconda.org/conda-forge/osx-64/mkl-2023.2.0-h54c2260_50500.conda#0a342ccdc79e4fcd359245ac51941e7b +https://conda.anaconda.org/conda-forge/osx-64/pillow-10.3.0-py312h0c923fa_0.conda#6f0591ae972e9b815739da3392fbb3c3 +https://conda.anaconda.org/conda-forge/noarch/pip-24.0-pyhd8ed1ab_0.conda#f586ac1e56c8638b64f9c8122a7b8a67 +https://conda.anaconda.org/conda-forge/noarch/pyproject-metadata-0.8.0-pyhd8ed1ab_0.conda#573fe09d7bd0cd4bcc210d8369b5ca47 +https://conda.anaconda.org/conda-forge/noarch/pytest-8.2.2-pyhd8ed1ab_0.conda#0f3f49c22c7ef3a1195fa61dad3c43be +https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0-pyhd8ed1ab_0.conda#2cf4264fffb9e6eff6031c5b6884d61c +https://conda.anaconda.org/conda-forge/osx-64/cctools-986-h40f6528_0.conda#b7a2ca0062a6ee8bc4e83ec887bef942 
+https://conda.anaconda.org/conda-forge/osx-64/clang-16.0.6-hd4457cd_7.conda#0f91e4c1d9d85887db66ddbc185d65d4 +https://conda.anaconda.org/conda-forge/osx-64/libblas-3.9.0-20_osx64_mkl.conda#160fdc97a51d66d51dc782fb67d35205 +https://conda.anaconda.org/conda-forge/noarch/meson-python-0.16.0-pyh0c530f3_0.conda#e16f0dbf502da873be9f9adb0dc52547 +https://conda.anaconda.org/conda-forge/osx-64/mkl-devel-2023.2.0-h694c41f_50500.conda#1b4d0235ef253a1e19459351badf4f9f +https://conda.anaconda.org/conda-forge/noarch/pytest-cov-5.0.0-pyhd8ed1ab_0.conda#c54c0107057d67ddf077751339ec2c63 +https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.5.0-pyhd8ed1ab_0.conda#d5f595da2daead898ca958ac62f0307b +https://conda.anaconda.org/conda-forge/osx-64/clangxx-16.0.6-default_ha3b9224_7.conda#00c8a212cbbd427dcbcc4231b23ddc5e +https://conda.anaconda.org/conda-forge/osx-64/libcblas-3.9.0-20_osx64_mkl.conda#51089a4865eb4aec2bc5c7468bd07f9f +https://conda.anaconda.org/conda-forge/osx-64/liblapack-3.9.0-20_osx64_mkl.conda#58f08e12ad487fac4a08f90ff0b87aec +https://conda.anaconda.org/conda-forge/noarch/compiler-rt_osx-64-16.0.6-ha38d28d_2.conda#7a46507edc35c6c8818db0adaf8d787f +https://conda.anaconda.org/conda-forge/osx-64/liblapacke-3.9.0-20_osx64_mkl.conda#124ae8e384268a8da66f1d64114a1eda +https://conda.anaconda.org/conda-forge/osx-64/numpy-1.26.4-py312he3a82b2_0.conda#96c61a21c4276613748dba069554846b +https://conda.anaconda.org/conda-forge/osx-64/blas-devel-3.9.0-20_osx64_mkl.conda#cc3260179093918b801e373c6e888e02 +https://conda.anaconda.org/conda-forge/osx-64/compiler-rt-16.0.6-ha38d28d_2.conda#3b9e8c5c63b8e86234f499490acd85c2 +https://conda.anaconda.org/conda-forge/osx-64/contourpy-1.2.1-py312h9230928_0.conda#079df34ce7c71259cfdd394645370891 +https://conda.anaconda.org/conda-forge/osx-64/pandas-2.2.2-py312h1171441_1.conda#240737937f1f046b0e03ecc11ac4ec98 +https://conda.anaconda.org/conda-forge/osx-64/scipy-1.13.1-py312hb9702fa_0.conda#46cb49e67c33f8340a09e49e69adf195 +https://conda.anaconda.org/conda-forge/osx-64/blas-2.120-mkl.conda#b041a7677a412f3d925d8208936cb1e2 +https://conda.anaconda.org/conda-forge/osx-64/clang_impl_osx-64-16.0.6-h8787910_16.conda#c50c939d1bf9785561220b2cfbb98cb9 +https://conda.anaconda.org/conda-forge/osx-64/matplotlib-base-3.8.4-py312hb6d62fa_2.conda#6c5cf505d118f4b58961191fd5e0d030 +https://conda.anaconda.org/conda-forge/osx-64/pyamg-5.1.0-py312h44e70fa_1.conda#ffbfe3b3d5e9675541ee516badfb7729 +https://conda.anaconda.org/conda-forge/osx-64/clang_osx-64-16.0.6-hb91bd55_16.conda#b5dacba087761db21ba9eb69b2c1718b +https://conda.anaconda.org/conda-forge/osx-64/matplotlib-3.8.4-py312hb401068_2.conda#456c057a3e2dcac3d02f4b9d25e277f5 +https://conda.anaconda.org/conda-forge/osx-64/c-compiler-1.7.0-h282daa2_1.conda#d27411cb82bc1b76b9f487da6ae97f1d +https://conda.anaconda.org/conda-forge/osx-64/clangxx_impl_osx-64-16.0.6-h6d92fbe_16.conda#55fb2d5cbc9ec490347b1f797536fba8 +https://conda.anaconda.org/conda-forge/osx-64/gfortran_osx-64-12.3.0-h18f7dce_1.conda#436af2384c47aedb94af78a128e174f1 +https://conda.anaconda.org/conda-forge/osx-64/clangxx_osx-64-16.0.6-hb91bd55_16.conda#5879c43528a2601d04d64c5f9fdf3033 +https://conda.anaconda.org/conda-forge/osx-64/gfortran-12.3.0-h2c809b3_1.conda#c48adbaa8944234b80ef287c37e329b0 +https://conda.anaconda.org/conda-forge/osx-64/cxx-compiler-1.7.0-h7728843_1.conda#e04cb15a20553b973dd068c2dc81d682 +https://conda.anaconda.org/conda-forge/osx-64/fortran-compiler-1.7.0-h6c2ab21_1.conda#48319058089f492d5059e04494b81ed9 
+https://conda.anaconda.org/conda-forge/osx-64/compilers-1.7.0-h694c41f_1.conda#875e9b06186a41d55b96b9c1a52f15be diff --git a/build_tools/azure/pylatest_conda_forge_mkl_osx-64_environment.yml b/build_tools/azure/pylatest_conda_forge_mkl_osx-64_environment.yml index 4ddb80c7cae3d..ad177e4ed391b 100644 --- a/build_tools/azure/pylatest_conda_forge_mkl_osx-64_environment.yml +++ b/build_tools/azure/pylatest_conda_forge_mkl_osx-64_environment.yml @@ -15,9 +15,11 @@ dependencies: - pandas - pyamg - pytest - - pytest-xdist=2.5.0 + - pytest-xdist - pillow - - setuptools + - pip + - ninja + - meson-python - pytest-cov - coverage - ccache diff --git a/build_tools/azure/pylatest_conda_mkl_no_openmp_environment.yml b/build_tools/azure/pylatest_conda_mkl_no_openmp_environment.yml index e32b4adc6ea3e..7e85b28b3f6c4 100644 --- a/build_tools/azure/pylatest_conda_mkl_no_openmp_environment.yml +++ b/build_tools/azure/pylatest_conda_mkl_no_openmp_environment.yml @@ -7,17 +7,21 @@ dependencies: - python - numpy - blas[build=mkl] - - scipy - - cython + - scipy<1.12 - joblib - - threadpoolctl - matplotlib - pandas - pyamg - pytest - - pytest-xdist=2.5.0 + - pytest-xdist - pillow - - setuptools + - pip + - ninja - pytest-cov - coverage - ccache + - pip + - pip: + - cython + - threadpoolctl + - meson-python diff --git a/build_tools/azure/pylatest_conda_mkl_no_openmp_osx-64_conda.lock b/build_tools/azure/pylatest_conda_mkl_no_openmp_osx-64_conda.lock index 1e1ae5e4ff3e6..d38d4fcd82d6f 100644 --- a/build_tools/azure/pylatest_conda_mkl_no_openmp_osx-64_conda.lock +++ b/build_tools/azure/pylatest_conda_mkl_no_openmp_osx-64_conda.lock @@ -1,95 +1,86 @@ # Generated by conda-lock. # platform: osx-64 -# input_hash: 808a9ca502dcdd93d1b689ad8ff08d74228790f74a1f707c0054ee97dad6a742 +# input_hash: 33a102d2ccde4e14e315a98b50340349af349f802403dd49589375b2b889f2d3 @EXPLICIT https://repo.anaconda.com/pkgs/main/osx-64/blas-1.0-mkl.conda#cb2c87e85ac8e0ceae776d26d4214c8a -https://repo.anaconda.com/pkgs/main/osx-64/bzip2-1.0.8-h1de35cc_0.conda#19fcb113b170fe2a0be96b47801fed7d -https://repo.anaconda.com/pkgs/main/osx-64/ca-certificates-2023.01.10-hecd8cb5_0.conda#4544150389480f19dd67c20b3bb12d61 -https://repo.anaconda.com/pkgs/main/osx-64/giflib-5.2.1-h6c40b1e_3.conda#a5ab49bdb6fdc875fb965221241e3bcf +https://repo.anaconda.com/pkgs/main/osx-64/bzip2-1.0.8-h6c40b1e_6.conda#96224786021d0765ce05818fa3c59bdb +https://repo.anaconda.com/pkgs/main/osx-64/ca-certificates-2024.3.11-hecd8cb5_0.conda#a2e29a11940c66baf9942912096fad5f https://repo.anaconda.com/pkgs/main/osx-64/jpeg-9e-h6c40b1e_1.conda#fc3e61fa41309946c9283fe8737d7f41 -https://repo.anaconda.com/pkgs/main/osx-64/libbrotlicommon-1.0.9-hca72f7f_7.conda#6c865b9e76fa2fad0c8ac32aa0f01f75 +https://repo.anaconda.com/pkgs/main/osx-64/libbrotlicommon-1.0.9-h6c40b1e_8.conda#8e86dfa34b08bc664b19e1499e5465b8 https://repo.anaconda.com/pkgs/main/osx-64/libcxx-14.0.6-h9765a3e_0.conda#387757bb354ae9042370452cd0fb5627 -https://repo.anaconda.com/pkgs/main/osx-64/libdeflate-1.17-hb664fd8_0.conda#4236b26b451011822d3a3086282063c0 -https://repo.anaconda.com/pkgs/main/osx-64/libffi-3.4.4-hecd8cb5_0.conda#c20b2687118c471b1d70067ef2b2703f -https://repo.anaconda.com/pkgs/main/osx-64/libwebp-base-1.2.4-h6c40b1e_1.conda#b5ba90f49396f024ee017794b28e8263 +https://repo.anaconda.com/pkgs/main/osx-64/libdeflate-1.17-hb664fd8_1.conda#b6116b8db33ea6a5b5287dae70d4a913 +https://repo.anaconda.com/pkgs/main/osx-64/libffi-3.4.4-hecd8cb5_1.conda#eb7f09ada4d95f1a26f483f1009d9286 
+https://repo.anaconda.com/pkgs/main/osx-64/libwebp-base-1.3.2-h6c40b1e_0.conda#d8fd9f599dd4e012694e69d119016442 https://repo.anaconda.com/pkgs/main/osx-64/llvm-openmp-14.0.6-h0dcd299_0.conda#b5804d32b87dc61ca94561ade33d5f2d https://repo.anaconda.com/pkgs/main/osx-64/ncurses-6.4-hcec6c5f_0.conda#0214d1ee980e217fabc695f1e40662aa -https://repo.anaconda.com/pkgs/main/noarch/tzdata-2023c-h04d1e81_0.conda#29db02adf8808f7c64642cead3e28acd -https://repo.anaconda.com/pkgs/main/osx-64/xz-5.4.2-h6c40b1e_0.conda#5e546d3c9765b4441e511804d58f6e3f -https://repo.anaconda.com/pkgs/main/osx-64/zlib-1.2.13-h4dc903c_0.conda#d0202dd912bfb45d3422786531717882 +https://repo.anaconda.com/pkgs/main/noarch/tzdata-2024a-h04d1e81_0.conda#452af53adae0a5b06eb5d05c707b2f25 +https://repo.anaconda.com/pkgs/main/osx-64/xz-5.4.6-h6c40b1e_1.conda#b40d69768d28133d8be1843def4f82f5 +https://repo.anaconda.com/pkgs/main/osx-64/zlib-1.2.13-h4b97444_1.conda#38e35f7c817fac0973034bfce6706ec2 https://repo.anaconda.com/pkgs/main/osx-64/ccache-3.7.9-hf120daa_0.conda#a01515a32e721c51d631283f991bc8ea -https://repo.anaconda.com/pkgs/main/osx-64/intel-openmp-2023.1.0-ha357a0b_43547.conda#aa6031369dd8c8cc6b2f393a0b2d9f0c +https://repo.anaconda.com/pkgs/main/osx-64/expat-2.6.2-hcec6c5f_0.conda#c748234dd7e242784198ab038372cb0c +https://repo.anaconda.com/pkgs/main/osx-64/intel-openmp-2023.1.0-ha357a0b_43548.conda#ba8a89ffe593eb88e4c01334753c40c3 https://repo.anaconda.com/pkgs/main/osx-64/lerc-3.0-he9d5cce_0.conda#aec2c3dbef836849c9260f05be04f3db -https://repo.anaconda.com/pkgs/main/osx-64/libbrotlidec-1.0.9-hca72f7f_7.conda#b85983951745cc666d9a1b42894210b2 -https://repo.anaconda.com/pkgs/main/osx-64/libbrotlienc-1.0.9-hca72f7f_7.conda#e306d7a1599202a7c95762443f110832 +https://repo.anaconda.com/pkgs/main/osx-64/libbrotlidec-1.0.9-h6c40b1e_8.conda#6338cd7779e614fc16d835990e627e04 +https://repo.anaconda.com/pkgs/main/osx-64/libbrotlienc-1.0.9-h6c40b1e_8.conda#2af01a7b3fdbed47ebe5c452c34e5c5d https://repo.anaconda.com/pkgs/main/osx-64/libgfortran5-11.3.0-h9dfd629_28.conda#1fa1a27ee100b1918c3021dbfa3895a3 https://repo.anaconda.com/pkgs/main/osx-64/libpng-1.6.39-h6c40b1e_0.conda#a3c824835f53ad27aeb86d2b55e47804 -https://repo.anaconda.com/pkgs/main/osx-64/lz4-c-1.9.4-hcec6c5f_0.conda#44291e9e6920cfff30caf1299f48db38 -https://repo.anaconda.com/pkgs/main/osx-64/openssl-1.1.1t-hca72f7f_0.conda#5027baac278975d148ee3887b3f4e911 +https://repo.anaconda.com/pkgs/main/osx-64/lz4-c-1.9.4-hcec6c5f_1.conda#aee0efbb45220e1985533dbff48551f8 +https://repo.anaconda.com/pkgs/main/osx-64/ninja-base-1.10.2-haf03e11_5.conda#c857c13129710a61395270656905c4a2 +https://repo.anaconda.com/pkgs/main/osx-64/openssl-3.0.13-hca72f7f_2.conda#4f840ec6217dff98040ff6be19cf3afb https://repo.anaconda.com/pkgs/main/osx-64/readline-8.2-hca72f7f_0.conda#971667436260e523f6f7355fdfa238bf https://repo.anaconda.com/pkgs/main/osx-64/tbb-2021.8.0-ha357a0b_0.conda#fb48530a3eea681c11dafb95b3387c0f -https://repo.anaconda.com/pkgs/main/osx-64/tk-8.6.12-h5d9f67b_0.conda#047f0af5486d19163e37fd7f8ae3d29f -https://repo.anaconda.com/pkgs/main/osx-64/brotli-bin-1.0.9-hca72f7f_7.conda#110bdca1a20710820e61f7fa3047f737 +https://repo.anaconda.com/pkgs/main/osx-64/tk-8.6.14-h4d00af3_0.conda#a2c03940c2ae54614301ec82e6a98d75 +https://repo.anaconda.com/pkgs/main/osx-64/brotli-bin-1.0.9-h6c40b1e_8.conda#11053f9c6b8d8a8348d0c33450c23ce9 https://repo.anaconda.com/pkgs/main/osx-64/freetype-2.12.1-hd8bbffd_0.conda#1f276af321375ee7fe8056843044fa76 
https://repo.anaconda.com/pkgs/main/osx-64/libgfortran-5.0.0-11_3_0_hecd8cb5_28.conda#2eb13b680803f1064e53873ae0aaafb3 -https://repo.anaconda.com/pkgs/main/osx-64/mkl-2023.1.0-h59209a4_43558.conda#898a058caf42cf8b706034be6e5b2d50 -https://repo.anaconda.com/pkgs/main/osx-64/sqlite-3.41.2-h6c40b1e_0.conda#6947a501943529c7536b7e4ba53802c1 -https://repo.anaconda.com/pkgs/main/osx-64/zstd-1.5.5-hc035e20_0.conda#5e0b7ddb1b7dc6b630e1f9a03499c19c -https://repo.anaconda.com/pkgs/main/osx-64/brotli-1.0.9-hca72f7f_7.conda#68e54d12ec67591deb2ffd70348fb00f -https://repo.anaconda.com/pkgs/main/osx-64/libtiff-4.5.0-hcec6c5f_2.conda#f0b033a82af1bd028f112cdecef1fe0a -https://repo.anaconda.com/pkgs/main/osx-64/python-3.11.3-h1fd4e5f_0.conda#df6f985ea9100007789662afeca11311 -https://repo.anaconda.com/pkgs/main/noarch/appdirs-1.4.4-pyhd3eb1b0_0.conda#5673d98d06171cb6eed03a6736845c4d -https://repo.anaconda.com/pkgs/main/osx-64/attrs-22.1.0-py311hecd8cb5_0.conda#d87b931f00c25263ede3d7ec691389af -https://repo.anaconda.com/pkgs/main/osx-64/certifi-2023.5.7-py311hecd8cb5_0.conda#c7cb5a9de1041b8b59f92089bd9aa55e -https://repo.anaconda.com/pkgs/main/noarch/charset-normalizer-2.0.4-pyhd3eb1b0_0.conda#e7a441d94234b2b5fafee06e25dbf076 -https://repo.anaconda.com/pkgs/main/osx-64/coverage-7.2.2-py311h6c40b1e_0.conda#e15605553450156cf75c3ae38a920475 +https://repo.anaconda.com/pkgs/main/osx-64/mkl-2023.1.0-h8e150cf_43560.conda#85d0f3431dd5c6ae44f8725fdd3d3e59 +https://repo.anaconda.com/pkgs/main/osx-64/sqlite-3.45.3-h6c40b1e_0.conda#2edf909b937b3aad48322c9cb2e8f1a0 +https://repo.anaconda.com/pkgs/main/osx-64/zstd-1.5.5-hc035e20_2.conda#c033bf68c12f8c71fd916f000f3dc118 +https://repo.anaconda.com/pkgs/main/osx-64/brotli-1.0.9-h6c40b1e_8.conda#10f89677a3898d0113dc354adf643df3 +https://repo.anaconda.com/pkgs/main/osx-64/libtiff-4.5.1-hcec6c5f_0.conda#e127a800ffd9d300ed7d5e1b026944ec +https://repo.anaconda.com/pkgs/main/osx-64/python-3.12.3-hd58486a_1.conda#cdc61e8f6c2d77b3b263e720048c4b54 +https://repo.anaconda.com/pkgs/main/osx-64/coverage-7.2.2-py312h6c40b1e_0.conda#b6e4b9fba325047c07f3c9211ae91d1c https://repo.anaconda.com/pkgs/main/noarch/cycler-0.11.0-pyhd3eb1b0_0.conda#f5e365d2cdb66d547eb8c3ab93843aab -https://repo.anaconda.com/pkgs/main/osx-64/cython-0.29.33-py311hcec6c5f_0.conda#9865281df3b2e61f46dc189ae46c5abc https://repo.anaconda.com/pkgs/main/noarch/execnet-1.9.0-pyhd3eb1b0_0.conda#f895937671af67cebb8af617494b3513 -https://repo.anaconda.com/pkgs/main/osx-64/idna-3.4-py311hecd8cb5_0.conda#48ab3e9b53e5607abe86a920cd37e13a https://repo.anaconda.com/pkgs/main/noarch/iniconfig-1.1.1-pyhd3eb1b0_0.tar.bz2#e40edff2c5708f342cef43c7f280c507 -https://repo.anaconda.com/pkgs/main/osx-64/joblib-1.2.0-py311hecd8cb5_0.conda#af8c1fcd4e8e0c6fa2a4f4ecda261dc9 -https://repo.anaconda.com/pkgs/main/osx-64/kiwisolver-1.4.4-py311hcec6c5f_0.conda#f2cf31e2a762f071fd6bc4d74ea2bfc8 +https://repo.anaconda.com/pkgs/main/osx-64/joblib-1.4.0-py312hecd8cb5_0.conda#0af12a3a87d9c8051ae6ba2ed2c3882a +https://repo.anaconda.com/pkgs/main/osx-64/kiwisolver-1.4.4-py312hcec6c5f_0.conda#2ba6561ddd1d05936fe74f5d118ce7dd https://repo.anaconda.com/pkgs/main/osx-64/lcms2-2.12-hf1fd2bf_0.conda#697aba7a3308226df7a93ccfeae16ffa -https://repo.anaconda.com/pkgs/main/osx-64/libwebp-1.2.4-hf6ce154_1.conda#07d0981c3847293d4aea5778298a12d3 -https://repo.anaconda.com/pkgs/main/osx-64/mkl-service-2.4.0-py311h6c40b1e_1.conda#f709b80c57a0fcc577319920d1b7228b -https://repo.anaconda.com/pkgs/main/noarch/munkres-1.1.4-py_0.conda#148362ba07f92abab76999a680c80084 
-https://repo.anaconda.com/pkgs/main/osx-64/packaging-23.0-py311hecd8cb5_0.conda#456989f87701680b35cab3edc49e223d -https://repo.anaconda.com/pkgs/main/osx-64/pluggy-1.0.0-py311hecd8cb5_1.conda#98e4da64cd934965a0caf4136280ff35 -https://repo.anaconda.com/pkgs/main/noarch/py-1.11.0-pyhd3eb1b0_0.conda#7205a898ed2abbf6e9b903dff6abe08e -https://repo.anaconda.com/pkgs/main/noarch/pycparser-2.21-pyhd3eb1b0_0.conda#135a72ff2a31150a3a3ff0b1edd41ca9 -https://repo.anaconda.com/pkgs/main/osx-64/pyparsing-3.0.9-py311hecd8cb5_0.conda#a4262f849ecc82af69f58da0cbcaaf04 -https://repo.anaconda.com/pkgs/main/osx-64/pysocks-1.7.1-py311hecd8cb5_0.conda#6a9c1a311e30a9776b3297fe1480fa38 -https://repo.anaconda.com/pkgs/main/osx-64/pytz-2022.7-py311hecd8cb5_0.conda#87c5590ad0bdf9c5c76feb22b7fbd5ba -https://repo.anaconda.com/pkgs/main/osx-64/setuptools-67.8.0-py311hecd8cb5_0.conda#9a01cd68b3c26dbdb25f31ee5b32819f +https://repo.anaconda.com/pkgs/main/osx-64/mkl-service-2.4.0-py312h6c40b1e_1.conda#b1ef860be9043b35c5e8d9388b858514 +https://repo.anaconda.com/pkgs/main/osx-64/ninja-1.10.2-hecd8cb5_5.conda#a0043b325fb08db82477ae433668e684 +https://repo.anaconda.com/pkgs/main/osx-64/openjpeg-2.4.0-h66ea3da_0.conda#882833bd7befc5e60e6fba9c518c1b79 +https://repo.anaconda.com/pkgs/main/osx-64/packaging-23.2-py312hecd8cb5_0.conda#2b4e331c8f6df5d95a5dd3af37a34d89 +https://repo.anaconda.com/pkgs/main/osx-64/pluggy-1.0.0-py312hecd8cb5_1.conda#647fada22f1697691fdee90b52c99bcb +https://repo.anaconda.com/pkgs/main/osx-64/pyparsing-3.0.9-py312hecd8cb5_0.conda#d85cf2b81c6d9326a57a6418e14db258 +https://repo.anaconda.com/pkgs/main/noarch/python-tzdata-2023.3-pyhd3eb1b0_0.conda#479c037de0186d114b9911158427624e +https://repo.anaconda.com/pkgs/main/osx-64/pytz-2024.1-py312hecd8cb5_0.conda#2b28ec0e0d07f5c0c701f75200b1e8b6 +https://repo.anaconda.com/pkgs/main/osx-64/setuptools-69.5.1-py312hecd8cb5_0.conda#5c7c7ef1e0762e3ca1f543d28310946f https://repo.anaconda.com/pkgs/main/noarch/six-1.16.0-pyhd3eb1b0_1.conda#34586824d411d36af2fa40e799c172d0 -https://repo.anaconda.com/pkgs/main/noarch/threadpoolctl-2.2.0-pyh0d69192_0.conda#bbfdbae4934150b902f97daaf287efe2 https://repo.anaconda.com/pkgs/main/noarch/toml-0.10.2-pyhd3eb1b0_0.conda#cda05f5f6d8509529d1a2743288d197a -https://repo.anaconda.com/pkgs/main/osx-64/tomli-2.0.1-py311hecd8cb5_0.conda#d69dd2914a3eb8cf856a14455dd3f458 -https://repo.anaconda.com/pkgs/main/osx-64/tornado-6.2-py311h6c40b1e_0.conda#04ec029d2ac86baa6140fd0a36c971b6 -https://repo.anaconda.com/pkgs/main/osx-64/cffi-1.15.1-py311h6c40b1e_3.conda#5eb14a7a7187a7593f09dafc7a26ff23 -https://repo.anaconda.com/pkgs/main/noarch/fonttools-4.25.0-pyhd3eb1b0_0.conda#bb9c5b5a6d892fca5efe4bf0203b6a48 -https://repo.anaconda.com/pkgs/main/osx-64/numpy-base-1.24.3-py311h53bf9ac_1.conda#1b1957e3823208a006d0699999335c7d -https://repo.anaconda.com/pkgs/main/osx-64/pillow-9.4.0-py311hcec6c5f_0.conda#fccbb731e918b59d44372354ff2e24f9 -https://repo.anaconda.com/pkgs/main/osx-64/pytest-7.3.1-py311hecd8cb5_0.conda#0247a6236ee44b38f6f0dc54ca3cbe7a -https://repo.anaconda.com/pkgs/main/noarch/python-dateutil-2.8.2-pyhd3eb1b0_0.conda#211ee00320b08a1ac9fea6677649f6c9 -https://repo.anaconda.com/pkgs/main/osx-64/brotlipy-0.7.0-py311h6c40b1e_1002.conda#214a3acdf6f828a764263d430826688b -https://repo.anaconda.com/pkgs/main/osx-64/cryptography-39.0.1-py311hf6deb26_0.conda#baf00061474e2c639029b0208d3eaf2e -https://repo.anaconda.com/pkgs/main/osx-64/pytest-cov-4.0.0-py311hecd8cb5_0.conda#c63893569d344f4297f2ae08e0387ccf 
-https://repo.anaconda.com/pkgs/main/noarch/pytest-forked-1.3.0-pyhd3eb1b0_0.tar.bz2#07970bffdc78f417d7f8f1c7e620f5c4 -https://repo.anaconda.com/pkgs/main/osx-64/pyopenssl-23.0.0-py311hecd8cb5_0.conda#d034f753f088967f765030dc5742c1d7 -https://repo.anaconda.com/pkgs/main/noarch/pytest-xdist-2.5.0-pyhd3eb1b0_0.conda#d15cdc4207bcf8ca920822597f1d138d -https://repo.anaconda.com/pkgs/main/osx-64/urllib3-1.26.15-py311hecd8cb5_0.conda#2ce7c8e3fe61096e275f3d078485f7b6 -https://repo.anaconda.com/pkgs/main/osx-64/requests-2.29.0-py311hecd8cb5_0.conda#5ea75ca544f2a7b0a2660368bf886006 -https://repo.anaconda.com/pkgs/main/noarch/pooch-1.4.0-pyhd3eb1b0_0.conda#69ec83cb3d152f9e854115555004f368 -https://repo.anaconda.com/pkgs/main/osx-64/bottleneck-1.3.5-py311hb9e55a9_0.conda#5aa1b58b421d4608b16184f8468253ef -https://repo.anaconda.com/pkgs/main/osx-64/contourpy-1.0.5-py311ha357a0b_0.conda#a130f83ba4b5d008e0c134c73e10b8fb -https://repo.anaconda.com/pkgs/main/osx-64/matplotlib-3.7.1-py311hecd8cb5_1.conda#6ec92c9f01ff593b177da73ab17e9f54 -https://repo.anaconda.com/pkgs/main/osx-64/matplotlib-base-3.7.1-py311h11e8b89_1.conda#316c82d7fe9cc95e8bf3db6466acd8b6 -https://repo.anaconda.com/pkgs/main/osx-64/mkl_fft-1.3.6-py311hdb55bb0_1.conda#da20367a256f5fa494c9db517ab86f4b -https://repo.anaconda.com/pkgs/main/osx-64/mkl_random-1.2.2-py311hdb55bb0_1.conda#9b1de8f6e280fb8e74f186007a0b4ca4 -https://repo.anaconda.com/pkgs/main/osx-64/numpy-1.24.3-py311h728a8a3_1.conda#68069c79ebb0cdd2561026a909a57183 -https://repo.anaconda.com/pkgs/main/osx-64/numexpr-2.8.4-py311h728a8a3_1.conda#be9facbd68b7476262684afb69fd2841 -https://repo.anaconda.com/pkgs/main/osx-64/scipy-1.10.1-py311h224febf_1.conda#a3ae336a401d47b73b17c3b5d780de78 -https://repo.anaconda.com/pkgs/main/osx-64/pandas-1.5.3-py311hc5848a5_0.conda#4111406bad69018aa5e1cb04561a4374 -https://repo.anaconda.com/pkgs/main/osx-64/pyamg-4.2.3-py311h37a6a59_0.conda#5fca7d043dc68c1d7acc22aa03a24918 +https://repo.anaconda.com/pkgs/main/osx-64/tornado-6.3.3-py312h6c40b1e_0.conda#49173b5a36c9134865221f29d4a73fb6 +https://repo.anaconda.com/pkgs/main/osx-64/unicodedata2-15.1.0-py312h6c40b1e_0.conda#65bd2cb787fc99662d9bb6e6520c5826 +https://repo.anaconda.com/pkgs/main/osx-64/wheel-0.43.0-py312hecd8cb5_0.conda#c0bdd5748b170523232e8ad1d667136c +https://repo.anaconda.com/pkgs/main/osx-64/fonttools-4.51.0-py312h6c40b1e_0.conda#8f55fa86b73e8a7f4403503f9b7a9959 +https://repo.anaconda.com/pkgs/main/osx-64/numpy-base-1.26.4-py312h6f81483_0.conda#87f73efbf26ab2e2ea7c32481a71bd47 +https://repo.anaconda.com/pkgs/main/osx-64/pillow-10.3.0-py312h6c40b1e_0.conda#fe883fa4247d35fe6de49f713529ca02 +https://repo.anaconda.com/pkgs/main/osx-64/pip-24.0-py312hecd8cb5_0.conda#7a8e0b1d3742ddf1c8aa97fbaa158039 +https://repo.anaconda.com/pkgs/main/osx-64/pytest-7.4.4-py312hecd8cb5_0.conda#d4dda983900b045cd27ae836cad670de +https://repo.anaconda.com/pkgs/main/osx-64/python-dateutil-2.9.0post0-py312hecd8cb5_2.conda#1047dde28f78127dd9f6121e882926dd +https://repo.anaconda.com/pkgs/main/osx-64/pytest-cov-4.1.0-py312hecd8cb5_1.conda#a33a24eb20359f464938e75b2f57e23a +https://repo.anaconda.com/pkgs/main/osx-64/pytest-xdist-3.5.0-py312hecd8cb5_0.conda#d1ecfb3691cceecb1f16bcfdf0b67bb5 +https://repo.anaconda.com/pkgs/main/osx-64/bottleneck-1.3.7-py312h32608ca_0.conda#f96a01eba5ea542cf9c7cc8d77447627 +https://repo.anaconda.com/pkgs/main/osx-64/contourpy-1.2.0-py312ha357a0b_0.conda#57d384ad07152375b40a6293f79e3f0c 
+https://repo.anaconda.com/pkgs/main/osx-64/matplotlib-3.8.4-py312hecd8cb5_0.conda#6886c230c2ec2f47621b5cca4c7d493a +https://repo.anaconda.com/pkgs/main/osx-64/matplotlib-base-3.8.4-py312h7f12edd_0.conda#a4eee14a4dcaa89b306ca33d2d479fa4 +https://repo.anaconda.com/pkgs/main/osx-64/mkl_fft-1.3.8-py312h6c40b1e_0.conda#d59d01b940493f2b6a84aac922fd0c76 +https://repo.anaconda.com/pkgs/main/osx-64/mkl_random-1.2.4-py312ha357a0b_0.conda#c1ea9c8eee79a5af3399f3c31be0e9c6 +https://repo.anaconda.com/pkgs/main/osx-64/numpy-1.26.4-py312hac873b0_0.conda#3150bac1e382156f82a153229e1ebd06 +https://repo.anaconda.com/pkgs/main/osx-64/numexpr-2.8.7-py312hac873b0_0.conda#6303ba071636ef57fddf69eb6f440ec1 +https://repo.anaconda.com/pkgs/main/osx-64/scipy-1.11.4-py312h81688c2_0.conda#7d57b4c21a9261f97fa511e0940c5d93 +https://repo.anaconda.com/pkgs/main/osx-64/pandas-2.2.1-py312he282a81_0.conda#021b70a1e40efb75b89eb8ebdb347132 +https://repo.anaconda.com/pkgs/main/osx-64/pyamg-4.2.3-py312h44cbcf4_0.conda#3bdc7be74087b3a5a83c520a74e1e8eb +# pip cython @ https://files.pythonhosted.org/packages/d5/6d/06c08d75adb98cdf72af18801e193d22580cc86ca553610f430f18ea26b3/Cython-3.0.10-cp312-cp312-macosx_10_9_x86_64.whl#sha256=8f2864ab5fcd27a346f0b50f901ebeb8f60b25a60a575ccfd982e7f3e9674914 +# pip meson @ https://files.pythonhosted.org/packages/44/b2/d4433391a7c5e94a39b50ca7295a8ceba736e7c72c455752a60122f52453/meson-1.4.1-py3-none-any.whl#sha256=d5acc3abae2dad3c70ddcbd10acac92b78b144d34d43f40f5b8ac31dfd8a826a +# pip threadpoolctl @ https://files.pythonhosted.org/packages/4b/2c/ffbf7a134b9ab11a67b0cf0726453cedd9c5043a4fe7a35d1cefa9a1bcfb/threadpoolctl-3.5.0-py3-none-any.whl#sha256=56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467 +# pip pyproject-metadata @ https://files.pythonhosted.org/packages/aa/5f/bb5970d3d04173b46c9037109f7f05fc8904ff5be073ee49bb6ff00301bc/pyproject_metadata-0.8.0-py3-none-any.whl#sha256=ad858d448e1d3a1fb408ac5bac9ea7743e7a8bbb472f2693aaa334d2db42f526 +# pip meson-python @ https://files.pythonhosted.org/packages/91/c0/104cb6244c83fe6bc3886f144cc433db0c0c78efac5dc00e409a5a08c87d/meson_python-0.16.0-py3-none-any.whl#sha256=842dc9f5dc29e55fc769ff1b6fe328412fe6c870220fc321060a1d2d395e69e8 diff --git a/build_tools/azure/pylatest_pip_openblas_pandas_environment.yml b/build_tools/azure/pylatest_pip_openblas_pandas_environment.yml index ddbc75c1d9110..adb7add7622e1 100644 --- a/build_tools/azure/pylatest_pip_openblas_pandas_environment.yml +++ b/build_tools/azure/pylatest_pip_openblas_pandas_environment.yml @@ -17,9 +17,10 @@ dependencies: - pandas - pyamg - pytest - - pytest-xdist==2.5.0 + - pytest-xdist - pillow - - setuptools + - ninja + - meson-python - pytest-cov - coverage - sphinx diff --git a/build_tools/azure/pylatest_pip_openblas_pandas_linux-64_conda.lock b/build_tools/azure/pylatest_pip_openblas_pandas_linux-64_conda.lock index eab6dc087f26d..449ca9a70121d 100644 --- a/build_tools/azure/pylatest_pip_openblas_pandas_linux-64_conda.lock +++ b/build_tools/azure/pylatest_pip_openblas_pandas_linux-64_conda.lock @@ -1,89 +1,88 @@ # Generated by conda-lock. 
# platform: linux-64 -# input_hash: 61862ec58344ddfaad255f4687ca311eb7e2e61001e209d63f0cc92f97178848 +# input_hash: 37f8029b6bb116e0d5856093424791a8c1ddc3f493e20fcb5d02cd32d516523d @EXPLICIT https://repo.anaconda.com/pkgs/main/linux-64/_libgcc_mutex-0.1-main.conda#c3473ff8bdb3d124ed5ff11ec380d6f9 -https://repo.anaconda.com/pkgs/main/linux-64/ca-certificates-2023.01.10-h06a4308_0.conda#7704989a2ccf6c1f5a50c985509841c4 +https://repo.anaconda.com/pkgs/main/linux-64/ca-certificates-2024.3.11-h06a4308_0.conda#08529eb3504712baabcbda266a19feb7 https://repo.anaconda.com/pkgs/main/linux-64/ld_impl_linux-64-2.38-h1181459_1.conda#68eedfd9c06f2b0e6888d8db345b7f5b -https://repo.anaconda.com/pkgs/main/noarch/tzdata-2023c-h04d1e81_0.conda#29db02adf8808f7c64642cead3e28acd +https://repo.anaconda.com/pkgs/main/noarch/tzdata-2024a-h04d1e81_0.conda#452af53adae0a5b06eb5d05c707b2f25 https://repo.anaconda.com/pkgs/main/linux-64/libgomp-11.2.0-h1234567_1.conda#b372c0eea9b60732fdae4b817a63c8cd https://repo.anaconda.com/pkgs/main/linux-64/libstdcxx-ng-11.2.0-h1234567_1.conda#57623d10a70e09e1d048c2b2b6f4e2dd https://repo.anaconda.com/pkgs/main/linux-64/_openmp_mutex-5.1-1_gnu.conda#71d281e9c2192cb3fa425655a8defb85 https://repo.anaconda.com/pkgs/main/linux-64/libgcc-ng-11.2.0-h1234567_1.conda#a87728dabf3151fb9cfa990bd2eb0464 -https://repo.anaconda.com/pkgs/main/linux-64/libffi-3.4.4-h6a678d5_0.conda#06e288f9250abef59b9a367d151fc339 +https://repo.anaconda.com/pkgs/main/linux-64/libffi-3.4.4-h6a678d5_1.conda#70646cc713f0c43926cfdcfe9b695fe0 https://repo.anaconda.com/pkgs/main/linux-64/ncurses-6.4-h6a678d5_0.conda#5558eec6e2191741a92f832ea826251c -https://repo.anaconda.com/pkgs/main/linux-64/openssl-1.1.1t-h7f8727e_0.conda#0410db682c02665511bd4203ade48a32 -https://repo.anaconda.com/pkgs/main/linux-64/xz-5.4.2-h5eee18b_0.conda#bcd31de48a0dcb44bc5b99675800c5cc -https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.13-h5eee18b_0.conda#333e31fbfbb5057c92fa845ad6adef93 +https://repo.anaconda.com/pkgs/main/linux-64/openssl-3.0.13-h7f8727e_2.conda#0019453c25f5e72129f99236e60febaa +https://repo.anaconda.com/pkgs/main/linux-64/xz-5.4.6-h5eee18b_1.conda#1562802f843297ee776a50b9329597ed +https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.13-h5eee18b_1.conda#92e42d8310108b0a440fb2e60b2b2a25 https://repo.anaconda.com/pkgs/main/linux-64/ccache-3.7.9-hfe4627d_0.conda#bef6fc681c273bb7bd0c67d1a591365e https://repo.anaconda.com/pkgs/main/linux-64/readline-8.2-h5eee18b_0.conda#be42180685cce6e6b0329201d9f48efb -https://repo.anaconda.com/pkgs/main/linux-64/tk-8.6.12-h1ccaba5_0.conda#fa10ff4aa631fa4aa090a6234d7770b9 -https://repo.anaconda.com/pkgs/main/linux-64/sqlite-3.41.2-h5eee18b_0.conda#c7086c9ceb6cfe1c4c729a774a2d88a5 -https://repo.anaconda.com/pkgs/main/linux-64/python-3.9.16-h7a1cb2a_2.conda#6b4f255f11b3facb3fa17061757b8cc2 -https://repo.anaconda.com/pkgs/main/linux-64/setuptools-67.8.0-py39h06a4308_0.conda#3d40bf5ad5f24b0c96624efd2cff1c80 -https://repo.anaconda.com/pkgs/main/linux-64/wheel-0.38.4-py39h06a4308_0.conda#83e731cfecb3797a0f2865615177f433 -https://repo.anaconda.com/pkgs/main/linux-64/pip-23.0.1-py39h06a4308_0.conda#e36d76b4611ca9b5d8bd180232aecbac -# pip alabaster @ https://files.pythonhosted.org/packages/64/88/c7083fc61120ab661c5d0b82cb77079fc1429d3f913a456c1c82cf4658f7/alabaster-0.7.13-py3-none-any.whl#sha256=1ee19aca801bbabb5ba3f5f258e4422dfa86f82f3e9cefb0859b283cdd7f62a3 -# pip babel @ 
https://files.pythonhosted.org/packages/df/c4/1088865e0246d7ecf56d819a233ab2b72f7d6ab043965ef327d0731b5434/Babel-2.12.1-py3-none-any.whl#sha256=b4246fb7677d3b98f501a39d43396d3cafdc8eadb045f4a31be01863f655c610 -# pip certifi @ https://files.pythonhosted.org/packages/9d/19/59961b522e6757f0c9097e4493fa906031b95b3ebe9360b2c3083561a6b4/certifi-2023.5.7-py3-none-any.whl#sha256=c6c2e98f5c7869efca1f8916fed228dd91539f9f1b444c314c06eef02980c716 -# pip charset-normalizer @ https://files.pythonhosted.org/packages/33/97/9967fb2d364a9da38557e4af323abcd58cc05bdd8f77e9fd5ae4882772cc/charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=21fa558996782fc226b529fdd2ed7866c2c6ec91cee82735c98a197fae39f706 -# pip cycler @ https://files.pythonhosted.org/packages/5c/f9/695d6bedebd747e5eb0fe8fad57b72fdf25411273a39791cde838d5a8f51/cycler-0.11.0-py3-none-any.whl#sha256=3a27e95f763a428a739d2add979fa7494c912a32c17c4c38c4d5f082cad165a3 -# pip cython @ https://files.pythonhosted.org/packages/01/fd/5e489abe8ee99a52366b5ae99518b64f6024c6dd331b4d75a6a9ac48f429/Cython-0.29.35-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl#sha256=c4cd7de707938b8385cd1f88e1446228fbfe09af7822fa13877a4374c4881198 -# pip docutils @ https://files.pythonhosted.org/packages/26/87/f238c0670b94533ac0353a4e2a1a771a0cc73277b88bff23d3ae35a256c1/docutils-0.20.1-py3-none-any.whl#sha256=96f387a2c5562db4476f09f13bbab2192e764cac08ebbf3a34a95d9b1e4a59d6 -# pip exceptiongroup @ https://files.pythonhosted.org/packages/61/97/17ed81b7a8d24d8f69b62c0db37abbd8c0042d4b3fc429c73dab986e7483/exceptiongroup-1.1.1-py3-none-any.whl#sha256=232c37c63e4f682982c8b6459f33a8981039e5fb8756b2074364e5055c498c9e -# pip execnet @ https://files.pythonhosted.org/packages/81/c0/3072ecc23f4c5e0a1af35e3a222855cfd9c80a1a105ca67be3b6172637dd/execnet-1.9.0-py2.py3-none-any.whl#sha256=a295f7cc774947aac58dde7fdc85f4aa00c42adf5d8f5468fc630c1acf30a142 -# pip fonttools @ https://files.pythonhosted.org/packages/ad/5f/20da4f41e33e77723b0100ded6539529bd159319ed49d6459a4647cdc7ee/fonttools-4.39.4-py3-none-any.whl#sha256=106caf6167c4597556b31a8d9175a3fdc0356fdcd70ab19973c3b0d4c893c461 -# pip idna @ https://files.pythonhosted.org/packages/fc/34/3030de6f1370931b9dbb4dad48f6ab1015ab1d32447850b9fc94e60097be/idna-3.4-py3-none-any.whl#sha256=90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2 +https://repo.anaconda.com/pkgs/main/linux-64/tk-8.6.14-h39e8969_0.conda#78dbc5e3c69143ebc037fc5d5b22e597 +https://repo.anaconda.com/pkgs/main/linux-64/sqlite-3.45.3-h5eee18b_0.conda#acf93d6aceb74d6110e20b44cc45939e +https://repo.anaconda.com/pkgs/main/linux-64/python-3.9.19-h955ad1f_1.conda#4b453281859c293c9d577271f3b18a0d +https://repo.anaconda.com/pkgs/main/linux-64/setuptools-69.5.1-py39h06a4308_0.conda#3eb144d481b39c0fbbced789dd9b76b3 +https://repo.anaconda.com/pkgs/main/linux-64/wheel-0.43.0-py39h06a4308_0.conda#40bb60408c7433d767fd8c65b35bc4a0 +https://repo.anaconda.com/pkgs/main/linux-64/pip-24.0-py39h06a4308_0.conda#7f8ce3af15cfecd12e4dda8c5cef5fb7 +# pip alabaster @ https://files.pythonhosted.org/packages/32/34/d4e1c02d3bee589efb5dfa17f88ea08bdb3e3eac12bc475462aec52ed223/alabaster-0.7.16-py3-none-any.whl#sha256=b46733c07dce03ae4e150330b975c75737fa60f0a7c591b6c8bf4928a28e2c92 +# pip babel @ https://files.pythonhosted.org/packages/27/45/377f7e32a5c93d94cd56542349b34efab5ca3f9e2fd5a68c5e93169aa32d/Babel-2.15.0-py3-none-any.whl#sha256=08706bdad8d0a3413266ab61bd6c34d0c28d6e1e7badf40a2cebe67644e2e1fb +# pip certifi @ 
https://files.pythonhosted.org/packages/5b/11/1e78951465b4a225519b8c3ad29769c49e0d8d157a070f681d5b6d64737f/certifi-2024.6.2-py3-none-any.whl#sha256=ddc6c8ce995e6987e7faf5e3f1b02b302836a0e5d98ece18392cb1a36c72ad56 +# pip charset-normalizer @ https://files.pythonhosted.org/packages/98/69/5d8751b4b670d623aa7a47bef061d69c279e9f922f6705147983aa76c3ce/charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796 +# pip cycler @ https://files.pythonhosted.org/packages/e7/05/c19819d5e3d95294a6f5947fb9b9629efb316b96de511b418c53d245aae6/cycler-0.12.1-py3-none-any.whl#sha256=85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30 +# pip cython @ https://files.pythonhosted.org/packages/a7/f5/3dde4d96076888ceaa981827b098274c2b45ddd4b20d75a8cfaa92b91eec/Cython-3.0.10-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=651a15a8534ebfb9b58cb0b87c269c70984b6f9c88bfe65e4f635f0e3f07dfcd +# pip docutils @ https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl#sha256=dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2 +# pip exceptiongroup @ https://files.pythonhosted.org/packages/01/90/79fe92dd413a9cab314ef5c591b5aa9b9ba787ae4cadab75055b0ae00b33/exceptiongroup-1.2.1-py3-none-any.whl#sha256=5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad +# pip execnet @ https://files.pythonhosted.org/packages/43/09/2aea36ff60d16dd8879bdb2f5b3ee0ba8d08cbbdcdfe870e695ce3784385/execnet-2.1.1-py3-none-any.whl#sha256=26dee51f1b80cebd6d0ca8e74dd8745419761d3bef34163928cbebbdc4749fdc +# pip fonttools @ https://files.pythonhosted.org/packages/c1/cb/b1877d606dfa1daca70324bf37afec2b0a386138c467580027b9b51188a8/fonttools-4.53.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=ba9f09ff17f947392a855e3455a846f9855f6cf6bec33e9a427d3c1d254c712f +# pip idna @ https://files.pythonhosted.org/packages/e5/3e/741d8c82801c347547f8a2a06aa57dbb1992be9e948df2ea0eda2c8b79e8/idna-3.7-py3-none-any.whl#sha256=82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0 # pip imagesize @ https://files.pythonhosted.org/packages/ff/62/85c4c919272577931d407be5ba5d71c20f0b616d31a0befe0ae45bb79abd/imagesize-1.4.1-py2.py3-none-any.whl#sha256=0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b # pip iniconfig @ https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl#sha256=b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374 -# pip joblib @ https://files.pythonhosted.org/packages/91/d4/3b4c8e5a30604df4c7518c562d4bf0502f2fa29221459226e140cf846512/joblib-1.2.0-py3-none-any.whl#sha256=091138ed78f800342968c523bdde947e7a305b8594b910a0fea2ab83c3c6d385 -# pip kiwisolver @ https://files.pythonhosted.org/packages/a4/36/c414d75be311ce97ef7248edcc4fc05afae2998641bf6b592d43a9dee581/kiwisolver-1.4.4-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl#sha256=7c43e1e1206cd421cd92e6b3280d4385d41d7166b3ed577ac20444b6995a445f -# pip lazy-loader @ https://files.pythonhosted.org/packages/a1/a8/c41f46b47a381bd60a40c0ef00d2fd1722b743b178f9c1cec0da949043de/lazy_loader-0.2-py3-none-any.whl#sha256=c35875f815c340f823ce3271ed645045397213f961b40ad0c0d395c3f5218eeb -# pip markupsafe @ 
https://files.pythonhosted.org/packages/de/63/cb7e71984e9159ec5f45b5e81e896c8bdd0e45fe3fc6ce02ab497f0d790e/MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=05fb21170423db021895e1ea1e1f3ab3adb85d1c2333cbc2310f2a26bc77272e -# pip networkx @ https://files.pythonhosted.org/packages/a8/05/9d4f9b78ead6b2661d6e8ea772e111fc4a9fbd866ad0c81906c11206b55e/networkx-3.1-py3-none-any.whl#sha256=4f33f68cb2afcf86f28a45f43efc27a9386b535d567d2127f8f61d51dec58d36 -# pip numpy @ https://files.pythonhosted.org/packages/83/be/de078ac5e4ff572b1bdac1808b77cea2013b2c6286282f89b1de3e951273/numpy-1.24.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=210461d87fb02a84ef243cac5e814aad2b7f4be953b32cb53327bb49fd77fbb4 -# pip packaging @ https://files.pythonhosted.org/packages/ab/c3/57f0601a2d4fe15de7a553c00adbc901425661bf048f2a22dfc500caf121/packaging-23.1-py3-none-any.whl#sha256=994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61 -# pip pillow @ https://files.pythonhosted.org/packages/ff/fc/48a51c0fe2a00d5def57b9981a1e0f8339b516351da7a51500383d833bc8/Pillow-9.5.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=608488bdcbdb4ba7837461442b90ea6f3079397ddc968c31265c1e056964f1ef -# pip pluggy @ https://files.pythonhosted.org/packages/9e/01/f38e2ff29715251cf25532b9082a1589ab7e4f571ced434f98d0139336dc/pluggy-1.0.0-py2.py3-none-any.whl#sha256=74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3 -# pip py @ https://files.pythonhosted.org/packages/f6/f0/10642828a8dfb741e5f3fbaac830550a518a775c7fff6f04a007259b0548/py-1.11.0-py2.py3-none-any.whl#sha256=607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378 -# pip pygments @ https://files.pythonhosted.org/packages/34/a7/37c8d68532ba71549db4212cb036dbd6161b40e463aba336770e80c72f84/Pygments-2.15.1-py3-none-any.whl#sha256=db2db3deb4b4179f399a09054b023b6a586b76499d36965813c71aa8ed7b5fd1 -# pip pyparsing @ https://files.pythonhosted.org/packages/6c/10/a7d0fa5baea8fe7b50f448ab742f26f52b80bfca85ac2be9d35cdd9a3246/pyparsing-3.0.9-py3-none-any.whl#sha256=5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc -# pip pytz @ https://files.pythonhosted.org/packages/7f/99/ad6bd37e748257dd70d6f85d916cafe79c0b0f5e2e95b11f7fbc82bf3110/pytz-2023.3-py2.py3-none-any.whl#sha256=a151b3abb88eda1d4e34a9814df37de2a80e301e68ba0fd856fb9b46bfbbbffb +# pip joblib @ https://files.pythonhosted.org/packages/91/29/df4b9b42f2be0b623cbd5e2140cafcaa2bef0759a00b7b70104dcfe2fb51/joblib-1.4.2-py3-none-any.whl#sha256=06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6 +# pip kiwisolver @ https://files.pythonhosted.org/packages/c0/a8/841594f11d0b88d8aeb26991bc4dac38baa909dc58d0c4262a4f7893bcbf/kiwisolver-1.4.5-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl#sha256=6c3bd3cde54cafb87d74d8db50b909705c62b17c2099b8f2e25b461882e544ff +# pip markupsafe @ https://files.pythonhosted.org/packages/5f/5a/360da85076688755ea0cceb92472923086993e86b5613bbae9fbc14136b0/MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3 +# pip meson @ https://files.pythonhosted.org/packages/44/b2/d4433391a7c5e94a39b50ca7295a8ceba736e7c72c455752a60122f52453/meson-1.4.1-py3-none-any.whl#sha256=d5acc3abae2dad3c70ddcbd10acac92b78b144d34d43f40f5b8ac31dfd8a826a +# pip networkx @ 
https://files.pythonhosted.org/packages/d5/f0/8fbc882ca80cf077f1b246c0e3c3465f7f415439bdea6b899f6b19f61f70/networkx-3.2.1-py3-none-any.whl#sha256=f18c69adc97877c42332c170849c96cefa91881c99a7cb3e95b7c659ebdc1ec2 +# pip ninja @ https://files.pythonhosted.org/packages/6d/92/8d7aebd4430ab5ff65df2bfee6d5745f95c004284db2d8ca76dcbfd9de47/ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl#sha256=84502ec98f02a037a169c4b0d5d86075eaf6afc55e1879003d6cab51ced2ea4b +# pip numpy @ https://files.pythonhosted.org/packages/54/30/c2a907b9443cf42b90c17ad10c1e8fa801975f01cb9764f3f8eb8aea638b/numpy-1.26.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3 +# pip packaging @ https://files.pythonhosted.org/packages/08/aa/cc0199a5f0ad350994d660967a8efb233fe0416e4639146c089643407ce6/packaging-24.1-py3-none-any.whl#sha256=5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124 +# pip pillow @ https://files.pythonhosted.org/packages/f5/6d/52e82352670e850f468de9e6bccced4202a09f58e7ea5ecdbf08283d85cb/pillow-10.3.0-cp39-cp39-manylinux_2_28_x86_64.whl#sha256=1dfc94946bc60ea375cc39cff0b8da6c7e5f8fcdc1d946beb8da5c216156ddd8 +# pip pluggy @ https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl#sha256=44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669 +# pip pygments @ https://files.pythonhosted.org/packages/f7/3f/01c8b82017c199075f8f788d0d906b9ffbbc5a47dc9918a945e13d5a2bda/pygments-2.18.0-py3-none-any.whl#sha256=b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a +# pip pyparsing @ https://files.pythonhosted.org/packages/9d/ea/6d76df31432a0e6fdf81681a895f009a4bb47b3c39036db3e1b528191d52/pyparsing-3.1.2-py3-none-any.whl#sha256=f9db75911801ed778fe61bb643079ff86601aca99fcae6345aa67292038fb742 +# pip pytz @ https://files.pythonhosted.org/packages/9c/3d/a121f284241f08268b21359bd425f7d4825cffc5ac5cd0e1b3d82ffd2b10/pytz-2024.1-py2.py3-none-any.whl#sha256=328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319 # pip six @ https://files.pythonhosted.org/packages/d9/5a/e7c31adbe875f2abbb91bd84cf2dc52d792b5a01506781dbcf25c91daf11/six-1.16.0-py2.py3-none-any.whl#sha256=8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 # pip snowballstemmer @ https://files.pythonhosted.org/packages/ed/dc/c02e01294f7265e63a7315fe086dd1df7dacb9f840a804da846b96d01b96/snowballstemmer-2.2.0-py2.py3-none-any.whl#sha256=c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a -# pip sphinxcontrib-applehelp @ https://files.pythonhosted.org/packages/06/c1/5e2cafbd03105ce50d8500f9b4e8a6e8d02e22d0475b574c3b3e9451a15f/sphinxcontrib_applehelp-1.0.4-py3-none-any.whl#sha256=29d341f67fb0f6f586b23ad80e072c8e6ad0b48417db2bde114a4c9746feb228 -# pip sphinxcontrib-devhelp @ https://files.pythonhosted.org/packages/c5/09/5de5ed43a521387f18bdf5f5af31d099605c992fd25372b2b9b825ce48ee/sphinxcontrib_devhelp-1.0.2-py2.py3-none-any.whl#sha256=8165223f9a335cc1af7ffe1ed31d2871f325254c0423bc0c4c7cd1c1e4734a2e -# pip sphinxcontrib-htmlhelp @ https://files.pythonhosted.org/packages/6e/ee/a1f5e39046cbb5f8bc8fba87d1ddf1c6643fbc9194e58d26e606de4b9074/sphinxcontrib_htmlhelp-2.0.1-py3-none-any.whl#sha256=c38cb46dccf316c79de6e5515e1770414b797162b23cd3d06e67020e1d2a6903 +# pip sphinxcontrib-applehelp @ 
https://files.pythonhosted.org/packages/56/89/fea3fbf6785b388e6cb8a1beaf62f96e80b37311bdeed6e133388a732426/sphinxcontrib_applehelp-1.0.8-py3-none-any.whl#sha256=cb61eb0ec1b61f349e5cc36b2028e9e7ca765be05e49641c97241274753067b4 +# pip sphinxcontrib-devhelp @ https://files.pythonhosted.org/packages/a0/52/1049d918d1d1c72857d285c3f0c64c1cbe0be394ce1c93a3d2aa4f39fe3b/sphinxcontrib_devhelp-1.0.6-py3-none-any.whl#sha256=6485d09629944511c893fa11355bda18b742b83a2b181f9a009f7e500595c90f +# pip sphinxcontrib-htmlhelp @ https://files.pythonhosted.org/packages/c2/e9/74c4cda5b409af3222fda38f0774e616011bc935f639dbc0da5ca2d1be7d/sphinxcontrib_htmlhelp-2.0.5-py3-none-any.whl#sha256=393f04f112b4d2f53d93448d4bce35842f62b307ccdc549ec1585e950bc35e04 # pip sphinxcontrib-jsmath @ https://files.pythonhosted.org/packages/c2/42/4c8646762ee83602e3fb3fbe774c2fac12f317deb0b5dbeeedd2d3ba4b77/sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl#sha256=2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178 -# pip sphinxcontrib-qthelp @ https://files.pythonhosted.org/packages/2b/14/05f9206cf4e9cfca1afb5fd224c7cd434dcc3a433d6d9e4e0264d29c6cdb/sphinxcontrib_qthelp-1.0.3-py2.py3-none-any.whl#sha256=bd9fc24bcb748a8d51fd4ecaade681350aa63009a347a8c14e637895444dfab6 -# pip sphinxcontrib-serializinghtml @ https://files.pythonhosted.org/packages/c6/77/5464ec50dd0f1c1037e3c93249b040c8fc8078fdda97530eeb02424b6eea/sphinxcontrib_serializinghtml-1.1.5-py2.py3-none-any.whl#sha256=352a9a00ae864471d3a7ead8d7d79f5fc0b57e8b3f95e9867eb9eb28999b92fd -# pip threadpoolctl @ https://files.pythonhosted.org/packages/61/cf/6e354304bcb9c6413c4e02a747b600061c21d38ba51e7e544ac7bc66aecc/threadpoolctl-3.1.0-py3-none-any.whl#sha256=8b99adda265feb6773280df41eece7b2e6561b772d21ffd52e372f999024907b +# pip sphinxcontrib-qthelp @ https://files.pythonhosted.org/packages/80/b3/1beac14a88654d2e5120d0143b49be5ad450b86eb1963523d8dbdcc51eb2/sphinxcontrib_qthelp-1.0.7-py3-none-any.whl#sha256=e2ae3b5c492d58fcbd73281fbd27e34b8393ec34a073c792642cd8e529288182 +# pip sphinxcontrib-serializinghtml @ https://files.pythonhosted.org/packages/38/24/228bb903ea87b9e08ab33470e6102402a644127108c7117ac9c00d849f82/sphinxcontrib_serializinghtml-1.1.10-py3-none-any.whl#sha256=326369b8df80a7d2d8d7f99aa5ac577f51ea51556ed974e7716cfd4fca3f6cb7 +# pip tabulate @ https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl#sha256=024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f +# pip threadpoolctl @ https://files.pythonhosted.org/packages/4b/2c/ffbf7a134b9ab11a67b0cf0726453cedd9c5043a4fe7a35d1cefa9a1bcfb/threadpoolctl-3.5.0-py3-none-any.whl#sha256=56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467 # pip tomli @ https://files.pythonhosted.org/packages/97/75/10a9ebee3fd790d20926a90a2547f0bf78f371b2f13aa822c759680ca7b9/tomli-2.0.1-py3-none-any.whl#sha256=939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc -# pip typing-extensions @ https://files.pythonhosted.org/packages/5f/86/d9b1518d8e75b346a33eb59fa31bdbbee11459a7e2cc5be502fa779e96c5/typing_extensions-4.6.3-py3-none-any.whl#sha256=88a4153d8505aabbb4e13aacb7c486c2b4a33ca3b3f807914a9b4c844c471c26 -# pip tzdata @ https://files.pythonhosted.org/packages/d5/fb/a79efcab32b8a1f1ddca7f35109a50e4a80d42ac1c9187ab46522b2407d7/tzdata-2023.3-py2.py3-none-any.whl#sha256=7e65763eef3120314099b6939b5546db7adce1e7d6f2e179e3df563c70511eda -# pip urllib3 @ 
https://files.pythonhosted.org/packages/4b/1d/f8383ef593114755429c307449e7717b87044b3bcd5f7860b89b1f759e34/urllib3-2.0.2-py3-none-any.whl#sha256=d055c2f9d38dc53c808f6fdc8eab7360b6fdbbde02340ed25cfbcd817c62469e -# pip zipp @ https://files.pythonhosted.org/packages/5b/fa/c9e82bbe1af6266adf08afb563905eb87cab83fde00a0a08963510621047/zipp-3.15.0-py3-none-any.whl#sha256=48904fc76a60e542af151aded95726c1a5c34ed43ab4134b597665c86d7ad556 -# pip contourpy @ https://files.pythonhosted.org/packages/c7/97/ba9ace011734cd01b63eb7d39b2cf97afbfa985b0239ab0db85bafa9b207/contourpy-1.0.7-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=e7281244c99fd7c6f27c1c6bfafba878517b0b62925a09b586d88ce750a016d2 -# pip coverage @ https://files.pythonhosted.org/packages/fe/57/e4f8ad64d84ca9e759d783a052795f62a9f9111585e46068845b1cb52c2b/coverage-7.2.7-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=6f48351d66575f535669306aa7d6d6f71bc43372473b54a832222803eb956fd1 -# pip imageio @ https://files.pythonhosted.org/packages/f7/9d/47d0a9d0f267e9155963db8608ffbc448f2b5d4e5414d8e608309f422094/imageio-2.31.0-py3-none-any.whl#sha256=141bbd97910fad105c179a6b344ae4e7fef0dd85411303c63cd925b4c6163bee -# pip importlib-metadata @ https://files.pythonhosted.org/packages/30/bb/bf2944b8b88c65b797acc2c6a2cb0fb817f7364debf0675792e034013858/importlib_metadata-6.6.0-py3-none-any.whl#sha256=43dd286a2cd8995d5eaef7fee2066340423b818ed3fd70adf0bad5f1fac53fed -# pip importlib-resources @ https://files.pythonhosted.org/packages/38/71/c13ea695a4393639830bf96baea956538ba7a9d06fcce7cef10bfff20f72/importlib_resources-5.12.0-py3-none-any.whl#sha256=7b1deeebbf351c7578e09bf2f63fa2ce8b5ffec296e0d349139d43cca061a81a -# pip jinja2 @ https://files.pythonhosted.org/packages/bc/c3/f068337a370801f372f2f8f6bad74a5c140f6fda3d9de154052708dd3c65/Jinja2-3.1.2-py3-none-any.whl#sha256=6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61 -# pip pytest @ https://files.pythonhosted.org/packages/1b/d1/72df649a705af1e3a09ffe14b0c7d3be1fd730da6b98beb4a2ed26b8a023/pytest-7.3.1-py3-none-any.whl#sha256=3799fa815351fea3a5e96ac7e503a96fa51cc9942c3753cda7651b93c1cfa362 -# pip python-dateutil @ https://files.pythonhosted.org/packages/36/7a/87837f39d0296e723bb9b62bbb257d0355c7f6128853c78955f57342a56d/python_dateutil-2.8.2-py2.py3-none-any.whl#sha256=961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9 -# pip pywavelets @ https://files.pythonhosted.org/packages/5a/98/4549479a32972bdfdd5e75e168219e97f4dfaee535a8308efef7291e8398/PyWavelets-1.4.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=71ab30f51ee4470741bb55fc6b197b4a2b612232e30f6ac069106f0156342356 -# pip requests @ https://files.pythonhosted.org/packages/70/8e/0e2d847013cb52cd35b38c009bb167a1a26b2ce6cd6965bf26b47bc0bf44/requests-2.31.0-py3-none-any.whl#sha256=58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f -# pip scipy @ https://files.pythonhosted.org/packages/5d/30/b2a2a5bf1a3beefb7609fb871dcc6aef7217c69cef19a4631b7ab5622a8a/scipy-1.10.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=1b4735d6c28aad3cdcf52117e0e91d6b39acd4272f3f5cd9907c24ee931ad601 -# pip setuptools-scm @ https://files.pythonhosted.org/packages/1d/66/8f42c941be949ef2b22fe905d850c794e7c170a526023612aad5f3a121ad/setuptools_scm-7.1.0-py3-none-any.whl#sha256=73988b6d848709e2af142aa48c986ea29592bbcfca5375678064708205253d8e -# pip tifffile @ 
https://files.pythonhosted.org/packages/93/86/2ed10947a1891ceb86b084153fac06877fdec38a5ed69bd9286eefab3d44/tifffile-2023.4.12-py3-none-any.whl#sha256=3161954746fe32c4f4244d0fb2eb0a272f3a3760b78882a42faa83ac5e6e0b74 -# pip matplotlib @ https://files.pythonhosted.org/packages/9f/77/0cd22f92f7103383cb1ce3b3efc77411b9cc3a495242c8f2a623b498f586/matplotlib-3.7.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=f883a22a56a84dba3b588696a2b8a1ab0d2c3d41be53264115c71b0a942d8fdb -# pip pandas @ https://files.pythonhosted.org/packages/9f/cc/cc8135de2a574fd87940b1d41c9c52d226d3ebc9fc8f6e9f18a7b0a81b57/pandas-2.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=cf3f0c361a4270185baa89ec7ab92ecaa355fe783791457077473f974f654df5 -# pip pyamg @ https://files.pythonhosted.org/packages/1f/fe/a5d365335e9ab2b90ac55552b90779889559b1af01cdbd264f82ee5678bf/pyamg-5.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=2373a0ef913c272c8b6a6d8c2dfcf9a1681a1c6806a5b13b668bcb5125bb46b2 -# pip pytest-cov @ https://files.pythonhosted.org/packages/a7/4b/8b78d126e275efa2379b1c2e09dc52cf70df16fc3b90613ef82531499d73/pytest_cov-4.1.0-py3-none-any.whl#sha256=6ba70b9e97e69fcc3fb45bfeab2d0a138fb65c4d0d6a41ef33983ad114be8c3a -# pip pytest-forked @ https://files.pythonhosted.org/packages/f4/af/9c0bda43e486a3c9bf1e0f876d0f241bc3f229d7d65d09331a0868db9629/pytest_forked-1.6.0-py3-none-any.whl#sha256=810958f66a91afb1a1e2ae83089d8dc1cd2437ac96b12963042fbb9fb4d16af0 -# pip scikit-image @ https://files.pythonhosted.org/packages/19/bd/a53569a0a698d925eb46dbea0bd3b6b62e7287a9ec88b5a03efa8ebd5b14/scikit_image-0.21.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=78b1e96c59cab640ca5c5b22c501524cfaf34cbe0cb51ba73bd9a9ede3fb6e1d -# pip scikit-learn @ https://files.pythonhosted.org/packages/81/84/756be2b975959a5f94124d5584ead75d7ca99184f2d16664a0157b274b9a/scikit_learn-1.2.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=ea061bf0283bf9a9f36ea3c5d3231ba2176221bbd430abd2603b1c3b2ed85c89 -# pip sphinx @ https://files.pythonhosted.org/packages/4b/a9/9760e8373a11a62f5ef66684771b0a5b2c4a699bf0dbbc650ca2b75cec36/sphinx-7.0.1-py3-none-any.whl#sha256=60c5e04756c1709a98845ed27a2eed7a556af3993afb66e77fec48189f742616 -# pip lightgbm @ https://files.pythonhosted.org/packages/38/5c/d9773cf0ea7938f3b777eaacc6f9d58f69ca76a667771364ffefed9095b4/lightgbm-3.3.5-py3-none-manylinux1_x86_64.whl#sha256=044f65664c1a32c98cb619bafa97d8cd9d93c2c2d5053376aadfe509a3a3e7fa -# pip numpydoc @ https://files.pythonhosted.org/packages/c4/81/ad9b8837442ff451eca82515b41ac425f87acff7e2fc016fd1bda13fc01a/numpydoc-1.5.0-py3-none-any.whl#sha256=c997759fb6fc32662801cece76491eedbc0ec619b514932ffd2b270ae89c07f9 -# pip pytest-xdist @ https://files.pythonhosted.org/packages/21/08/b1945d4b4986eb1aa10cf84efc5293bba39da80a2f95db3573dd90678408/pytest_xdist-2.5.0-py3-none-any.whl#sha256=6fe5c74fec98906deb8f2d2b616b5c782022744978e7bd4695d39c8f42d0ce65 +# pip tzdata @ https://files.pythonhosted.org/packages/65/58/f9c9e6be752e9fcb8b6a0ee9fb87e6e7a1f6bcab2cdc73f02bb7ba91ada0/tzdata-2024.1-py2.py3-none-any.whl#sha256=9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252 +# pip urllib3 @ https://files.pythonhosted.org/packages/a2/73/a68704750a7679d0b6d3ad7aa8d4da8e14e151ae82e6fee774e6e0d05ec8/urllib3-2.2.1-py3-none-any.whl#sha256=450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d +# pip zipp @ 
https://files.pythonhosted.org/packages/20/38/f5c473fe9b90c8debdd29ea68d5add0289f1936d6f923b6b9cc0b931194c/zipp-3.19.2-py3-none-any.whl#sha256=f091755f667055f2d02b32c53771a7a6c8b47e1fdbc4b72a8b9072b3eef8015c +# pip contourpy @ https://files.pythonhosted.org/packages/31/a2/2f12e3a6e45935ff694654b710961b03310b0e1ec997ee9f416d3c873f87/contourpy-1.2.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=e1d59258c3c67c865435d8fbeb35f8c59b8bef3d6f46c1f29f6123556af28445 +# pip coverage @ https://files.pythonhosted.org/packages/07/e0/0e30ca5c6c5bcae86df9583c30807ff26e0b991e76f266b81224410663e4/coverage-7.5.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=2e079c9ec772fedbade9d7ebc36202a1d9ef7291bc9b3a024ca395c4d52853d7 +# pip imageio @ https://files.pythonhosted.org/packages/a3/b6/39c7dad203d9984225f47e0aa39ac3ba3a47c77a02d0ef2a7be691855a06/imageio-2.34.1-py3-none-any.whl#sha256=408c1d4d62f72c9e8347e7d1ca9bc11d8673328af3913868db3b828e28b40a4c +# pip importlib-metadata @ https://files.pythonhosted.org/packages/2d/0a/679461c511447ffaf176567d5c496d1de27cbe34a87df6677d7171b2fbd4/importlib_metadata-7.1.0-py3-none-any.whl#sha256=30962b96c0c223483ed6cc7280e7f0199feb01a0e40cfae4d4450fc6fab1f570 +# pip importlib-resources @ https://files.pythonhosted.org/packages/75/06/4df55e1b7b112d183f65db9503bff189e97179b256e1ea450a3c365241e0/importlib_resources-6.4.0-py3-none-any.whl#sha256=50d10f043df931902d4194ea07ec57960f66a80449ff867bfe782b4c486ba78c +# pip jinja2 @ https://files.pythonhosted.org/packages/31/80/3a54838c3fb461f6fec263ebf3a3a41771bd05190238de3486aae8540c36/jinja2-3.1.4-py3-none-any.whl#sha256=bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d +# pip lazy-loader @ https://files.pythonhosted.org/packages/83/60/d497a310bde3f01cb805196ac61b7ad6dc5dcf8dce66634dc34364b20b4f/lazy_loader-0.4-py3-none-any.whl#sha256=342aa8e14d543a154047afb4ba8ef17f5563baad3fc610d7b15b213b0f119efc +# pip pyproject-metadata @ https://files.pythonhosted.org/packages/aa/5f/bb5970d3d04173b46c9037109f7f05fc8904ff5be073ee49bb6ff00301bc/pyproject_metadata-0.8.0-py3-none-any.whl#sha256=ad858d448e1d3a1fb408ac5bac9ea7743e7a8bbb472f2693aaa334d2db42f526 +# pip pytest @ https://files.pythonhosted.org/packages/4e/e7/81ebdd666d3bff6670d27349b5053605d83d55548e6bd5711f3b0ae7dd23/pytest-8.2.2-py3-none-any.whl#sha256=c434598117762e2bd304e526244f67bf66bbd7b5d6cf22138be51ff661980343 +# pip python-dateutil @ https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl#sha256=a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427 +# pip requests @ https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl#sha256=70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6 +# pip scipy @ https://files.pythonhosted.org/packages/35/f5/d0ad1a96f80962ba65e2ce1de6a1e59edecd1f0a7b55990ed208848012e0/scipy-1.13.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=637e98dcf185ba7f8e663e122ebf908c4702420477ae52a04f9908707456ba4d +# pip tifffile @ https://files.pythonhosted.org/packages/d9/6c/740c07588434e86028c24b0653c1eb6b46904d9ce585a20f07590620ec41/tifffile-2024.5.22-py3-none-any.whl#sha256=e281781c15d7d197d7e12749849c965651413aa905f97a48b0f84bd90a3b4c6f +# pip lightgbm @ 
https://files.pythonhosted.org/packages/ba/11/cb8b67f3cbdca05b59a032bb57963d4fe8c8d18c3870f30bed005b7f174d/lightgbm-4.3.0-py3-none-manylinux_2_28_x86_64.whl#sha256=104496a3404cb2452d3412cbddcfbfadbef9c372ea91e3a9b8794bcc5183bf07 +# pip matplotlib @ https://files.pythonhosted.org/packages/d3/6d/45837c5b3d0005a5a9b04729b218a16bf3aa195701c6b33b2cc39ae943b6/matplotlib-3.9.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=81c40af649d19c85f8073e25e5806926986806fa6d54be506fbf02aef47d5a89 +# pip meson-python @ https://files.pythonhosted.org/packages/91/c0/104cb6244c83fe6bc3886f144cc433db0c0c78efac5dc00e409a5a08c87d/meson_python-0.16.0-py3-none-any.whl#sha256=842dc9f5dc29e55fc769ff1b6fe328412fe6c870220fc321060a1d2d395e69e8 +# pip pandas @ https://files.pythonhosted.org/packages/bb/30/f6f1f1ac36250f50c421b1b6af08c35e5a8b5a84385ef928625336b93e6f/pandas-2.2.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=66b479b0bd07204e37583c191535505410daa8df638fd8e75ae1b383851fe921 +# pip pyamg @ https://files.pythonhosted.org/packages/68/a9/aed9f557e7eb779d2cb4fa090663f8540979e0c04dadd16e9a0bdc9632c5/pyamg-5.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=5817d4567fb240dab4779bb1630bbb3035b3827731fcdaeb9ecc9c8814319995 +# pip pytest-cov @ https://files.pythonhosted.org/packages/78/3a/af5b4fa5961d9a1e6237b530eb87dd04aea6eb83da09d2a4073d81b54ccf/pytest_cov-5.0.0-py3-none-any.whl#sha256=4f0764a1219df53214206bf1feea4633c3b558a2925c8b59f144f682861ce652 +# pip pytest-xdist @ https://files.pythonhosted.org/packages/6d/82/1d96bf03ee4c0fdc3c0cbe61470070e659ca78dc0086fb88b66c185e2449/pytest_xdist-3.6.1-py3-none-any.whl#sha256=9ed4adfb68a016610848639bb7e02c9352d5d9f03d04809919e2dafc3be4cca7 +# pip scikit-image @ https://files.pythonhosted.org/packages/a3/7e/4cd853a855ac34b4ef3ef6a5c3d1c2e96eaca1154fc6be75db55ffa87393/scikit_image-0.22.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=3b7a6c89e8d6252332121b58f50e1625c35f7d6a85489c0b6b7ee4f5155d547a +# pip sphinx @ https://files.pythonhosted.org/packages/b4/fa/130c32ed94cf270e3d0b9ded16fb7b2c8fea86fa7263c29a696a30c1dde7/sphinx-7.3.7-py3-none-any.whl#sha256=413f75440be4cacf328f580b4274ada4565fb2187d696a84970c23f77b64d8c3 +# pip numpydoc @ https://files.pythonhosted.org/packages/f0/fa/dcfe0f65660661db757ee9ebd84e170ff98edd5d80235f62457d9088f85f/numpydoc-1.7.0-py3-none-any.whl#sha256=5a56419d931310d79a06cfc2a126d1558700feeb9b4f3d8dcae1a8134be829c9 diff --git a/build_tools/azure/pylatest_pip_scipy_dev_environment.yml b/build_tools/azure/pylatest_pip_scipy_dev_environment.yml index b2680f97d98f6..01709b79e3720 100644 --- a/build_tools/azure/pylatest_pip_scipy_dev_environment.yml +++ b/build_tools/azure/pylatest_pip_scipy_dev_environment.yml @@ -10,8 +10,10 @@ dependencies: - pip: - threadpoolctl - pytest - - pytest-xdist==2.5.0 - - setuptools + - pytest-xdist + - pip + - ninja + - meson-python - pytest-cov - coverage - pooch diff --git a/build_tools/azure/pylatest_pip_scipy_dev_linux-64_conda.lock b/build_tools/azure/pylatest_pip_scipy_dev_linux-64_conda.lock index 7861c2e43cf99..502d425d2adb7 100644 --- a/build_tools/azure/pylatest_pip_scipy_dev_linux-64_conda.lock +++ b/build_tools/azure/pylatest_pip_scipy_dev_linux-64_conda.lock @@ -1,63 +1,67 @@ # Generated by conda-lock. 
# platform: linux-64 -# input_hash: d7687370ba8c822d5b621703d51324b6767f15f0fc49177381f2a0a81a756684 +# input_hash: 8a4a203136d97ff3b2c8657fce2dd2228215bfbf9c1cfbe271e401f934bdf1a7 @EXPLICIT https://repo.anaconda.com/pkgs/main/linux-64/_libgcc_mutex-0.1-main.conda#c3473ff8bdb3d124ed5ff11ec380d6f9 -https://repo.anaconda.com/pkgs/main/linux-64/ca-certificates-2023.01.10-h06a4308_0.conda#7704989a2ccf6c1f5a50c985509841c4 +https://repo.anaconda.com/pkgs/main/linux-64/ca-certificates-2024.3.11-h06a4308_0.conda#08529eb3504712baabcbda266a19feb7 https://repo.anaconda.com/pkgs/main/linux-64/ld_impl_linux-64-2.38-h1181459_1.conda#68eedfd9c06f2b0e6888d8db345b7f5b -https://repo.anaconda.com/pkgs/main/noarch/tzdata-2023c-h04d1e81_0.conda#29db02adf8808f7c64642cead3e28acd +https://repo.anaconda.com/pkgs/main/noarch/tzdata-2024a-h04d1e81_0.conda#452af53adae0a5b06eb5d05c707b2f25 https://repo.anaconda.com/pkgs/main/linux-64/libgomp-11.2.0-h1234567_1.conda#b372c0eea9b60732fdae4b817a63c8cd https://repo.anaconda.com/pkgs/main/linux-64/libstdcxx-ng-11.2.0-h1234567_1.conda#57623d10a70e09e1d048c2b2b6f4e2dd https://repo.anaconda.com/pkgs/main/linux-64/_openmp_mutex-5.1-1_gnu.conda#71d281e9c2192cb3fa425655a8defb85 https://repo.anaconda.com/pkgs/main/linux-64/libgcc-ng-11.2.0-h1234567_1.conda#a87728dabf3151fb9cfa990bd2eb0464 -https://repo.anaconda.com/pkgs/main/linux-64/bzip2-1.0.8-h7b6447c_0.conda#9303f4af7c004e069bae22bde8d800ee -https://repo.anaconda.com/pkgs/main/linux-64/libffi-3.4.4-h6a678d5_0.conda#06e288f9250abef59b9a367d151fc339 +https://repo.anaconda.com/pkgs/main/linux-64/bzip2-1.0.8-h5eee18b_6.conda#f21a3ff51c1b271977f53ce956a69297 +https://repo.anaconda.com/pkgs/main/linux-64/expat-2.6.2-h6a678d5_0.conda#55049db2772dae035f6b8a95f72b5970 +https://repo.anaconda.com/pkgs/main/linux-64/libffi-3.4.4-h6a678d5_1.conda#70646cc713f0c43926cfdcfe9b695fe0 https://repo.anaconda.com/pkgs/main/linux-64/libuuid-1.41.5-h5eee18b_0.conda#4a6a2354414c9080327274aa514e5299 https://repo.anaconda.com/pkgs/main/linux-64/ncurses-6.4-h6a678d5_0.conda#5558eec6e2191741a92f832ea826251c -https://repo.anaconda.com/pkgs/main/linux-64/openssl-1.1.1t-h7f8727e_0.conda#0410db682c02665511bd4203ade48a32 -https://repo.anaconda.com/pkgs/main/linux-64/xz-5.4.2-h5eee18b_0.conda#bcd31de48a0dcb44bc5b99675800c5cc -https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.13-h5eee18b_0.conda#333e31fbfbb5057c92fa845ad6adef93 +https://repo.anaconda.com/pkgs/main/linux-64/openssl-3.0.14-h5eee18b_0.conda#37b6dad6aa49000a4230a9f0cad172f6 +https://repo.anaconda.com/pkgs/main/linux-64/xz-5.4.6-h5eee18b_1.conda#1562802f843297ee776a50b9329597ed +https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.13-h5eee18b_1.conda#92e42d8310108b0a440fb2e60b2b2a25 https://repo.anaconda.com/pkgs/main/linux-64/ccache-3.7.9-hfe4627d_0.conda#bef6fc681c273bb7bd0c67d1a591365e https://repo.anaconda.com/pkgs/main/linux-64/readline-8.2-h5eee18b_0.conda#be42180685cce6e6b0329201d9f48efb -https://repo.anaconda.com/pkgs/main/linux-64/tk-8.6.12-h1ccaba5_0.conda#fa10ff4aa631fa4aa090a6234d7770b9 -https://repo.anaconda.com/pkgs/main/linux-64/sqlite-3.41.2-h5eee18b_0.conda#c7086c9ceb6cfe1c4c729a774a2d88a5 -https://repo.anaconda.com/pkgs/main/linux-64/python-3.11.3-h7a1cb2a_0.conda#d4474259a2525cc6fb272f02ca02873e -https://repo.anaconda.com/pkgs/main/linux-64/setuptools-67.8.0-py311h06a4308_0.conda#b65f6b9c4547f1fd81af11d4e8b649c4 -https://repo.anaconda.com/pkgs/main/linux-64/wheel-0.38.4-py311h06a4308_0.conda#b3d14884810655c572ea9a91df7de205 
-https://repo.anaconda.com/pkgs/main/linux-64/pip-23.0.1-py311h06a4308_0.conda#06ec6690fc9814ab769a62dfeeb26419 -# pip alabaster @ https://files.pythonhosted.org/packages/64/88/c7083fc61120ab661c5d0b82cb77079fc1429d3f913a456c1c82cf4658f7/alabaster-0.7.13-py3-none-any.whl#sha256=1ee19aca801bbabb5ba3f5f258e4422dfa86f82f3e9cefb0859b283cdd7f62a3 -# pip babel @ https://files.pythonhosted.org/packages/df/c4/1088865e0246d7ecf56d819a233ab2b72f7d6ab043965ef327d0731b5434/Babel-2.12.1-py3-none-any.whl#sha256=b4246fb7677d3b98f501a39d43396d3cafdc8eadb045f4a31be01863f655c610 -# pip certifi @ https://files.pythonhosted.org/packages/9d/19/59961b522e6757f0c9097e4493fa906031b95b3ebe9360b2c3083561a6b4/certifi-2023.5.7-py3-none-any.whl#sha256=c6c2e98f5c7869efca1f8916fed228dd91539f9f1b444c314c06eef02980c716 -# pip charset-normalizer @ https://files.pythonhosted.org/packages/18/36/7ae10a3dd7f9117b61180671f8d1e4802080cca88ad40aaabd3dad8bab0e/charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=0ca564606d2caafb0abe6d1b5311c2649e8071eb241b2d64e75a0d0065107e62 -# pip coverage @ https://files.pythonhosted.org/packages/a7/cd/3ce94ad9d407a052dc2a74fbeb1c7947f442155b28264eb467ee78dea812/coverage-7.2.7-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=63426706118b7f5cf6bb6c895dc215d8a418d5952544042c8a2d9fe87fcf09cb -# pip docutils @ https://files.pythonhosted.org/packages/26/87/f238c0670b94533ac0353a4e2a1a771a0cc73277b88bff23d3ae35a256c1/docutils-0.20.1-py3-none-any.whl#sha256=96f387a2c5562db4476f09f13bbab2192e764cac08ebbf3a34a95d9b1e4a59d6 -# pip execnet @ https://files.pythonhosted.org/packages/81/c0/3072ecc23f4c5e0a1af35e3a222855cfd9c80a1a105ca67be3b6172637dd/execnet-1.9.0-py2.py3-none-any.whl#sha256=a295f7cc774947aac58dde7fdc85f4aa00c42adf5d8f5468fc630c1acf30a142 -# pip idna @ https://files.pythonhosted.org/packages/fc/34/3030de6f1370931b9dbb4dad48f6ab1015ab1d32447850b9fc94e60097be/idna-3.4-py3-none-any.whl#sha256=90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2 +https://repo.anaconda.com/pkgs/main/linux-64/tk-8.6.14-h39e8969_0.conda#78dbc5e3c69143ebc037fc5d5b22e597 +https://repo.anaconda.com/pkgs/main/linux-64/sqlite-3.45.3-h5eee18b_0.conda#acf93d6aceb74d6110e20b44cc45939e +https://repo.anaconda.com/pkgs/main/linux-64/python-3.12.4-h5148396_1.conda#7863dc035441267f7b617f080c933671 +https://repo.anaconda.com/pkgs/main/linux-64/setuptools-69.5.1-py312h06a4308_0.conda#ce85d9a864a73e0b12d31a97733c9fca +https://repo.anaconda.com/pkgs/main/linux-64/wheel-0.43.0-py312h06a4308_0.conda#18d5f3b68a175c72576876db4afc9e9e +https://repo.anaconda.com/pkgs/main/linux-64/pip-24.0-py312h06a4308_0.conda#6d9697bb8b9f3212be10b3b8e01a12b9 +# pip alabaster @ https://files.pythonhosted.org/packages/32/34/d4e1c02d3bee589efb5dfa17f88ea08bdb3e3eac12bc475462aec52ed223/alabaster-0.7.16-py3-none-any.whl#sha256=b46733c07dce03ae4e150330b975c75737fa60f0a7c591b6c8bf4928a28e2c92 +# pip babel @ https://files.pythonhosted.org/packages/27/45/377f7e32a5c93d94cd56542349b34efab5ca3f9e2fd5a68c5e93169aa32d/Babel-2.15.0-py3-none-any.whl#sha256=08706bdad8d0a3413266ab61bd6c34d0c28d6e1e7badf40a2cebe67644e2e1fb +# pip certifi @ https://files.pythonhosted.org/packages/5b/11/1e78951465b4a225519b8c3ad29769c49e0d8d157a070f681d5b6d64737f/certifi-2024.6.2-py3-none-any.whl#sha256=ddc6c8ce995e6987e7faf5e3f1b02b302836a0e5d98ece18392cb1a36c72ad56 +# pip charset-normalizer @ 
https://files.pythonhosted.org/packages/ee/fb/14d30eb4956408ee3ae09ad34299131fb383c47df355ddb428a7331cfa1e/charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b +# pip coverage @ https://files.pythonhosted.org/packages/88/52/7054710a881b09d295e93b9889ac204c241a6847a8c05555fc6e1d8799d5/coverage-7.5.4-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=5013ed890dc917cef2c9f765c4c6a8ae9df983cd60dbb635df8ed9f4ebc9f555 +# pip docutils @ https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl#sha256=dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2 +# pip execnet @ https://files.pythonhosted.org/packages/43/09/2aea36ff60d16dd8879bdb2f5b3ee0ba8d08cbbdcdfe870e695ce3784385/execnet-2.1.1-py3-none-any.whl#sha256=26dee51f1b80cebd6d0ca8e74dd8745419761d3bef34163928cbebbdc4749fdc +# pip idna @ https://files.pythonhosted.org/packages/e5/3e/741d8c82801c347547f8a2a06aa57dbb1992be9e948df2ea0eda2c8b79e8/idna-3.7-py3-none-any.whl#sha256=82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0 # pip imagesize @ https://files.pythonhosted.org/packages/ff/62/85c4c919272577931d407be5ba5d71c20f0b616d31a0befe0ae45bb79abd/imagesize-1.4.1-py2.py3-none-any.whl#sha256=0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b # pip iniconfig @ https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl#sha256=b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374 -# pip markupsafe @ https://files.pythonhosted.org/packages/fe/21/2eff1de472ca6c99ec3993eab11308787b9879af9ca8bbceb4868cf4f2ca/MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=bfce63a9e7834b12b87c64d6b155fdd9b3b96191b6bd334bf37db7ff1fe457f2 -# pip packaging @ https://files.pythonhosted.org/packages/ab/c3/57f0601a2d4fe15de7a553c00adbc901425661bf048f2a22dfc500caf121/packaging-23.1-py3-none-any.whl#sha256=994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61 -# pip platformdirs @ https://files.pythonhosted.org/packages/89/7e/c6ff9ddcf93b9b36c90d88111c4db354afab7f9a58c7ac3257fa717f1268/platformdirs-3.5.1-py3-none-any.whl#sha256=e2378146f1964972c03c085bb5662ae80b2b8c06226c54b2ff4aa9483e8a13a5 -# pip pluggy @ https://files.pythonhosted.org/packages/9e/01/f38e2ff29715251cf25532b9082a1589ab7e4f571ced434f98d0139336dc/pluggy-1.0.0-py2.py3-none-any.whl#sha256=74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3 -# pip py @ https://files.pythonhosted.org/packages/f6/f0/10642828a8dfb741e5f3fbaac830550a518a775c7fff6f04a007259b0548/py-1.11.0-py2.py3-none-any.whl#sha256=607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378 -# pip pygments @ https://files.pythonhosted.org/packages/34/a7/37c8d68532ba71549db4212cb036dbd6161b40e463aba336770e80c72f84/Pygments-2.15.1-py3-none-any.whl#sha256=db2db3deb4b4179f399a09054b023b6a586b76499d36965813c71aa8ed7b5fd1 +# pip markupsafe @ https://files.pythonhosted.org/packages/0a/0d/2454f072fae3b5a137c119abf15465d1771319dfe9e4acbb31722a0fff91/MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5 +# pip meson @ 
https://files.pythonhosted.org/packages/44/b2/d4433391a7c5e94a39b50ca7295a8ceba736e7c72c455752a60122f52453/meson-1.4.1-py3-none-any.whl#sha256=d5acc3abae2dad3c70ddcbd10acac92b78b144d34d43f40f5b8ac31dfd8a826a +# pip ninja @ https://files.pythonhosted.org/packages/6d/92/8d7aebd4430ab5ff65df2bfee6d5745f95c004284db2d8ca76dcbfd9de47/ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl#sha256=84502ec98f02a037a169c4b0d5d86075eaf6afc55e1879003d6cab51ced2ea4b +# pip packaging @ https://files.pythonhosted.org/packages/08/aa/cc0199a5f0ad350994d660967a8efb233fe0416e4639146c089643407ce6/packaging-24.1-py3-none-any.whl#sha256=5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124 +# pip platformdirs @ https://files.pythonhosted.org/packages/68/13/2aa1f0e1364feb2c9ef45302f387ac0bd81484e9c9a4c5688a322fbdfd08/platformdirs-4.2.2-py3-none-any.whl#sha256=2d7a1657e36a80ea911db832a8a6ece5ee53d8de21edd5cc5879af6530b1bfee +# pip pluggy @ https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl#sha256=44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669 +# pip pygments @ https://files.pythonhosted.org/packages/f7/3f/01c8b82017c199075f8f788d0d906b9ffbbc5a47dc9918a945e13d5a2bda/pygments-2.18.0-py3-none-any.whl#sha256=b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a # pip six @ https://files.pythonhosted.org/packages/d9/5a/e7c31adbe875f2abbb91bd84cf2dc52d792b5a01506781dbcf25c91daf11/six-1.16.0-py2.py3-none-any.whl#sha256=8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 # pip snowballstemmer @ https://files.pythonhosted.org/packages/ed/dc/c02e01294f7265e63a7315fe086dd1df7dacb9f840a804da846b96d01b96/snowballstemmer-2.2.0-py2.py3-none-any.whl#sha256=c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a -# pip sphinxcontrib-applehelp @ https://files.pythonhosted.org/packages/06/c1/5e2cafbd03105ce50d8500f9b4e8a6e8d02e22d0475b574c3b3e9451a15f/sphinxcontrib_applehelp-1.0.4-py3-none-any.whl#sha256=29d341f67fb0f6f586b23ad80e072c8e6ad0b48417db2bde114a4c9746feb228 -# pip sphinxcontrib-devhelp @ https://files.pythonhosted.org/packages/c5/09/5de5ed43a521387f18bdf5f5af31d099605c992fd25372b2b9b825ce48ee/sphinxcontrib_devhelp-1.0.2-py2.py3-none-any.whl#sha256=8165223f9a335cc1af7ffe1ed31d2871f325254c0423bc0c4c7cd1c1e4734a2e -# pip sphinxcontrib-htmlhelp @ https://files.pythonhosted.org/packages/6e/ee/a1f5e39046cbb5f8bc8fba87d1ddf1c6643fbc9194e58d26e606de4b9074/sphinxcontrib_htmlhelp-2.0.1-py3-none-any.whl#sha256=c38cb46dccf316c79de6e5515e1770414b797162b23cd3d06e67020e1d2a6903 +# pip sphinxcontrib-applehelp @ https://files.pythonhosted.org/packages/56/89/fea3fbf6785b388e6cb8a1beaf62f96e80b37311bdeed6e133388a732426/sphinxcontrib_applehelp-1.0.8-py3-none-any.whl#sha256=cb61eb0ec1b61f349e5cc36b2028e9e7ca765be05e49641c97241274753067b4 +# pip sphinxcontrib-devhelp @ https://files.pythonhosted.org/packages/a0/52/1049d918d1d1c72857d285c3f0c64c1cbe0be394ce1c93a3d2aa4f39fe3b/sphinxcontrib_devhelp-1.0.6-py3-none-any.whl#sha256=6485d09629944511c893fa11355bda18b742b83a2b181f9a009f7e500595c90f +# pip sphinxcontrib-htmlhelp @ https://files.pythonhosted.org/packages/c2/e9/74c4cda5b409af3222fda38f0774e616011bc935f639dbc0da5ca2d1be7d/sphinxcontrib_htmlhelp-2.0.5-py3-none-any.whl#sha256=393f04f112b4d2f53d93448d4bce35842f62b307ccdc549ec1585e950bc35e04 # pip sphinxcontrib-jsmath @ 
https://files.pythonhosted.org/packages/c2/42/4c8646762ee83602e3fb3fbe774c2fac12f317deb0b5dbeeedd2d3ba4b77/sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl#sha256=2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178 -# pip sphinxcontrib-qthelp @ https://files.pythonhosted.org/packages/2b/14/05f9206cf4e9cfca1afb5fd224c7cd434dcc3a433d6d9e4e0264d29c6cdb/sphinxcontrib_qthelp-1.0.3-py2.py3-none-any.whl#sha256=bd9fc24bcb748a8d51fd4ecaade681350aa63009a347a8c14e637895444dfab6 -# pip sphinxcontrib-serializinghtml @ https://files.pythonhosted.org/packages/c6/77/5464ec50dd0f1c1037e3c93249b040c8fc8078fdda97530eeb02424b6eea/sphinxcontrib_serializinghtml-1.1.5-py2.py3-none-any.whl#sha256=352a9a00ae864471d3a7ead8d7d79f5fc0b57e8b3f95e9867eb9eb28999b92fd -# pip threadpoolctl @ https://files.pythonhosted.org/packages/61/cf/6e354304bcb9c6413c4e02a747b600061c21d38ba51e7e544ac7bc66aecc/threadpoolctl-3.1.0-py3-none-any.whl#sha256=8b99adda265feb6773280df41eece7b2e6561b772d21ffd52e372f999024907b -# pip urllib3 @ https://files.pythonhosted.org/packages/4b/1d/f8383ef593114755429c307449e7717b87044b3bcd5f7860b89b1f759e34/urllib3-2.0.2-py3-none-any.whl#sha256=d055c2f9d38dc53c808f6fdc8eab7360b6fdbbde02340ed25cfbcd817c62469e -# pip jinja2 @ https://files.pythonhosted.org/packages/bc/c3/f068337a370801f372f2f8f6bad74a5c140f6fda3d9de154052708dd3c65/Jinja2-3.1.2-py3-none-any.whl#sha256=6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61 -# pip pytest @ https://files.pythonhosted.org/packages/1b/d1/72df649a705af1e3a09ffe14b0c7d3be1fd730da6b98beb4a2ed26b8a023/pytest-7.3.1-py3-none-any.whl#sha256=3799fa815351fea3a5e96ac7e503a96fa51cc9942c3753cda7651b93c1cfa362 -# pip python-dateutil @ https://files.pythonhosted.org/packages/36/7a/87837f39d0296e723bb9b62bbb257d0355c7f6128853c78955f57342a56d/python_dateutil-2.8.2-py2.py3-none-any.whl#sha256=961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9 -# pip requests @ https://files.pythonhosted.org/packages/70/8e/0e2d847013cb52cd35b38c009bb167a1a26b2ce6cd6965bf26b47bc0bf44/requests-2.31.0-py3-none-any.whl#sha256=58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f -# pip pooch @ https://files.pythonhosted.org/packages/84/8c/4da580db7fb4cfce8f5ed78e7d2aa542e6f201edd69d3d8a96917a8ff63c/pooch-1.7.0-py3-none-any.whl#sha256=74258224fc33d58f53113cf955e8d51bf01386b91492927d0d1b6b341a765ad7 -# pip pytest-cov @ https://files.pythonhosted.org/packages/a7/4b/8b78d126e275efa2379b1c2e09dc52cf70df16fc3b90613ef82531499d73/pytest_cov-4.1.0-py3-none-any.whl#sha256=6ba70b9e97e69fcc3fb45bfeab2d0a138fb65c4d0d6a41ef33983ad114be8c3a -# pip pytest-forked @ https://files.pythonhosted.org/packages/f4/af/9c0bda43e486a3c9bf1e0f876d0f241bc3f229d7d65d09331a0868db9629/pytest_forked-1.6.0-py3-none-any.whl#sha256=810958f66a91afb1a1e2ae83089d8dc1cd2437ac96b12963042fbb9fb4d16af0 -# pip sphinx @ https://files.pythonhosted.org/packages/4b/a9/9760e8373a11a62f5ef66684771b0a5b2c4a699bf0dbbc650ca2b75cec36/sphinx-7.0.1-py3-none-any.whl#sha256=60c5e04756c1709a98845ed27a2eed7a556af3993afb66e77fec48189f742616 -# pip numpydoc @ https://files.pythonhosted.org/packages/c4/81/ad9b8837442ff451eca82515b41ac425f87acff7e2fc016fd1bda13fc01a/numpydoc-1.5.0-py3-none-any.whl#sha256=c997759fb6fc32662801cece76491eedbc0ec619b514932ffd2b270ae89c07f9 -# pip pytest-xdist @ https://files.pythonhosted.org/packages/21/08/b1945d4b4986eb1aa10cf84efc5293bba39da80a2f95db3573dd90678408/pytest_xdist-2.5.0-py3-none-any.whl#sha256=6fe5c74fec98906deb8f2d2b616b5c782022744978e7bd4695d39c8f42d0ce65 +# pip 
sphinxcontrib-qthelp @ https://files.pythonhosted.org/packages/80/b3/1beac14a88654d2e5120d0143b49be5ad450b86eb1963523d8dbdcc51eb2/sphinxcontrib_qthelp-1.0.7-py3-none-any.whl#sha256=e2ae3b5c492d58fcbd73281fbd27e34b8393ec34a073c792642cd8e529288182 +# pip sphinxcontrib-serializinghtml @ https://files.pythonhosted.org/packages/38/24/228bb903ea87b9e08ab33470e6102402a644127108c7117ac9c00d849f82/sphinxcontrib_serializinghtml-1.1.10-py3-none-any.whl#sha256=326369b8df80a7d2d8d7f99aa5ac577f51ea51556ed974e7716cfd4fca3f6cb7 +# pip tabulate @ https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl#sha256=024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f +# pip threadpoolctl @ https://files.pythonhosted.org/packages/4b/2c/ffbf7a134b9ab11a67b0cf0726453cedd9c5043a4fe7a35d1cefa9a1bcfb/threadpoolctl-3.5.0-py3-none-any.whl#sha256=56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467 +# pip urllib3 @ https://files.pythonhosted.org/packages/ca/1c/89ffc63a9605b583d5df2be791a27bc1a42b7c32bab68d3c8f2f73a98cd4/urllib3-2.2.2-py3-none-any.whl#sha256=a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472 +# pip jinja2 @ https://files.pythonhosted.org/packages/31/80/3a54838c3fb461f6fec263ebf3a3a41771bd05190238de3486aae8540c36/jinja2-3.1.4-py3-none-any.whl#sha256=bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d +# pip pyproject-metadata @ https://files.pythonhosted.org/packages/aa/5f/bb5970d3d04173b46c9037109f7f05fc8904ff5be073ee49bb6ff00301bc/pyproject_metadata-0.8.0-py3-none-any.whl#sha256=ad858d448e1d3a1fb408ac5bac9ea7743e7a8bbb472f2693aaa334d2db42f526 +# pip pytest @ https://files.pythonhosted.org/packages/4e/e7/81ebdd666d3bff6670d27349b5053605d83d55548e6bd5711f3b0ae7dd23/pytest-8.2.2-py3-none-any.whl#sha256=c434598117762e2bd304e526244f67bf66bbd7b5d6cf22138be51ff661980343 +# pip python-dateutil @ https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl#sha256=a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427 +# pip requests @ https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl#sha256=70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6 +# pip meson-python @ https://files.pythonhosted.org/packages/91/c0/104cb6244c83fe6bc3886f144cc433db0c0c78efac5dc00e409a5a08c87d/meson_python-0.16.0-py3-none-any.whl#sha256=842dc9f5dc29e55fc769ff1b6fe328412fe6c870220fc321060a1d2d395e69e8 +# pip pooch @ https://files.pythonhosted.org/packages/a8/87/77cc11c7a9ea9fd05503def69e3d18605852cd0d4b0d3b8f15bbeb3ef1d1/pooch-1.8.2-py3-none-any.whl#sha256=3529a57096f7198778a5ceefd5ac3ef0e4d06a6ddaf9fc2d609b806f25302c47 +# pip pytest-cov @ https://files.pythonhosted.org/packages/78/3a/af5b4fa5961d9a1e6237b530eb87dd04aea6eb83da09d2a4073d81b54ccf/pytest_cov-5.0.0-py3-none-any.whl#sha256=4f0764a1219df53214206bf1feea4633c3b558a2925c8b59f144f682861ce652 +# pip pytest-xdist @ https://files.pythonhosted.org/packages/6d/82/1d96bf03ee4c0fdc3c0cbe61470070e659ca78dc0086fb88b66c185e2449/pytest_xdist-3.6.1-py3-none-any.whl#sha256=9ed4adfb68a016610848639bb7e02c9352d5d9f03d04809919e2dafc3be4cca7 +# pip sphinx @ https://files.pythonhosted.org/packages/b4/fa/130c32ed94cf270e3d0b9ded16fb7b2c8fea86fa7263c29a696a30c1dde7/sphinx-7.3.7-py3-none-any.whl#sha256=413f75440be4cacf328f580b4274ada4565fb2187d696a84970c23f77b64d8c3 +# 
pip numpydoc @ https://files.pythonhosted.org/packages/f0/fa/dcfe0f65660661db757ee9ebd84e170ff98edd5d80235f62457d9088f85f/numpydoc-1.7.0-py3-none-any.whl#sha256=5a56419d931310d79a06cfc2a126d1558700feeb9b4f3d8dcae1a8134be829c9 diff --git a/build_tools/azure/py38_conda_defaults_openblas_environment.yml b/build_tools/azure/pymin_conda_defaults_openblas_environment.yml similarity index 63% rename from build_tools/azure/py38_conda_defaults_openblas_environment.yml rename to build_tools/azure/pymin_conda_defaults_openblas_environment.yml index 7abb54f99d300..a82ba18e27980 100644 --- a/build_tools/azure/py38_conda_defaults_openblas_environment.yml +++ b/build_tools/azure/pymin_conda_defaults_openblas_environment.yml @@ -4,20 +4,20 @@ channels: - defaults dependencies: - - python=3.8 - - numpy=1.17.3 # min + - python=3.9 + - numpy=1.21 - blas[build=openblas] - - scipy=1.5.0 # min - - cython - - joblib - - threadpoolctl=2.2.0 - - matplotlib=3.1.3 # min - - pandas + - scipy=1.7 + - cython=3.0.10 # min + - joblib=1.2.0 # min + - matplotlib=3.3.4 # min - pyamg - pytest - - pytest-xdist=2.5.0 + - pytest-xdist - pillow - - setuptools - pytest-cov - coverage - ccache + - pip + - pip: + - threadpoolctl==3.1.0 # min diff --git a/build_tools/azure/pymin_conda_defaults_openblas_linux-64_conda.lock b/build_tools/azure/pymin_conda_defaults_openblas_linux-64_conda.lock new file mode 100644 index 0000000000000..a83d0cef3e063 --- /dev/null +++ b/build_tools/azure/pymin_conda_defaults_openblas_linux-64_conda.lock @@ -0,0 +1,99 @@ +# Generated by conda-lock. +# platform: linux-64 +# input_hash: be0f080ab1974d224952262cd9179ff999d10108421d7e7ded2435e6f08edf0e +@EXPLICIT +https://repo.anaconda.com/pkgs/main/linux-64/_libgcc_mutex-0.1-main.conda#c3473ff8bdb3d124ed5ff11ec380d6f9 +https://repo.anaconda.com/pkgs/main/linux-64/blas-1.0-openblas.conda#9ddfcaef10d79366c90128f5dc444be8 +https://repo.anaconda.com/pkgs/main/linux-64/ca-certificates-2024.3.11-h06a4308_0.conda#08529eb3504712baabcbda266a19feb7 +https://repo.anaconda.com/pkgs/main/linux-64/ld_impl_linux-64-2.38-h1181459_1.conda#68eedfd9c06f2b0e6888d8db345b7f5b +https://repo.anaconda.com/pkgs/main/linux-64/libgfortran5-11.2.0-h1234567_1.conda#36a01a8c30e0cadf0d3e842c50b73f3b +https://repo.anaconda.com/pkgs/main/noarch/tzdata-2024a-h04d1e81_0.conda#452af53adae0a5b06eb5d05c707b2f25 +https://repo.anaconda.com/pkgs/main/linux-64/libgfortran-ng-11.2.0-h00389a5_1.conda#7429b67ab7b1d7cb99b9d1f3ddaec6e3 +https://repo.anaconda.com/pkgs/main/linux-64/libgomp-11.2.0-h1234567_1.conda#b372c0eea9b60732fdae4b817a63c8cd +https://repo.anaconda.com/pkgs/main/linux-64/libstdcxx-ng-11.2.0-h1234567_1.conda#57623d10a70e09e1d048c2b2b6f4e2dd +https://repo.anaconda.com/pkgs/main/linux-64/_openmp_mutex-5.1-1_gnu.conda#71d281e9c2192cb3fa425655a8defb85 +https://repo.anaconda.com/pkgs/main/linux-64/libgcc-ng-11.2.0-h1234567_1.conda#a87728dabf3151fb9cfa990bd2eb0464 +https://repo.anaconda.com/pkgs/main/linux-64/bzip2-1.0.8-h5eee18b_6.conda#f21a3ff51c1b271977f53ce956a69297 +https://repo.anaconda.com/pkgs/main/linux-64/expat-2.6.2-h6a678d5_0.conda#55049db2772dae035f6b8a95f72b5970 +https://repo.anaconda.com/pkgs/main/linux-64/fftw-3.3.9-h5eee18b_2.conda#db1df41113accc18ec59a99f1631bfcd +https://repo.anaconda.com/pkgs/main/linux-64/icu-73.1-h6a678d5_0.conda#6d09df641fc23f7d277a04dc7ea32dd4 +https://repo.anaconda.com/pkgs/main/linux-64/jpeg-9e-h5eee18b_1.conda#ac373800fda872108412d1ccfe3fa572 +https://repo.anaconda.com/pkgs/main/linux-64/lerc-3.0-h295c915_0.conda#b97309770412f10bed8d9448f6f98f87 
+https://repo.anaconda.com/pkgs/main/linux-64/libdeflate-1.17-h5eee18b_1.conda#82831ef0b6c9595382d74e0c281f6742 +https://repo.anaconda.com/pkgs/main/linux-64/libffi-3.4.4-h6a678d5_1.conda#70646cc713f0c43926cfdcfe9b695fe0 +https://repo.anaconda.com/pkgs/main/linux-64/libiconv-1.16-h5eee18b_3.conda#197b1a0886a31fccab2167340528eebc +https://repo.anaconda.com/pkgs/main/linux-64/libopenblas-0.3.21-h043d6bf_0.conda#7f7324dcc3c4761a14f3e4ac443235a7 +https://repo.anaconda.com/pkgs/main/linux-64/libuuid-1.41.5-h5eee18b_0.conda#4a6a2354414c9080327274aa514e5299 +https://repo.anaconda.com/pkgs/main/linux-64/libwebp-base-1.3.2-h5eee18b_0.conda#9179fc7baefa1e027f572edbc519d805 +https://repo.anaconda.com/pkgs/main/linux-64/libxcb-1.15-h7f8727e_0.conda#ada518dcadd6aaee9aae47ba9a671553 +https://repo.anaconda.com/pkgs/main/linux-64/lz4-c-1.9.4-h6a678d5_1.conda#2ee58861f2b92b868ce761abb831819d +https://repo.anaconda.com/pkgs/main/linux-64/ncurses-6.4-h6a678d5_0.conda#5558eec6e2191741a92f832ea826251c +https://repo.anaconda.com/pkgs/main/linux-64/openssl-3.0.13-h7f8727e_2.conda#0019453c25f5e72129f99236e60febaa +https://repo.anaconda.com/pkgs/main/linux-64/xz-5.4.6-h5eee18b_1.conda#1562802f843297ee776a50b9329597ed +https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.13-h5eee18b_1.conda#92e42d8310108b0a440fb2e60b2b2a25 +https://repo.anaconda.com/pkgs/main/linux-64/ccache-3.7.9-hfe4627d_0.conda#bef6fc681c273bb7bd0c67d1a591365e +https://repo.anaconda.com/pkgs/main/linux-64/libcups-2.4.2-h2d74bed_1.conda#3f265c2172a9e8c90a74037b6fa13685 +https://repo.anaconda.com/pkgs/main/linux-64/libedit-3.1.20230828-h5eee18b_0.conda#850eb5a9d2d7d3c66cce12e84406ca08 +https://repo.anaconda.com/pkgs/main/linux-64/libllvm14-14.0.6-hdb19cb5_3.conda#aefea2b45cf32f12b4f1ffaa70aa3201 +https://repo.anaconda.com/pkgs/main/linux-64/libpng-1.6.39-h5eee18b_0.conda#f6aee38184512eb05b06c2e94d39ab22 +https://repo.anaconda.com/pkgs/main/linux-64/libxml2-2.10.4-hfdd30dd_2.conda#ff7a0e3b92afb3c99b82c9f0ba8b5670 +https://repo.anaconda.com/pkgs/main/linux-64/pcre2-10.42-hebb0a14_1.conda#727e15c3cfa02b032da4eb0c1123e977 +https://repo.anaconda.com/pkgs/main/linux-64/readline-8.2-h5eee18b_0.conda#be42180685cce6e6b0329201d9f48efb +https://repo.anaconda.com/pkgs/main/linux-64/tk-8.6.14-h39e8969_0.conda#78dbc5e3c69143ebc037fc5d5b22e597 +https://repo.anaconda.com/pkgs/main/linux-64/zstd-1.5.5-hc292b87_2.conda#3b7fe809e5b429b4f90fe064842a2370 +https://repo.anaconda.com/pkgs/main/linux-64/freetype-2.12.1-h4a9f257_0.conda#bdc7b5952e9c5dca01bc2f4ccef2f974 +https://repo.anaconda.com/pkgs/main/linux-64/krb5-1.20.1-h143b758_1.conda#cf1accc86321fa25d6b978cc748039ae +https://repo.anaconda.com/pkgs/main/linux-64/libclang13-14.0.6-default_he11475f_1.conda#44890feda1cf51639d9c94afbacce011 +https://repo.anaconda.com/pkgs/main/linux-64/libglib-2.78.4-hdc74915_0.conda#2f6d27741e931d5b6ba56e1a1312aaf0 +https://repo.anaconda.com/pkgs/main/linux-64/libtiff-4.5.1-h6a678d5_0.conda#235a671f74f0c4ecad9f9b3b107e3566 +https://repo.anaconda.com/pkgs/main/linux-64/libxkbcommon-1.0.1-h5eee18b_1.conda#888b2e8f1bbf21017c503826e2d24b50 +https://repo.anaconda.com/pkgs/main/linux-64/sqlite-3.45.3-h5eee18b_0.conda#acf93d6aceb74d6110e20b44cc45939e +https://repo.anaconda.com/pkgs/main/linux-64/cyrus-sasl-2.1.28-h52b45da_1.conda#d634af1577e4008f9228ae96ce671c44 +https://repo.anaconda.com/pkgs/main/linux-64/fontconfig-2.14.1-h4c34cd2_2.conda#f0b472f5b544f8d57beb09ed4a2932e1 +https://repo.anaconda.com/pkgs/main/linux-64/glib-tools-2.78.4-h6a678d5_0.conda#3dbe6227cd59818dca9afb75ccb70708 
+https://repo.anaconda.com/pkgs/main/linux-64/lcms2-2.12-h3be6417_0.conda#719db47afba9f6586eecb5eacac70bff +https://repo.anaconda.com/pkgs/main/linux-64/libclang-14.0.6-default_hc6dbbc7_1.conda#8f12583c4027b2861cff470f6b8837c4 +https://repo.anaconda.com/pkgs/main/linux-64/libpq-12.17-hdbd6064_0.conda#6bed363e25859faff66bf546a11c10e8 +https://repo.anaconda.com/pkgs/main/linux-64/openjpeg-2.4.0-h3ad879b_0.conda#86baecb47ecaa7f7ff2657a1f03b90c9 +https://repo.anaconda.com/pkgs/main/linux-64/python-3.9.19-h955ad1f_1.conda#4b453281859c293c9d577271f3b18a0d +https://repo.anaconda.com/pkgs/main/linux-64/certifi-2024.6.2-py39h06a4308_0.conda#738daf43271605d7291ecae0e8cac41c +https://repo.anaconda.com/pkgs/main/noarch/cycler-0.11.0-pyhd3eb1b0_0.conda#f5e365d2cdb66d547eb8c3ab93843aab +https://repo.anaconda.com/pkgs/main/linux-64/cython-3.0.10-py39h5eee18b_0.conda#1419a658ed2b4d5c3ac1964f33143b64 +https://repo.anaconda.com/pkgs/main/linux-64/exceptiongroup-1.2.0-py39h06a4308_0.conda#960e2cb83ac5134df8e593a130aa11af +https://repo.anaconda.com/pkgs/main/noarch/execnet-1.9.0-pyhd3eb1b0_0.conda#f895937671af67cebb8af617494b3513 +https://repo.anaconda.com/pkgs/main/linux-64/glib-2.78.4-h6a678d5_0.conda#045ff487547f7b2b7ff01648681b8ebe +https://repo.anaconda.com/pkgs/main/noarch/iniconfig-1.1.1-pyhd3eb1b0_0.tar.bz2#e40edff2c5708f342cef43c7f280c507 +https://repo.anaconda.com/pkgs/main/linux-64/joblib-1.2.0-py39h06a4308_0.conda#ac1f5687d70aa1128cbecb26bc9e559d +https://repo.anaconda.com/pkgs/main/linux-64/kiwisolver-1.4.4-py39h6a678d5_0.conda#3d57aedbfbd054ce57fb3c1e4448828c +https://repo.anaconda.com/pkgs/main/linux-64/mysql-5.7.24-h721c034_2.conda#dfc19ca2466d275c4c1f73b62c57f37b +https://repo.anaconda.com/pkgs/main/linux-64/numpy-base-1.21.6-py39h375b286_1.conda#0061d9193658774ab79fc85d143a94fc +https://repo.anaconda.com/pkgs/main/linux-64/packaging-23.2-py39h06a4308_0.conda#b3f88f45f31bde016e49be3e941e5272 +https://repo.anaconda.com/pkgs/main/linux-64/pillow-10.3.0-py39h5eee18b_0.conda#b346d6c71267c1553b6c18d3db5fdf6d +https://repo.anaconda.com/pkgs/main/linux-64/pluggy-1.0.0-py39h06a4308_1.conda#fb4fed11ed43cf727dbd51883cc1d9fa +https://repo.anaconda.com/pkgs/main/linux-64/ply-3.11-py39h06a4308_0.conda#6c89bf6d2fdf6d24126e34cb83fd10f1 +https://repo.anaconda.com/pkgs/main/linux-64/pyparsing-3.0.9-py39h06a4308_0.conda#3a0537468e59760404f63b4f04369828 +https://repo.anaconda.com/pkgs/main/linux-64/pyqt5-sip-12.13.0-py39h5eee18b_0.conda#256840c3841b52346ea5743be8490ede +https://repo.anaconda.com/pkgs/main/linux-64/setuptools-69.5.1-py39h06a4308_0.conda#3eb144d481b39c0fbbced789dd9b76b3 +https://repo.anaconda.com/pkgs/main/noarch/six-1.16.0-pyhd3eb1b0_1.conda#34586824d411d36af2fa40e799c172d0 +https://repo.anaconda.com/pkgs/main/noarch/toml-0.10.2-pyhd3eb1b0_0.conda#cda05f5f6d8509529d1a2743288d197a +https://repo.anaconda.com/pkgs/main/linux-64/tomli-2.0.1-py39h06a4308_0.conda#b06dffe7ddca2645ed72f5116f0a087d +https://repo.anaconda.com/pkgs/main/linux-64/tornado-6.3.3-py39h5eee18b_0.conda#9c4bd985bb8adcd12f47e790e95a9333 +https://repo.anaconda.com/pkgs/main/linux-64/wheel-0.43.0-py39h06a4308_0.conda#40bb60408c7433d767fd8c65b35bc4a0 +https://repo.anaconda.com/pkgs/main/linux-64/coverage-7.2.2-py39h5eee18b_0.conda#e9da151b7e1f56be2cb569c65949a1d2 +https://repo.anaconda.com/pkgs/main/linux-64/dbus-1.13.18-hb2f20db_0.conda#6a6a6f1391f807847404344489ef6cf4 +https://repo.anaconda.com/pkgs/main/linux-64/gstreamer-1.14.1-h5eee18b_1.conda#f2f26e6f869b5d87f41bd059fae47c3e 
+https://repo.anaconda.com/pkgs/main/linux-64/numpy-1.21.6-py39hac523dd_1.conda#f379f92039f666828a193fadd18c9819 +https://repo.anaconda.com/pkgs/main/linux-64/pip-24.0-py39h06a4308_0.conda#7f8ce3af15cfecd12e4dda8c5cef5fb7 +https://repo.anaconda.com/pkgs/main/linux-64/pytest-7.4.4-py39h06a4308_0.conda#05ff36245067ecb0b6796a4f31a81f1c +https://repo.anaconda.com/pkgs/main/linux-64/python-dateutil-2.9.0post0-py39h06a4308_2.conda#d6566c6d6a0140e45f22787ae58ed6e8 +https://repo.anaconda.com/pkgs/main/linux-64/sip-6.7.12-py39h6a678d5_0.conda#6988a3e12fcacfedcac523c1e4c3167c +https://repo.anaconda.com/pkgs/main/linux-64/gst-plugins-base-1.14.1-h6a678d5_1.conda#afd9cbe949d670d24cc0a007aaec1fe1 +https://repo.anaconda.com/pkgs/main/linux-64/matplotlib-base-3.3.4-py39h62a2d02_0.conda#dbab28222c740af8e21a3e5e2882c178 +https://repo.anaconda.com/pkgs/main/linux-64/pytest-cov-4.1.0-py39h06a4308_1.conda#8f41fce21670b120bf7fa8a7883380d9 +https://repo.anaconda.com/pkgs/main/linux-64/pytest-xdist-3.5.0-py39h06a4308_0.conda#e1d7ffcb1ee2ed9a84800f5c4bbbd7ae +https://repo.anaconda.com/pkgs/main/linux-64/scipy-1.7.3-py39hf838250_2.conda#0667ea5ac14d35e26da19a0f068739da +https://repo.anaconda.com/pkgs/main/linux-64/pyamg-4.2.3-py39h79cecc1_0.conda#afc634da8b81dc504179d53d334e6e55 +https://repo.anaconda.com/pkgs/main/linux-64/qt-main-5.15.2-h53bd1ea_10.conda#bd0c79e82df6323f638bdcb871891b61 +https://repo.anaconda.com/pkgs/main/linux-64/pyqt-5.15.10-py39h6a678d5_0.conda#52da5ff9b1144b078d2f41bab0b213f2 +https://repo.anaconda.com/pkgs/main/linux-64/matplotlib-3.3.4-py39h06a4308_0.conda#384fc5e01ebfcf30e7161119d3029b5a +# pip threadpoolctl @ https://files.pythonhosted.org/packages/61/cf/6e354304bcb9c6413c4e02a747b600061c21d38ba51e7e544ac7bc66aecc/threadpoolctl-3.1.0-py3-none-any.whl#sha256=8b99adda265feb6773280df41eece7b2e6561b772d21ffd52e372f999024907b diff --git a/build_tools/azure/py38_conda_forge_mkl_environment.yml b/build_tools/azure/pymin_conda_forge_mkl_environment.yml similarity index 85% rename from build_tools/azure/py38_conda_forge_mkl_environment.yml rename to build_tools/azure/pymin_conda_forge_mkl_environment.yml index 2a2955d523a97..a219e4b3daa8f 100644 --- a/build_tools/azure/py38_conda_forge_mkl_environment.yml +++ b/build_tools/azure/pymin_conda_forge_mkl_environment.yml @@ -4,7 +4,7 @@ channels: - conda-forge dependencies: - - python=3.8 + - python=3.9 - numpy - blas[build=mkl] - scipy @@ -13,9 +13,11 @@ dependencies: - threadpoolctl - matplotlib - pytest - - pytest-xdist=2.5.0 + - pytest-xdist - pillow - - setuptools + - pip + - ninja + - meson-python - pytest-cov - coverage - wheel diff --git a/build_tools/azure/pymin_conda_forge_mkl_win-64_conda.lock b/build_tools/azure/pymin_conda_forge_mkl_win-64_conda.lock new file mode 100644 index 0000000000000..e6fe3ab2d07dd --- /dev/null +++ b/build_tools/azure/pymin_conda_forge_mkl_win-64_conda.lock @@ -0,0 +1,118 @@ +# Generated by conda-lock. 
+# platform: win-64 +# input_hash: ea607aaeb7b1d1f8a1f821a9f505b3601083a218ec4763e2d72d3d3d800e718c +@EXPLICIT +https://conda.anaconda.org/conda-forge/win-64/ca-certificates-2024.6.2-h56e8100_0.conda#12a3a2b3a00a21bbb390d4de5ad8dd0f +https://conda.anaconda.org/conda-forge/win-64/intel-openmp-2024.1.0-h57928b3_966.conda#35d7ea07ad6c878bd7240d2d6c1b8657 +https://conda.anaconda.org/conda-forge/win-64/mkl-include-2024.1.0-h66d3029_692.conda#60233966dc7c0261c9a443120b43c477 +https://conda.anaconda.org/conda-forge/win-64/msys2-conda-epoch-20160418-1.tar.bz2#b0309b72560df66f71a9d5e34a5efdfa +https://conda.anaconda.org/conda-forge/win-64/python_abi-3.9-4_cp39.conda#948b0d93d4ab1372d8fd45e1560afd47 +https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda#161081fc7cec0bfda0d86d7cb595f8d8 +https://conda.anaconda.org/conda-forge/win-64/ucrt-10.0.22621.0-h57928b3_0.tar.bz2#72608f6cd3e5898229c3ea16deb1ac43 +https://conda.anaconda.org/conda-forge/win-64/m2w64-gmp-6.1.0-2.tar.bz2#53a1c73e1e3d185516d7e3af177596d9 +https://conda.anaconda.org/conda-forge/win-64/m2w64-libwinpthread-git-5.0.0.4634.697f757-2.tar.bz2#774130a326dee16f1ceb05cc687ee4f0 +https://conda.anaconda.org/conda-forge/win-64/vc14_runtime-14.40.33810-ha82c5b3_20.conda#e39cc4c34c53654ec939558993d9dc5b +https://conda.anaconda.org/conda-forge/win-64/m2w64-gcc-libs-core-5.3.0-7.tar.bz2#4289d80fb4d272f1f3b56cfe87ac90bd +https://conda.anaconda.org/conda-forge/win-64/vc-14.3-h8a93ad2_20.conda#8558f367e1d7700554f7cdb823c46faf +https://conda.anaconda.org/conda-forge/win-64/vs2015_runtime-14.40.33810-h3bf8584_20.conda#c21f1b4a3a30bbc3ef35a50957578e0e +https://conda.anaconda.org/conda-forge/win-64/bzip2-1.0.8-hcfcfb64_5.conda#26eb8ca6ea332b675e11704cce84a3be +https://conda.anaconda.org/conda-forge/win-64/icu-73.2-h63175ca_0.conda#0f47d9e3192d9e09ae300da0d28e0f56 +https://conda.anaconda.org/conda-forge/win-64/lerc-4.0.0-h63175ca_0.tar.bz2#1900cb3cab5055833cfddb0ba233b074 +https://conda.anaconda.org/conda-forge/win-64/libbrotlicommon-1.1.0-hcfcfb64_1.conda#f77f319fb82980166569e1280d5b2864 +https://conda.anaconda.org/conda-forge/win-64/libdeflate-1.20-hcfcfb64_0.conda#b12b5bde5eb201a1df75e49320cc938a +https://conda.anaconda.org/conda-forge/win-64/libffi-3.4.2-h8ffe710_5.tar.bz2#2c96d1b6915b408893f9472569dee135 +https://conda.anaconda.org/conda-forge/win-64/libiconv-1.17-hcfcfb64_2.conda#e1eb10b1cca179f2baa3601e4efc8712 +https://conda.anaconda.org/conda-forge/win-64/libjpeg-turbo-3.0.0-hcfcfb64_1.conda#3f1b948619c45b1ca714d60c7389092c +https://conda.anaconda.org/conda-forge/win-64/libogg-1.3.4-h8ffe710_1.tar.bz2#04286d905a0dcb7f7d4a12bdfe02516d +https://conda.anaconda.org/conda-forge/win-64/libsqlite-3.45.3-hcfcfb64_0.conda#73f5dc8e2d55d9a1e14b11f49c3b4a28 +https://conda.anaconda.org/conda-forge/win-64/libwebp-base-1.4.0-hcfcfb64_0.conda#abd61d0ab127ec5cd68f62c2969e6f34 +https://conda.anaconda.org/conda-forge/win-64/libzlib-1.3.1-h2466b09_1.conda#d4483ca8afc57ddf1f6dded53b36c17f +https://conda.anaconda.org/conda-forge/win-64/m2w64-gcc-libgfortran-5.3.0-6.tar.bz2#066552ac6b907ec6d72c0ddab29050dc +https://conda.anaconda.org/conda-forge/win-64/ninja-1.12.1-hc790b64_0.conda#a557dde55343e03c68cd7e29e7f87279 +https://conda.anaconda.org/conda-forge/win-64/openssl-3.3.1-h2466b09_0.conda#27fe798366ef3a81715b13eedf699e2f +https://conda.anaconda.org/conda-forge/win-64/pthreads-win32-2.9.1-hfa6e2cd_3.tar.bz2#e2da8758d7d51ff6aa78a14dfb9dbed4 +https://conda.anaconda.org/conda-forge/win-64/tk-8.6.13-h5226925_1.conda#fc048363eb8f03cd1737600a5d08aafe 
+https://conda.anaconda.org/conda-forge/win-64/xz-5.2.6-h8d14728_0.tar.bz2#515d77642eaa3639413c6b1bc3f94219 +https://conda.anaconda.org/conda-forge/win-64/krb5-1.21.2-heb0366b_0.conda#6e8b0f22b4eef3b3cb3849bb4c3d47f9 +https://conda.anaconda.org/conda-forge/win-64/libbrotlidec-1.1.0-hcfcfb64_1.conda#19ce3e1dacc7912b3d6ff40690ba9ae0 +https://conda.anaconda.org/conda-forge/win-64/libbrotlienc-1.1.0-hcfcfb64_1.conda#71e890a0b361fd58743a13f77e1506b7 +https://conda.anaconda.org/conda-forge/win-64/libintl-0.22.5-h5728263_2.conda#aa622c938af057adc119f8b8eecada01 +https://conda.anaconda.org/conda-forge/win-64/libpng-1.6.43-h19919ed_0.conda#77e398acc32617a0384553aea29e866b +https://conda.anaconda.org/conda-forge/win-64/libvorbis-1.3.7-h0e60522_0.tar.bz2#e1a22282de0169c93e4ffe6ce6acc212 +https://conda.anaconda.org/conda-forge/win-64/libxml2-2.12.7-h283a6d9_1.conda#7ab2653cc21c44a1370ef3b409261b3d +https://conda.anaconda.org/conda-forge/win-64/m2w64-gcc-libs-5.3.0-7.tar.bz2#fe759119b8b3bfa720b8762c6fdc35de +https://conda.anaconda.org/conda-forge/win-64/pcre2-10.43-h17e33f8_0.conda#d0485b8aa2cedb141a7bd27b4efa4c9c +https://conda.anaconda.org/conda-forge/win-64/python-3.9.19-h4de0772_0_cpython.conda#b6999bc275e0e6beae7b1c8ea0be1e85 +https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.6-h0ea2cb4_0.conda#9a17230f95733c04dc40a2b1e5491d74 +https://conda.anaconda.org/conda-forge/win-64/brotli-bin-1.1.0-hcfcfb64_1.conda#0105229d7c5fabaa840043a86c10ec64 +https://conda.anaconda.org/conda-forge/noarch/certifi-2024.2.2-pyhd8ed1ab_0.conda#0876280e409658fc6f9e75d035960333 +https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 +https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhd8ed1ab_0.conda#5cd86562580f274031ede6aa6aa24441 +https://conda.anaconda.org/conda-forge/win-64/cython-3.0.10-py39h99910a6_0.conda#8ebc2fca8a6840d0694f37e698f4e59c +https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.0-pyhd8ed1ab_2.conda#8d652ea2ee8eaee02ed8dc820bc794aa +https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_0.conda#15dda3cdbf330abfe9f555d22f66db46 +https://conda.anaconda.org/conda-forge/win-64/freetype-2.12.1-hdaf720e_2.conda#3761b23693f768dc75a8fd0a73ca053f +https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5 +https://conda.anaconda.org/conda-forge/win-64/kiwisolver-1.4.5-py39h1f6ef14_1.conda#4fc5bd0a7b535252028c647cc27d6c87 +https://conda.anaconda.org/conda-forge/win-64/libclang13-18.1.7-default_h97ce8ae_0.conda#a005e5fd30f14fcd2bc3e5ac57aa45a7 +https://conda.anaconda.org/conda-forge/win-64/libglib-2.80.2-h0df6a38_0.conda#ef9ae80bb2a15aee7a30180c057678ea +https://conda.anaconda.org/conda-forge/win-64/libhwloc-2.10.0-default_h8125262_1001.conda#e761885eb4c181074d172220d46319a0 +https://conda.anaconda.org/conda-forge/win-64/libintl-devel-0.22.5-h5728263_2.conda#a2ad82fae23975e4ccbfab2847d31d48 +https://conda.anaconda.org/conda-forge/win-64/libtiff-4.6.0-hddb2be6_3.conda#6d1828c9039929e2f185c5fa9d133018 +https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19 +https://conda.anaconda.org/conda-forge/noarch/packaging-24.0-pyhd8ed1ab_0.conda#248f521b64ce055e7feae3105e7abeb8 +https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_0.conda#d3483c8fc2dc2cc3f5cf43e26d60cabf +https://conda.anaconda.org/conda-forge/noarch/ply-3.11-pyhd8ed1ab_2.conda#18c6deb6f9602e32446398203c8f0e91 
+https://conda.anaconda.org/conda-forge/win-64/pthread-stubs-0.4-hcd874cb_1001.tar.bz2#a1f820480193ea83582b13249a7e7bd9 +https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.2-pyhd8ed1ab_0.conda#b9a4dacf97241704529131a0dfc0494f +https://conda.anaconda.org/conda-forge/noarch/setuptools-70.0.0-pyhd8ed1ab_0.conda#c8ddb4f34a208df4dd42509a0f6a1c89 +https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 +https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.5.0-pyhc1e730c_0.conda#df68d78237980a159bd7149f33c0e8fd +https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095 +https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 +https://conda.anaconda.org/conda-forge/win-64/tornado-6.4.1-py39ha55e580_0.conda#7d1e87f3036af858ce7e248489c3faec +https://conda.anaconda.org/conda-forge/win-64/unicodedata2-15.1.0-py39ha55989b_0.conda#20ec896e8d97f2ff8be1124e624dc8f2 +https://conda.anaconda.org/conda-forge/noarch/wheel-0.43.0-pyhd8ed1ab_1.conda#0b5293a157c2b5cd513dd1b03d8d3aae +https://conda.anaconda.org/conda-forge/win-64/xorg-libxau-1.0.11-hcd874cb_0.conda#c46ba8712093cb0114404ae8a7582e1a +https://conda.anaconda.org/conda-forge/win-64/xorg-libxdmcp-1.1.3-hcd874cb_0.tar.bz2#46878ebb6b9cbd8afcf8088d7ef00ece +https://conda.anaconda.org/conda-forge/noarch/zipp-3.18.1-pyhd8ed1ab_0.conda#c77c4aabc01b156a8cb4395f0233d335 +https://conda.anaconda.org/conda-forge/win-64/brotli-1.1.0-hcfcfb64_1.conda#f47f6db2528e38321fb00ae31674c133 +https://conda.anaconda.org/conda-forge/win-64/coverage-7.5.3-py39ha55e580_0.conda#28d426e365cb4ed87d22d1a89c0bd006 +https://conda.anaconda.org/conda-forge/win-64/glib-tools-2.80.2-h2f9d560_0.conda#42fc785d9db7ab051a206fbf882ecf2e +https://conda.anaconda.org/conda-forge/noarch/importlib_resources-6.4.0-pyhd8ed1ab_0.conda#c5d3907ad8bd7bf557521a1833cf7e6d +https://conda.anaconda.org/conda-forge/noarch/joblib-1.4.2-pyhd8ed1ab_0.conda#25df261d4523d9f9783bcdb7208d872f +https://conda.anaconda.org/conda-forge/win-64/lcms2-2.16-h67d730c_0.conda#d3592435917b62a8becff3a60db674f6 +https://conda.anaconda.org/conda-forge/win-64/libxcb-1.15-hcd874cb_0.conda#090d91b69396f14afef450c285f9758c +https://conda.anaconda.org/conda-forge/noarch/meson-1.4.0-pyhd8ed1ab_0.conda#52a0660cfa40b45bf254ecc3374cb2e0 +https://conda.anaconda.org/conda-forge/win-64/openjpeg-2.5.2-h3d672ee_0.conda#7e7099ad94ac3b599808950cec30ad4e +https://conda.anaconda.org/conda-forge/noarch/pip-24.0-pyhd8ed1ab_0.conda#f586ac1e56c8638b64f9c8122a7b8a67 +https://conda.anaconda.org/conda-forge/noarch/pyproject-metadata-0.8.0-pyhd8ed1ab_0.conda#573fe09d7bd0cd4bcc210d8369b5ca47 +https://conda.anaconda.org/conda-forge/noarch/pytest-8.2.2-pyhd8ed1ab_0.conda#0f3f49c22c7ef3a1195fa61dad3c43be +https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0-pyhd8ed1ab_0.conda#2cf4264fffb9e6eff6031c5b6884d61c +https://conda.anaconda.org/conda-forge/win-64/sip-6.7.12-py39h99910a6_0.conda#0cc5774390ada632ed7975203057c91c +https://conda.anaconda.org/conda-forge/win-64/tbb-2021.12.0-hc790b64_1.conda#e98333643abc739ebea1bac97a479828 +https://conda.anaconda.org/conda-forge/win-64/fonttools-4.53.0-py39ha55e580_0.conda#7c4625b8a1013dd22e924f1fa9fbc605 +https://conda.anaconda.org/conda-forge/win-64/glib-2.80.2-h0df6a38_0.conda#a728ca6f04c33ecb0f39eeda5fbd0e23 
+https://conda.anaconda.org/conda-forge/noarch/importlib-resources-6.4.0-pyhd8ed1ab_0.conda#dcbadab7a68738a028e195ab68ab2d2e +https://conda.anaconda.org/conda-forge/noarch/meson-python-0.16.0-pyh0c530f3_0.conda#e16f0dbf502da873be9f9adb0dc52547 +https://conda.anaconda.org/conda-forge/win-64/mkl-2024.1.0-h66d3029_692.conda#b43ec7ed045323edeff31e348eea8652 +https://conda.anaconda.org/conda-forge/win-64/pillow-10.3.0-py39h9ee4981_0.conda#6d69d57c41867acc162ef0205a8efaef +https://conda.anaconda.org/conda-forge/win-64/pyqt5-sip-12.12.2-py39h99910a6_5.conda#dffbcea794c524c471772a5f697c2aea +https://conda.anaconda.org/conda-forge/noarch/pytest-cov-5.0.0-pyhd8ed1ab_0.conda#c54c0107057d67ddf077751339ec2c63 +https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.5.0-pyhd8ed1ab_0.conda#d5f595da2daead898ca958ac62f0307b +https://conda.anaconda.org/conda-forge/win-64/gstreamer-1.24.4-h5006eae_0.conda#3d7ebad364d5f63a1ae54eecb35aee31 +https://conda.anaconda.org/conda-forge/win-64/libblas-3.9.0-22_win64_mkl.conda#65c56ecdeceffd6c32d3d54db7e02c6e +https://conda.anaconda.org/conda-forge/win-64/mkl-devel-2024.1.0-h57928b3_692.conda#9b3d1d4916a56fd32460f6fe784dcb51 +https://conda.anaconda.org/conda-forge/win-64/gst-plugins-base-1.24.4-hba88be7_0.conda#0b1d683d462029446924fa87a50dda12 +https://conda.anaconda.org/conda-forge/win-64/libcblas-3.9.0-22_win64_mkl.conda#336c93ab102846c6131cf68e722a68f1 +https://conda.anaconda.org/conda-forge/win-64/liblapack-3.9.0-22_win64_mkl.conda#c752cc2af9f3d8d7b2fdebb915a33ef7 +https://conda.anaconda.org/conda-forge/win-64/liblapacke-3.9.0-22_win64_mkl.conda#db33ffa4bae1d2f6d5602afaa048bf6b +https://conda.anaconda.org/conda-forge/win-64/numpy-1.26.4-py39hddb5d58_0.conda#6e30ff8f2d3f59f45347dfba8bc22a04 +https://conda.anaconda.org/conda-forge/win-64/qt-main-5.15.8-hcef0176_21.conda#76544d3dfeff8fd52250df168cb0005b +https://conda.anaconda.org/conda-forge/win-64/blas-devel-3.9.0-22_win64_mkl.conda#adeb834f3b7b06f3d77cd90b7c9d08f0 +https://conda.anaconda.org/conda-forge/win-64/contourpy-1.2.1-py39h1f6ef14_0.conda#03e25c6bae87f4f9595337255b44b0fb +https://conda.anaconda.org/conda-forge/win-64/pyqt-5.15.9-py39hb77abff_5.conda#5ed899124a51958336371ff01482b8fd +https://conda.anaconda.org/conda-forge/win-64/scipy-1.13.1-py39h1a10956_0.conda#9f8e571406af04d2f5fdcbecec704505 +https://conda.anaconda.org/conda-forge/win-64/blas-2.122-mkl.conda#aee642435696de144ddf91dc02101cf8 +https://conda.anaconda.org/conda-forge/win-64/matplotlib-base-3.8.4-py39he1095e7_2.conda#5c813b5da86f186d8026b6de6429c212 +https://conda.anaconda.org/conda-forge/win-64/matplotlib-3.8.4-py39hcbf5309_2.conda#1ecee90b529cb69ec4e95add23323110 diff --git a/build_tools/azure/py38_conda_forge_openblas_ubuntu_2204_environment.yml b/build_tools/azure/pymin_conda_forge_openblas_ubuntu_2204_environment.yml similarity index 80% rename from build_tools/azure/py38_conda_forge_openblas_ubuntu_2204_environment.yml rename to build_tools/azure/pymin_conda_forge_openblas_ubuntu_2204_environment.yml index bbbb3bb4cef6c..38737e7c9c0b0 100644 --- a/build_tools/azure/py38_conda_forge_openblas_ubuntu_2204_environment.yml +++ b/build_tools/azure/pymin_conda_forge_openblas_ubuntu_2204_environment.yml @@ -4,7 +4,7 @@ channels: - conda-forge dependencies: - - python=3.8 + - python=3.9 - numpy - blas[build=openblas] - scipy @@ -15,7 +15,11 @@ dependencies: - pandas - pyamg - pytest - - pytest-xdist=2.5.0 + - pytest-xdist - pillow - - setuptools + - pip + - ninja + - meson-python + - sphinx + - numpydoc - ccache diff --git 
a/build_tools/azure/pymin_conda_forge_openblas_ubuntu_2204_linux-64_conda.lock b/build_tools/azure/pymin_conda_forge_openblas_ubuntu_2204_linux-64_conda.lock new file mode 100644 index 0000000000000..6c03c413bb212 --- /dev/null +++ b/build_tools/azure/pymin_conda_forge_openblas_ubuntu_2204_linux-64_conda.lock @@ -0,0 +1,205 @@ +# Generated by conda-lock. +# platform: linux-64 +# input_hash: 3974f9847d888a2fd37ba5fcfb76cb09bba4c9b84b6200932500fc94e3b0c4ae +@EXPLICIT +https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 +https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2024.6.2-hbcca054_0.conda#847c3c2905cc467cea52c24f9cfa8080 +https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2#0c96522c6bdaed4b1566d11387caaf45 +https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2#34893075a5c9e55cdafac56607368fc6 +https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2#4d59c254e01d9cde7957100457e2d5fb +https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_2.conda#cbbe59391138ea5ad3658c76912e147f +https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.40-hf3520f5_3.conda#7c1062eaa78dec4ea8a9a988dbda6045 +https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.2.0-hc0a3c3a_7.conda#53ebd4c833fa01cb2c6353e99f905406 +https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.9-4_cp39.conda#bfe4b3259a8ac6cdf0037752904da6a7 +https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda#161081fc7cec0bfda0d86d7cb595f8d8 +https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-0.tar.bz2#f766549260d6815b0c52253f1fb1bb29 +https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2#fee5683a3f04bd15cbd8318b096a27ab +https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_kmp_llvm.tar.bz2#562b26ba2e19059551a811e72ab7f793 +https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.2.0-h77fa898_7.conda#72ec1b1b04c4d15d4204ece1ecea5978 +https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.11-hd590300_1.conda#0bb492cca54017ea314b809b1ee3a176 +https://conda.anaconda.org/conda-forge/linux-64/attr-2.5.1-h166bdaf_1.tar.bz2#d9c69a24ad678ffce24c6543a0176b00 +https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hd590300_5.conda#69b8b6202a07720f448be700e300ccf4 +https://conda.anaconda.org/conda-forge/linux-64/gettext-tools-0.22.5-h59595ed_2.conda#985f2f453fb72408d6b6f1be0f324033 +https://conda.anaconda.org/conda-forge/linux-64/graphite2-1.3.13-h59595ed_1003.conda#f87c7b7c2cb45f323ffbce941c78ab7c +https://conda.anaconda.org/conda-forge/linux-64/icu-73.2-h59595ed_0.conda#cc47e1facc155f91abd89b11e48e72ff +https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2#30186d27e2c9fa62b45fb1476b7200e3 +https://conda.anaconda.org/conda-forge/linux-64/lame-3.100-h166bdaf_1003.tar.bz2#a8832b479f93521a9e7b5b743803be51 +https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h27087fc_0.tar.bz2#76bbff344f0134279f225174e9064c8f +https://conda.anaconda.org/conda-forge/linux-64/libasprintf-0.22.5-h661eb56_2.conda#dd197c968bf9760bba0031888d431ede +https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.1.0-hd590300_1.conda#aec6c91c7371c26392a06708a73c70e5 +https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.20-hd590300_0.conda#8e88f9389f1165d7c0936fe40d9a9a79 
+https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.6.2-h59595ed_0.conda#e7ba12deb7020dd080c6c70e7b6f6a3d +https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3 +https://conda.anaconda.org/conda-forge/linux-64/libgettextpo-0.22.5-h59595ed_2.conda#172bcc51059416e7ce99e7b528cede83 +https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-13.2.0-hca663fb_7.conda#c0bd771f09a326fdcd95a60b617795bf +https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.17-hd590300_2.conda#d66573916ffcf376178462f1b61c941e +https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.0.0-hd590300_1.conda#ea25936bb4080d843790b586850f82b8 +https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hd590300_0.conda#30fd6e37fe21f86f4bd26d6ee73eeec7 +https://conda.anaconda.org/conda-forge/linux-64/libogg-1.3.4-h7f98852_1.tar.bz2#6e8cc2173440d77708196c5b93771680 +https://conda.anaconda.org/conda-forge/linux-64/libopus-1.3.1-h7f98852_1.tar.bz2#15345e56d527b330e1cacbdf58676e8f +https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda#40b61aab5c7ba9ff276c41cfffe6b80b +https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.4.0-hd590300_0.conda#b26e8aa824079e1be0294e7152ca4559 +https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda#5aa797f8787fe7a17d1b0821485b5adc +https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-h4ab18f5_1.conda#57d7dc60e9325e3de37ff8dffd18e814 +https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.9.4-hcb278e6_0.conda#318b08df404f9c9be5712aaa5a6f0bb0 +https://conda.anaconda.org/conda-forge/linux-64/mpg123-1.32.6-h59595ed_0.conda#9160cdeb523a1b20cf8d2a0bf821f45d +https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h59595ed_0.conda#fcea371545eda051b6deafb24889fc69 +https://conda.anaconda.org/conda-forge/linux-64/ninja-1.12.1-h297d8ca_0.conda#3aa1c7e292afeff25a0091ddd7c69b72 +https://conda.anaconda.org/conda-forge/linux-64/nspr-4.35-h27087fc_0.conda#da0ec11a6454ae19bff5b02ed881a2b1 +https://conda.anaconda.org/conda-forge/linux-64/openssl-3.3.1-h4ab18f5_0.conda#a41fa0e391cc9e0d6b78ac69ca047a6c +https://conda.anaconda.org/conda-forge/linux-64/pixman-0.43.2-h59595ed_0.conda#71004cbf7924e19c02746ccde9fd7123 +https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-h36c2ea0_1001.tar.bz2#22dad4df6e8630e8dff2428f6f6a7036 +https://conda.anaconda.org/conda-forge/linux-64/xorg-kbproto-1.0.7-h7f98852_1002.tar.bz2#4b230e8381279d76131116660f5a241a +https://conda.anaconda.org/conda-forge/linux-64/xorg-libice-1.1.1-hd590300_0.conda#b462a33c0be1421532f28bfe8f4a7514 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.11-hd590300_0.conda#2c80dc38fface310c9bd81b17037fee5 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdmcp-1.1.3-h7f98852_0.tar.bz2#be93aabceefa2fac576e971aef407908 +https://conda.anaconda.org/conda-forge/linux-64/xorg-renderproto-0.11.1-h7f98852_1002.tar.bz2#06feff3d2634e3097ce2fe681474b534 +https://conda.anaconda.org/conda-forge/linux-64/xorg-xextproto-7.3.0-h0b41bf4_1003.conda#bce9f945da8ad2ae9b1d7165a64d0f87 +https://conda.anaconda.org/conda-forge/linux-64/xorg-xf86vidmodeproto-2.3.1-h7f98852_1002.tar.bz2#3ceea9668625c18f19530de98b15d5b0 +https://conda.anaconda.org/conda-forge/linux-64/xorg-xproto-7.0.31-h7f98852_1007.tar.bz2#b4a4381d54784606820704f7b5f05a15 +https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0 
+https://conda.anaconda.org/conda-forge/linux-64/expat-2.6.2-h59595ed_0.conda#53fb86322bdb89496d7579fe3f02fd61 +https://conda.anaconda.org/conda-forge/linux-64/libasprintf-devel-0.22.5-h661eb56_2.conda#02e41ab5834dcdcc8590cf29d9526f50 +https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.1.0-hd590300_1.conda#f07002e225d7a60a694d42a7bf5ff53f +https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.1.0-hd590300_1.conda#5fc11c6020d421960607d821310fcd4d +https://conda.anaconda.org/conda-forge/linux-64/libcap-2.69-h0f662aa_0.conda#25cb5999faa414e5ccb2c1388f62d3d5 +https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2.tar.bz2#4d331e44109e3f0e19b4cb8f9b82f3e1 +https://conda.anaconda.org/conda-forge/linux-64/libevent-2.1.12-hf998b51_1.conda#a1cfcc585f0c42bf8d5546bb1dfb668d +https://conda.anaconda.org/conda-forge/linux-64/libgettextpo-devel-0.22.5-h59595ed_2.conda#b63d9b6da3653179a278077f0de20014 +https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-13.2.0-h69a702a_7.conda#1b84f26d9f4f6026e179e7805d5a15cd +https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.43-h2797004_0.conda#009981dd9cfcaa4dbfa25ffaed86bcae +https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.45.3-h2797004_0.conda#b3316cbe90249da4f8e84cd66e1cc55b +https://conda.anaconda.org/conda-forge/linux-64/libvorbis-1.3.7-h9c3ff4c_0.tar.bz2#309dec04b70a3cc0f1e84a4013683bc0 +https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.15-h0b41bf4_0.conda#33277193f5b92bad9fdd230eb700929c +https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.12.7-hc051c1a_1.conda#340278ded8b0dc3a73f3660bbb0adbc6 +https://conda.anaconda.org/conda-forge/linux-64/mysql-common-8.3.0-hf1915f5_4.conda#784a4df6676c581ca624fbe460703a6d +https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.43-hcad00b1_0.conda#8292dea9e022d9610a11fce5e0896ed8 +https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4 +https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda#d453b98d9c83e71da0741bb0ff4d76bc +https://conda.anaconda.org/conda-forge/linux-64/xorg-libsm-1.2.4-h7391055_0.conda#93ee23f12bc2e684548181256edd2cf6 +https://conda.anaconda.org/conda-forge/linux-64/zlib-1.3.1-h4ab18f5_1.conda#9653f1bf3766164d0e65fa723cabbc54 +https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.6-ha6fb4c9_0.conda#4d056880988120e29d75bfff282e0f45 +https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.1.0-hd590300_1.conda#39f910d205726805a958da408ca194ba +https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-h267a509_2.conda#9ae35c3d96db2c94ce0cef86efdfa2cb +https://conda.anaconda.org/conda-forge/linux-64/gettext-0.22.5-h59595ed_2.conda#219ba82e95d7614cf7140d2a4afc0926 +https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.2-h659d440_0.conda#cd95826dbd331ed1be26bdf401432844 +https://conda.anaconda.org/conda-forge/linux-64/libglib-2.80.2-hf974151_0.conda#72724f6a78ecb15559396966226d5838 +https://conda.anaconda.org/conda-forge/linux-64/libhiredis-1.0.2-h2cc385e_0.tar.bz2#b34907d3a81a3cd8095ee83d174c074a +https://conda.anaconda.org/conda-forge/linux-64/libllvm15-15.0.7-hb3ce162_4.conda#8a35df3cbc0c8b12cc8af9473ae75eef +https://conda.anaconda.org/conda-forge/linux-64/libllvm18-18.1.7-hb77312f_0.conda#bc0ea7e1f75a9b1c8467597fbbd9f86b +https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.27-pthreads_h413a1c8_0.conda#a356024784da6dfd4683dc5ecf45b155 
+https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.6.0-h1dd3fc0_3.conda#66f03896ffbe1a110ffda05c7a856504 +https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-18.1.7-ha31de31_0.conda#7234f31acd176e402e91e03feba90f7d +https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-8.3.0-hca2cd23_4.conda#1b50eebe2a738a3146c154d2eceaa8b6 +https://conda.anaconda.org/conda-forge/linux-64/nss-3.100-hca3bf56_0.conda#949c4a82290ee58b3c970cef4bcfd4ad +https://conda.anaconda.org/conda-forge/linux-64/python-3.9.19-h0755675_0_cpython.conda#d9ee3647fbd9e8595b8df759b2bbefb8 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-0.4.0-hd590300_1.conda#9bfac7ccd94d54fd21a0501296d60424 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-keysyms-0.4.0-h8ee46fc_1.conda#632413adcd8bc16b515cab87a2932913 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-renderutil-0.3.9-hd590300_1.conda#e995b155d938b6779da6ace6c6b13816 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-wm-0.4.1-h8ee46fc_1.conda#90108a432fb5c6150ccfee3f03388656 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.9-h8ee46fc_0.conda#077b6e8ad6a3ddb741fce2496dd01bec +https://conda.anaconda.org/conda-forge/noarch/alabaster-0.7.16-pyhd8ed1ab_0.conda#def531a3ac77b7fb8c21d17bb5d0badb +https://conda.anaconda.org/conda-forge/linux-64/brotli-1.1.0-hd590300_1.conda#f27a24d46e3ea7b70a1f98e50c62508f +https://conda.anaconda.org/conda-forge/linux-64/brotli-python-1.1.0-py39h3d6467e_1.conda#c48418c8b35f1d59ae9ae1174812b40a +https://conda.anaconda.org/conda-forge/linux-64/ccache-4.9.1-h1fcd64f_0.conda#3620f564bcf28c3524951b6f64f5c5ac +https://conda.anaconda.org/conda-forge/noarch/certifi-2024.2.2-pyhd8ed1ab_0.conda#0876280e409658fc6f9e75d035960333 +https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.3.2-pyhd8ed1ab_0.conda#7f4a9e3fcff3f6356ae99244a014da6a +https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 +https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhd8ed1ab_0.conda#5cd86562580f274031ede6aa6aa24441 +https://conda.anaconda.org/conda-forge/linux-64/cython-3.0.10-py39h3d6467e_0.conda#76b5d215fb735a6dc43010ffbe78040e +https://conda.anaconda.org/conda-forge/linux-64/dbus-1.13.6-h5008d03_3.tar.bz2#ecfff944ba3960ecb334b9a2663d708d +https://conda.anaconda.org/conda-forge/noarch/docutils-0.21.2-pyhd8ed1ab_0.conda#e8cd5d629f65bdf0f3bb312cde14659e +https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.0-pyhd8ed1ab_2.conda#8d652ea2ee8eaee02ed8dc820bc794aa +https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_0.conda#15dda3cdbf330abfe9f555d22f66db46 +https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.14.2-h14ed4e7_0.conda#0f69b688f52ff6da70bccb7ff7001d1d +https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.80.2-hb6ce0ca_0.conda#a965aeaf060289528a3fbe09326edae2 +https://conda.anaconda.org/conda-forge/noarch/idna-3.7-pyhd8ed1ab_0.conda#c0cc1420498b17414d8617d0b9f506ca +https://conda.anaconda.org/conda-forge/noarch/imagesize-1.4.1-pyhd8ed1ab_0.tar.bz2#7de5386c8fea29e76b303f37dde4c352 +https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5 +https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.5-py39h7633fee_1.conda#c9f74d717e5a2847a9f8b779c54130f2 +https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.16-hb7c19ff_0.conda#51bb7010fc86f70eee639b4bb7a894f5 
+https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-22_linux64_openblas.conda#1a2a0cd3153464fee6646f3dd6dad9b8 +https://conda.anaconda.org/conda-forge/linux-64/libclang-cpp15-15.0.7-default_h127d8a8_5.conda#d0a9633b53cdc319b8a1a532ae7822b8 +https://conda.anaconda.org/conda-forge/linux-64/libclang13-18.1.7-default_h087397f_0.conda#536526073c2e7f9056fdce8584da779e +https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h4637d8d_4.conda#d4529f4dff3057982a7617c7ac58fde3 +https://conda.anaconda.org/conda-forge/linux-64/libflac-1.4.3-h59595ed_0.conda#ee48bf17cc83a00f59ca1494d5646869 +https://conda.anaconda.org/conda-forge/linux-64/libgpg-error-1.49-h4f305b6_0.conda#dfcfd72c7a430d3616763ecfbefe4ca9 +https://conda.anaconda.org/conda-forge/linux-64/libpq-16.3-ha72fbe1_0.conda#bac737ae28b79cfbafd515258d97d29e +https://conda.anaconda.org/conda-forge/linux-64/markupsafe-2.1.5-py39hd1e30aa_0.conda#9a9a22eb1f83c44953319ee3b027769f +https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19 +https://conda.anaconda.org/conda-forge/linux-64/openblas-0.3.27-pthreads_h7a3da1a_0.conda#4b422ebe8fc6a5320d0c1c22e5a46032 +https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.2-h488ebb8_0.conda#7f2e286780f072ed750df46dc2631138 +https://conda.anaconda.org/conda-forge/noarch/packaging-24.0-pyhd8ed1ab_0.conda#248f521b64ce055e7feae3105e7abeb8 +https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_0.conda#d3483c8fc2dc2cc3f5cf43e26d60cabf +https://conda.anaconda.org/conda-forge/noarch/ply-3.11-pyhd8ed1ab_2.conda#18c6deb6f9602e32446398203c8f0e91 +https://conda.anaconda.org/conda-forge/noarch/pygments-2.18.0-pyhd8ed1ab_0.conda#b7f5c092b8f9800150d998a71b76d5a1 +https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.2-pyhd8ed1ab_0.conda#b9a4dacf97241704529131a0dfc0494f +https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha2e5f31_6.tar.bz2#2a7de29fb590ca14b5243c4c812c8025 +https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2024.1-pyhd8ed1ab_0.conda#98206ea9954216ee7540f0c773f2104d +https://conda.anaconda.org/conda-forge/noarch/pytz-2024.1-pyhd8ed1ab_0.conda#3eeeeb9e4827ace8c0c1419c85d590ad +https://conda.anaconda.org/conda-forge/noarch/setuptools-70.0.0-pyhd8ed1ab_0.conda#c8ddb4f34a208df4dd42509a0f6a1c89 +https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 +https://conda.anaconda.org/conda-forge/noarch/snowballstemmer-2.2.0-pyhd8ed1ab_0.tar.bz2#4d22a9315e78c6827f806065957d566e +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-jsmath-1.0.1-pyhd8ed1ab_0.conda#da1d979339e2714c30a8e806a33ec087 +https://conda.anaconda.org/conda-forge/noarch/tabulate-0.9.0-pyhd8ed1ab_1.tar.bz2#4759805cce2d914c38472f70bf4d8bcb +https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.5.0-pyhc1e730c_0.conda#df68d78237980a159bd7149f33c0e8fd +https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095 +https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 +https://conda.anaconda.org/conda-forge/linux-64/tornado-6.4.1-py39hd3abc70_0.conda#c183e99f9320e5e2d0f9c43efcb3fb22 +https://conda.anaconda.org/conda-forge/linux-64/unicodedata2-15.1.0-py39hd1e30aa_0.conda#1da984bbb6e765743e13388ba7b7b2c8 +https://conda.anaconda.org/conda-forge/noarch/wheel-0.43.0-pyhd8ed1ab_1.conda#0b5293a157c2b5cd513dd1b03d8d3aae 
+https://conda.anaconda.org/conda-forge/linux-64/xcb-util-image-0.4.0-h8ee46fc_1.conda#9d7bcddf49cbf727730af10e71022c73 +https://conda.anaconda.org/conda-forge/linux-64/xkeyboard-config-2.42-h4ab18f5_0.conda#b193af204da1bfb8c13882d131a14bd2 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxext-1.3.4-h0b41bf4_2.conda#82b6df12252e6f32402b96dacc656fec +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.11-hd590300_0.conda#ed67c36f215b310412b2af935bf3e530 +https://conda.anaconda.org/conda-forge/noarch/zipp-3.18.1-pyhd8ed1ab_0.conda#c77c4aabc01b156a8cb4395f0233d335 +https://conda.anaconda.org/conda-forge/noarch/babel-2.14.0-pyhd8ed1ab_0.conda#9669586875baeced8fc30c0826c3270e +https://conda.anaconda.org/conda-forge/linux-64/cairo-1.18.0-h3faef2a_0.conda#f907bb958910dc404647326ca80c263e +https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.53.0-py39hd3abc70_0.conda#9dae301603c88aef61dba733e8931cdd +https://conda.anaconda.org/conda-forge/linux-64/glib-2.80.2-hf974151_0.conda#d427988dc3dbd0a4c136f52db356cc6a +https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-7.1.0-pyha770c72_0.conda#0896606848b2dc5cebdf111b6543aa04 +https://conda.anaconda.org/conda-forge/noarch/importlib_resources-6.4.0-pyhd8ed1ab_0.conda#c5d3907ad8bd7bf557521a1833cf7e6d +https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.4-pyhd8ed1ab_0.conda#7b86ecb7d3557821c649b3c31e3eb9f2 +https://conda.anaconda.org/conda-forge/noarch/joblib-1.4.2-pyhd8ed1ab_0.conda#25df261d4523d9f9783bcdb7208d872f +https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-22_linux64_openblas.conda#4b31699e0ec5de64d5896e580389c9a1 +https://conda.anaconda.org/conda-forge/linux-64/libgcrypt-1.10.3-hd590300_0.conda#32d16ad533c59bb0a3c5ffaf16110829 +https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-22_linux64_openblas.conda#b083767b6c877e24ee597d93b87ab838 +https://conda.anaconda.org/conda-forge/linux-64/libsndfile-1.2.2-hc60ed4a_1.conda#ef1910918dd895516a769ed36b5b3a4e +https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.7.0-h662e7e4_0.conda#b32c0da42b1f24a98577bb3d7fc0b995 +https://conda.anaconda.org/conda-forge/noarch/meson-1.4.0-pyhd8ed1ab_0.conda#52a0660cfa40b45bf254ecc3374cb2e0 +https://conda.anaconda.org/conda-forge/linux-64/pillow-10.3.0-py39h90c7501_0.conda#1e3b6af9592be71ce19f0a6aae05d97b +https://conda.anaconda.org/conda-forge/noarch/pip-24.0-pyhd8ed1ab_0.conda#f586ac1e56c8638b64f9c8122a7b8a67 +https://conda.anaconda.org/conda-forge/noarch/pyproject-metadata-0.8.0-pyhd8ed1ab_0.conda#573fe09d7bd0cd4bcc210d8369b5ca47 +https://conda.anaconda.org/conda-forge/noarch/pytest-8.2.2-pyhd8ed1ab_0.conda#0f3f49c22c7ef3a1195fa61dad3c43be +https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0-pyhd8ed1ab_0.conda#2cf4264fffb9e6eff6031c5b6884d61c +https://conda.anaconda.org/conda-forge/linux-64/sip-6.7.12-py39h3d6467e_0.conda#e667a3ab0df62c54e60e1843d2e6defb +https://conda.anaconda.org/conda-forge/noarch/urllib3-2.2.1-pyhd8ed1ab_0.conda#08807a87fa7af10754d46f63b368e016 +https://conda.anaconda.org/conda-forge/linux-64/gstreamer-1.24.4-haf2f30d_0.conda#926c2c7ee7a0b48d6d70783a33f7bc80 +https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-8.5.0-hfac3d4d_0.conda#f5126317dd0ce0ba26945e411ecc6960 +https://conda.anaconda.org/conda-forge/noarch/importlib-resources-6.4.0-pyhd8ed1ab_0.conda#dcbadab7a68738a028e195ab68ab2d2e +https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.9.0-22_linux64_openblas.conda#1fd156abd41a4992835952f6f4d951d0 
+https://conda.anaconda.org/conda-forge/linux-64/libsystemd0-255-h3516f8a_1.conda#3366af27f0b593544a6cd453c7932ac5 +https://conda.anaconda.org/conda-forge/noarch/meson-python-0.16.0-pyh0c530f3_0.conda#e16f0dbf502da873be9f9adb0dc52547 +https://conda.anaconda.org/conda-forge/linux-64/numpy-1.26.4-py39h474f0d3_0.conda#aa265f5697237aa13cc10f53fa8acc4f +https://conda.anaconda.org/conda-forge/linux-64/pyqt5-sip-12.12.2-py39h3d6467e_5.conda#93aff412f3e49fdb43361c0215cbd72d +https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.5.0-pyhd8ed1ab_0.conda#d5f595da2daead898ca958ac62f0307b +https://conda.anaconda.org/conda-forge/noarch/requests-2.32.3-pyhd8ed1ab_0.conda#5ede4753180c7a550a443c430dc8ab52 +https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-22_linux64_openblas.conda#63ddb593595c9cf5eb08d3de54d66df8 +https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.2.1-py39h7633fee_0.conda#bdc188e59857d6efab332714e0d01d93 +https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.24.4-h9ad1361_0.conda#147cce520ec59367549fd0d96d404213 +https://conda.anaconda.org/conda-forge/linux-64/pandas-2.2.2-py39hfc16268_1.conda#8b23d2b425035a7468d17e6fe1d54124 +https://conda.anaconda.org/conda-forge/linux-64/pulseaudio-client-17.0-hb77b528_0.conda#07f45f1be1c25345faddb8db0de8039b +https://conda.anaconda.org/conda-forge/linux-64/scipy-1.13.1-py39haf93ffa_0.conda#492a2cd65862d16a4aaf535ae9ccb761 +https://conda.anaconda.org/conda-forge/linux-64/blas-2.122-openblas.conda#5065468105542a8b23ea47bd8b6fa55f +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.8.4-py39h10d1fc8_2.conda#c9fb6571b93b1dd490ea627af7344f36 +https://conda.anaconda.org/conda-forge/linux-64/pyamg-5.1.0-py39h85c637f_1.conda#b2b15112d019e27e62f9433e31607d08 +https://conda.anaconda.org/conda-forge/linux-64/qt-main-5.15.8-hc9dc06e_21.conda#b325046180590c868ce0dbf267b82eb8 +https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.15.9-py39h52134e7_5.conda#e1f148e57d071b09187719df86f513c1 +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.8.4-py39hf3d152e_2.conda#bd956c7563b6a6b27521b83623c74e22 +https://conda.anaconda.org/conda-forge/noarch/numpydoc-1.7.0-pyhd8ed1ab_1.conda#66798cbfdcb003d9fbccd92cd08eb3ac +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-applehelp-1.0.8-pyhd8ed1ab_0.conda#611a35a27914fac3aa37611a6fe40bb5 +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-devhelp-1.0.6-pyhd8ed1ab_0.conda#d7e4954df0d3aea2eacc7835ad12671d +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-htmlhelp-2.0.5-pyhd8ed1ab_0.conda#7e1e7437273682ada2ed5e9e9714b140 +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-qthelp-1.0.7-pyhd8ed1ab_0.conda#26acae54b06f178681bfb551760f5dd1 +https://conda.anaconda.org/conda-forge/noarch/sphinx-7.3.7-pyhd8ed1ab_0.conda#7b1465205e28d75d2c0e1a868ee00a67 +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-serializinghtml-1.1.10-pyhd8ed1ab_0.conda#e507335cb4ca9cff4c3d0fa9cdab255e diff --git a/build_tools/azure/pypy3_environment.yml b/build_tools/azure/pypy3_environment.yml index d4f0d22e96042..84784d5a90b20 100644 --- a/build_tools/azure/pypy3_environment.yml +++ b/build_tools/azure/pypy3_environment.yml @@ -15,6 +15,8 @@ dependencies: - matplotlib - pyamg - pytest - - pytest-xdist=2.5.0 - - setuptools + - pytest-xdist + - pip + - ninja + - meson-python - ccache diff --git a/build_tools/azure/pypy3_linux-64_conda.lock b/build_tools/azure/pypy3_linux-64_conda.lock index 5cadf0f58de2f..a47c89e5a7aab 100644 --- 
a/build_tools/azure/pypy3_linux-64_conda.lock +++ b/build_tools/azure/pypy3_linux-64_conda.lock @@ -1,29 +1,27 @@ # Generated by conda-lock. # platform: linux-64 -# input_hash: 35e4a4f1db15219fa4cb71af7b54acc24ec7c3b3610c479f979c6c44cbd93db7 +# input_hash: cb8a71fc5a5762d803c62e60f01aaf1788c4357c1233fd623cecb1225076b9b5 @EXPLICIT https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 -https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2023.5.7-hbcca054_0.conda#f5c65075fc34438d5b456c7f3f5ab695 -https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-13.1.0-h15d22d2_0.conda#afb656a334c409dd9805508af1c89c7a -https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.1.0-hfd8a6a1_0.conda#067bcc23164642f4c226da631f2a2e1d -https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.9-3_pypy39_pp73.conda#6f23be0f8f1e4871998437b188425ea3 -https://conda.anaconda.org/conda-forge/noarch/tzdata-2023c-h71feb2d_0.conda#939e3e74d8be4dac89ce83b20de2492a -https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-13.1.0-h69a702a_0.conda#506dc07710dd5b0ba63cbf134897fc10 +https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2024.2.2-hbcca054_0.conda#2f4327a1cbe7f022401b236e915a5fef +https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.2.0-hc0a3c3a_7.conda#53ebd4c833fa01cb2c6353e99f905406 +https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.9-4_pypy39_pp73.conda#c1b2f29111681a4036ed21eaa3f44620 +https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda#161081fc7cec0bfda0d86d7cb595f8d8 https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_kmp_llvm.tar.bz2#562b26ba2e19059551a811e72ab7f793 -https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.1.0-he5830b7_0.conda#cd93f779ff018dd85c7544c015c9db3c -https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h7f98852_4.tar.bz2#a1fd65c7ccbf10880423d82bca54eb54 +https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.2.0-h77fa898_7.conda#72ec1b1b04c4d15d4204ece1ecea5978 +https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hd590300_5.conda#69b8b6202a07720f448be700e300ccf4 https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h27087fc_0.tar.bz2#76bbff344f0134279f225174e9064c8f -https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.0.9-h166bdaf_8.tar.bz2#9194c9bf9428035a05352d031462eae4 -https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.18-h0b41bf4_0.conda#6aa9c9de5542ecb07fdda9ca626252d8 -https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.5.0-hcb278e6_1.conda#6305a3dd2752c76335295da4e581f2fd +https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.1.0-hd590300_1.conda#aec6c91c7371c26392a06708a73c70e5 +https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.20-hd590300_0.conda#8e88f9389f1165d7c0936fe40d9a9a79 +https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.6.2-h59595ed_0.conda#e7ba12deb7020dd080c6c70e7b6f6a3d https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3 -https://conda.anaconda.org/conda-forge/linux-64/libhiredis-1.0.2-h2cc385e_0.tar.bz2#b34907d3a81a3cd8095ee83d174c074a -https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-2.1.5.1-h0b41bf4_0.conda#1edd9e67bdb90d78cea97733ff6b54e6 -https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.23-pthreads_h80387f5_0.conda#9c5ea51ccb8ffae7d06c645869d24ce6 
-https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.3.0-h0b41bf4_0.conda#0d4a7508d8c6c65314f2b9c1f56ad408 -https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-h166bdaf_4.tar.bz2#f3f9de449d32ca9b9c66a22863c96f41 -https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.4-hcb278e6_0.conda#681105bccc2a3f7f1a837d47d39c9179 -https://conda.anaconda.org/conda-forge/linux-64/openssl-3.1.1-hd590300_1.conda#2e1d7b458ac8f1e3ca4e18b77add6277 +https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-13.2.0-hca663fb_7.conda#c0bd771f09a326fdcd95a60b617795bf +https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.0.0-hd590300_1.conda#ea25936bb4080d843790b586850f82b8 +https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.4.0-hd590300_0.conda#b26e8aa824079e1be0294e7152ca4559 +https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-hd590300_5.conda#f36c115f1ee199da648e0597ec2047ad +https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h59595ed_0.conda#fcea371545eda051b6deafb24889fc69 +https://conda.anaconda.org/conda-forge/linux-64/ninja-1.12.1-h297d8ca_0.conda#3aa1c7e292afeff25a0091ddd7c69b72 +https://conda.anaconda.org/conda-forge/linux-64/openssl-3.3.0-h4ab18f5_2.conda#b8934d399b56d73e323403e183d009c5 https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-h36c2ea0_1001.tar.bz2#22dad4df6e8630e8dff2428f6f6a7036 https://conda.anaconda.org/conda-forge/linux-64/xorg-kbproto-1.0.7-h7f98852_1002.tar.bz2#4b230e8381279d76131116660f5a241a https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.11-hd590300_0.conda#2c80dc38fface310c9bd81b17037fee5 @@ -31,79 +29,75 @@ https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdmcp-1.1.3-h7f98852_0.t https://conda.anaconda.org/conda-forge/linux-64/xorg-xextproto-7.3.0-h0b41bf4_1003.conda#bce9f945da8ad2ae9b1d7165a64d0f87 https://conda.anaconda.org/conda-forge/linux-64/xorg-xproto-7.0.31-h7f98852_1007.tar.bz2#b4a4381d54784606820704f7b5f05a15 https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0 -https://conda.anaconda.org/conda-forge/linux-64/expat-2.5.0-hcb278e6_1.conda#8b9b5aca60558d02ddaa09d599e55920 -https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-17_linux64_openblas.conda#57fb44770b1bc832fb2dbefa1bd502de -https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.0.9-h166bdaf_8.tar.bz2#4ae4d7795d33e02bd20f6b23d91caf82 -https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.0.9-h166bdaf_8.tar.bz2#04bac51ba35ea023dc48af73c1c88c25 -https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.39-h753d276_0.conda#e1c890aebdebbfbf87e2c917187b4416 -https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.42.0-h2797004_0.conda#fdaae20a1cf7cd62130a0973190a31b7 +https://conda.anaconda.org/conda-forge/linux-64/expat-2.6.2-h59595ed_0.conda#53fb86322bdb89496d7579fe3f02fd61 +https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.1.0-hd590300_1.conda#f07002e225d7a60a694d42a7bf5ff53f +https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.1.0-hd590300_1.conda#5fc11c6020d421960607d821310fcd4d +https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-13.2.0-h69a702a_7.conda#1b84f26d9f4f6026e179e7805d5a15cd +https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.43-h2797004_0.conda#009981dd9cfcaa4dbfa25ffaed86bcae +https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.45.3-h2797004_0.conda#b3316cbe90249da4f8e84cd66e1cc55b 
https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.15-h0b41bf4_0.conda#33277193f5b92bad9fdd230eb700929c -https://conda.anaconda.org/conda-forge/linux-64/openblas-0.3.23-pthreads_h855a84d_0.conda#ba8810202f8879562f01b4f9957c1ada https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4 -https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.12-h27826a3_0.tar.bz2#5b8c42eb62e9fc961af70bdd6a26e168 -https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.13-h166bdaf_4.tar.bz2#4b11e365c0275b808be78b30f904e295 -https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.2-h3eb15da_6.conda#6b63daed8feeca47be78f323e793d555 -https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.0.9-h166bdaf_8.tar.bz2#e5613f2bc717e9945840ff474419b8e4 -https://conda.anaconda.org/conda-forge/linux-64/ccache-4.8.1-h1fcd64f_0.conda#fd37a0c47d8b3667b73af0549037ce83 -https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-hca18f0e_1.conda#e1232042de76d24539a436d37597eb06 +https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda#d453b98d9c83e71da0741bb0ff4d76bc +https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.13-hd590300_5.conda#68c34ec6149623be41a1933ab996a209 +https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.6-ha6fb4c9_0.conda#4d056880988120e29d75bfff282e0f45 +https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.1.0-hd590300_1.conda#39f910d205726805a958da408ca194ba +https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-h267a509_2.conda#9ae35c3d96db2c94ce0cef86efdfa2cb https://conda.anaconda.org/conda-forge/linux-64/gdbm-1.18-h0a1914f_2.tar.bz2#b77bc399b07a19c00fe12fdc95ee0297 -https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-17_linux64_openblas.conda#7ef0969b00fe3d6eef56a8151d3afb29 -https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-17_linux64_openblas.conda#a2103882c46492e26500fcb56c03de8b -https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.5.0-ha587672_6.conda#4e5ee4b062c21519efbee7e2ae608748 -https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-16.0.5-h4dfa4b3_0.conda#9441a97b74c692d969ff465ac6c0ccea -https://conda.anaconda.org/conda-forge/linux-64/sqlite-3.42.0-h2c6b66d_0.conda#1192f6ec654a5bc4ee1d64bdc4a3e5cc -https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.4-h8ee46fc_1.conda#52d09ea80a42c0466214609ef0a2d62d -https://conda.anaconda.org/conda-forge/linux-64/brotli-1.0.9-h166bdaf_8.tar.bz2#2ff08978892a3e8b954397c461f18418 -https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.15-haa2dc70_1.conda#980d8aca0bc23ca73fa8caa3e7c84c28 -https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.9.0-17_linux64_openblas.conda#949709aa6ee6a2dcdb3de6dd99147d17 -https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.0-hfec8fc6_2.conda#5ce6a42505c6e9e6151c54c3ec8d68ea -https://conda.anaconda.org/conda-forge/linux-64/pypy3.9-7.3.11-h9557127_1.conda#c5fe8c8aaecf7dd44dc3042789f95987 -https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-17_linux64_openblas.conda#fde382e41d77b65315fab79ab93a20ab -https://conda.anaconda.org/conda-forge/linux-64/python-3.9.16-0_73_pypy.conda#16eebd2564f86026ea0abe5b8e446438 -https://conda.anaconda.org/conda-forge/linux-64/blas-2.117-openblas.conda#54b4b02b897156056f3056f992261d0c -https://conda.anaconda.org/conda-forge/noarch/certifi-2023.5.7-pyhd8ed1ab_0.conda#5d1b71c942b8421285934dad1d891ebc 
-https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.1.0-pyhd8ed1ab_0.conda#7fcff9f6f123696e940bda77bd4d6551 +https://conda.anaconda.org/conda-forge/linux-64/libhiredis-1.0.2-h2cc385e_0.tar.bz2#b34907d3a81a3cd8095ee83d174c074a +https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.27-pthreads_h413a1c8_0.conda#a356024784da6dfd4683dc5ecf45b155 +https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.6.0-h1dd3fc0_3.conda#66f03896ffbe1a110ffda05c7a856504 +https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-18.1.5-ha31de31_0.conda#b923cdb6e567ada84f991ffcc5848afb +https://conda.anaconda.org/conda-forge/linux-64/sqlite-3.45.3-h2c6b66d_0.conda#be7d70f2db41b674733667bdd69bd000 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.9-h8ee46fc_0.conda#077b6e8ad6a3ddb741fce2496dd01bec +https://conda.anaconda.org/conda-forge/linux-64/brotli-1.1.0-hd590300_1.conda#f27a24d46e3ea7b70a1f98e50c62508f +https://conda.anaconda.org/conda-forge/linux-64/ccache-4.9.1-h1fcd64f_0.conda#3620f564bcf28c3524951b6f64f5c5ac +https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.16-hb7c19ff_0.conda#51bb7010fc86f70eee639b4bb7a894f5 +https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-22_linux64_openblas.conda#1a2a0cd3153464fee6646f3dd6dad9b8 +https://conda.anaconda.org/conda-forge/linux-64/openblas-0.3.27-pthreads_h7a3da1a_0.conda#4b422ebe8fc6a5320d0c1c22e5a46032 +https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.2-h488ebb8_0.conda#7f2e286780f072ed750df46dc2631138 +https://conda.anaconda.org/conda-forge/linux-64/pypy3.9-7.3.15-h9557127_1.conda#0862f2ce457660f1060225d96d468237 +https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-22_linux64_openblas.conda#4b31699e0ec5de64d5896e580389c9a1 +https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-22_linux64_openblas.conda#b083767b6c877e24ee597d93b87ab838 +https://conda.anaconda.org/conda-forge/linux-64/python-3.9.18-1_73_pypy.conda#6e0143cd3dd940d3004cd857e37ccd81 +https://conda.anaconda.org/conda-forge/noarch/certifi-2024.2.2-pyhd8ed1ab_0.conda#0876280e409658fc6f9e75d035960333 https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 -https://conda.anaconda.org/conda-forge/noarch/cycler-0.11.0-pyhd8ed1ab_0.tar.bz2#a50559fad0affdbb33729a68669ca1cb -https://conda.anaconda.org/conda-forge/linux-64/cython-0.29.35-py39hc10206b_0.conda#9e7ab7c9dfff3ea8c3df6f68c657436b -https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.1.1-pyhd8ed1ab_0.conda#7312299d7a0ea4993159229b7d2dceb2 -https://conda.anaconda.org/conda-forge/noarch/execnet-1.9.0-pyhd8ed1ab_0.tar.bz2#0e521f7a5e60d508b121d38b04874fb2 -https://conda.anaconda.org/conda-forge/noarch/idna-3.4-pyhd8ed1ab_0.tar.bz2#34272b248891bddccc64479f9a7fffed +https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhd8ed1ab_0.conda#5cd86562580f274031ede6aa6aa24441 +https://conda.anaconda.org/conda-forge/linux-64/cython-3.0.10-py39hc10206b_0.conda#60c2d58b33a21c32f469e3f6a9eb7e4b +https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.0-pyhd8ed1ab_2.conda#8d652ea2ee8eaee02ed8dc820bc794aa +https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_0.conda#15dda3cdbf330abfe9f555d22f66db46 https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5 -https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.4-py39h2865249_1.tar.bz2#6b7e75ba141872a00154f312d43d9a8c 
+https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.5-py39ha90811c_1.conda#25edffabcb0760fc1821597c4ce920db +https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.9.0-22_linux64_openblas.conda#1fd156abd41a4992835952f6f4d951d0 https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19 -https://conda.anaconda.org/conda-forge/linux-64/numpy-1.24.3-py39h129f8d9_0.conda#0021613f91e64bd6fa3aece9e5b68f34 -https://conda.anaconda.org/conda-forge/noarch/packaging-23.1-pyhd8ed1ab_0.conda#91cda59e66e1e4afe9476f8ef98f5c30 -https://conda.anaconda.org/conda-forge/linux-64/pillow-9.5.0-py39hb514683_1.conda#beec7faed9dff6b30e8a1a1c22f9f039 -https://conda.anaconda.org/conda-forge/noarch/pluggy-1.0.0-pyhd8ed1ab_5.tar.bz2#7d301a0d25f424d96175f810935f0da9 -https://conda.anaconda.org/conda-forge/noarch/py-1.11.0-pyh6c4a22f_0.tar.bz2#b4613d7e7a493916d867842a6a148054 -https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.0.9-pyhd8ed1ab_0.tar.bz2#e8fbc1b54b25f4b08281467bc13b70cc -https://conda.anaconda.org/conda-forge/noarch/pypy-7.3.11-0_pypy39.conda#059800e8aa07f99d31e3dd0bf553a3f6 -https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha2e5f31_6.tar.bz2#2a7de29fb590ca14b5243c4c812c8025 -https://conda.anaconda.org/conda-forge/noarch/setuptools-67.7.2-pyhd8ed1ab_0.conda#3b68bc43ec6baa48f7354a446267eefe +https://conda.anaconda.org/conda-forge/linux-64/numpy-1.26.4-py39h6dedee3_0.conda#557d64563e84ff21b14f586c7f662b7f +https://conda.anaconda.org/conda-forge/noarch/packaging-24.0-pyhd8ed1ab_0.conda#248f521b64ce055e7feae3105e7abeb8 +https://conda.anaconda.org/conda-forge/linux-64/pillow-10.3.0-py39h90a76f3_0.conda#799e6519cfffe2784db27b1db2ef33f3 +https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_0.conda#d3483c8fc2dc2cc3f5cf43e26d60cabf +https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.2-pyhd8ed1ab_0.conda#b9a4dacf97241704529131a0dfc0494f +https://conda.anaconda.org/conda-forge/noarch/pypy-7.3.15-1_pypy39.conda#a418a6c16bd6f7ed56b92194214791a0 +https://conda.anaconda.org/conda-forge/noarch/setuptools-70.0.0-pyhd8ed1ab_0.conda#c8ddb4f34a208df4dd42509a0f6a1c89 https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 -https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.1.0-pyh8a188c0_0.tar.bz2#a2995ee828f65687ac5b1e71a2ab1e0c +https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.5.0-pyhc1e730c_0.conda#df68d78237980a159bd7149f33c0e8fd https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 -https://conda.anaconda.org/conda-forge/linux-64/tornado-6.3.2-py39hf860d4a_0.conda#f3adae0ec927d6c139ef9557bda43fd0 -https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.6.3-pyha770c72_0.conda#4a3014a4d107d15475d106b751c4e352 -https://conda.anaconda.org/conda-forge/linux-64/unicodedata2-15.0.0-py39h4d8b378_0.tar.bz2#44eea5be274d005065d87df9cf2a9234 -https://conda.anaconda.org/conda-forge/noarch/zipp-3.15.0-pyhd8ed1ab_0.conda#13018819ca8f5b7cc675a8faf1f5fedf -https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.0.7-py39haa83c70_0.conda#77595fa3e3dfca46289e3722cb97b29b -https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.39.4-py39hf860d4a_0.conda#fd4b05a718ebd4fabc806466f7f3ed8f -https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-6.6.0-pyha770c72_0.conda#f91a5d5175fb7ff2a91952ec7da59cb9 
-https://conda.anaconda.org/conda-forge/noarch/importlib_resources-5.12.0-pyhd8ed1ab_0.conda#e5fd2260a231ee63b6969f4801082f2b -https://conda.anaconda.org/conda-forge/noarch/joblib-1.2.0-pyhd8ed1ab_0.tar.bz2#7583652522d71ad78ba536bba06940eb -https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.2-pyhd8ed1ab_0.tar.bz2#dd999d1cc9f79e67dbb855c8924c7984 -https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.6.3-hd8ed1ab_0.conda#3876f650ed7d0f95d70fa4b647621909 -https://conda.anaconda.org/conda-forge/noarch/urllib3-2.0.2-pyhd8ed1ab_0.conda#81a763f3c64fe6d5f32e033b0325265d -https://conda.anaconda.org/conda-forge/noarch/importlib-resources-5.12.0-pyhd8ed1ab_0.conda#3544c818f0720c89eb16ae6940ab440b -https://conda.anaconda.org/conda-forge/noarch/platformdirs-3.5.1-pyhd8ed1ab_0.conda#e2be672aece1f060adf7154f76531a35 -https://conda.anaconda.org/conda-forge/noarch/pytest-7.3.1-pyhd8ed1ab_0.conda#547c7de697ec99b494a28ddde185b5a4 -https://conda.anaconda.org/conda-forge/noarch/requests-2.31.0-pyhd8ed1ab_0.conda#a30144e4156cdbb236f99ebb49828f8b -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.7.1-py39h3a8b213_0.conda#9e1009635ea6b7924f827d6022d0ade6 -https://conda.anaconda.org/conda-forge/noarch/pooch-1.7.0-pyha770c72_3.conda#5936894aade8240c867d292aa0d980c6 -https://conda.anaconda.org/conda-forge/noarch/pytest-forked-1.6.0-pyhd8ed1ab_0.conda#a46947638b6e005b63d2d6271da529b0 -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.7.1-py39h4162558_0.conda#b6ca076a90a7f2a8d7ff976d243dd4c5 -https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-2.5.0-pyhd8ed1ab_0.tar.bz2#1fdd1f3baccf0deb647385c677a1a48e -https://conda.anaconda.org/conda-forge/linux-64/scipy-1.10.1-py39h129f8d9_3.conda#ccc3e84894f1a2b3fea200b4e8946903 -https://conda.anaconda.org/conda-forge/linux-64/pyamg-5.0.0-py39h6728ab1_0.conda#ee14077fae1c48e0ca5154f5a5427521 +https://conda.anaconda.org/conda-forge/linux-64/tornado-6.4-py39hf860d4a_0.conda#e7fded713fb466e1e0670afce1761b47 +https://conda.anaconda.org/conda-forge/linux-64/unicodedata2-15.1.0-py39hf860d4a_0.conda#f699157518d28d00c87542b4ec1273be +https://conda.anaconda.org/conda-forge/noarch/wheel-0.43.0-pyhd8ed1ab_1.conda#0b5293a157c2b5cd513dd1b03d8d3aae +https://conda.anaconda.org/conda-forge/noarch/zipp-3.17.0-pyhd8ed1ab_0.conda#2e4d6bc0b14e10f895fc6791a7d9b26a +https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-22_linux64_openblas.conda#63ddb593595c9cf5eb08d3de54d66df8 +https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.2.1-py39ha90811c_0.conda#07ed14c8326da42356514bcbc0b04802 +https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.51.0-py39hf860d4a_0.conda#63421b4dd7222fad555e34ec9af015a1 +https://conda.anaconda.org/conda-forge/noarch/importlib_resources-6.4.0-pyhd8ed1ab_0.conda#c5d3907ad8bd7bf557521a1833cf7e6d +https://conda.anaconda.org/conda-forge/noarch/joblib-1.4.2-pyhd8ed1ab_0.conda#25df261d4523d9f9783bcdb7208d872f +https://conda.anaconda.org/conda-forge/noarch/meson-1.4.0-pyhd8ed1ab_0.conda#52a0660cfa40b45bf254ecc3374cb2e0 +https://conda.anaconda.org/conda-forge/noarch/pip-24.0-pyhd8ed1ab_0.conda#f586ac1e56c8638b64f9c8122a7b8a67 +https://conda.anaconda.org/conda-forge/noarch/pyproject-metadata-0.8.0-pyhd8ed1ab_0.conda#573fe09d7bd0cd4bcc210d8369b5ca47 +https://conda.anaconda.org/conda-forge/noarch/pytest-8.2.1-pyhd8ed1ab_0.conda#e4418e8bdbaa8eea28e047531e6763c8 +https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0-pyhd8ed1ab_0.conda#2cf4264fffb9e6eff6031c5b6884d61c 
+https://conda.anaconda.org/conda-forge/linux-64/scipy-1.12.0-py39h6dedee3_2.conda#6c5d74bac41838f4377dfd45085e1fec +https://conda.anaconda.org/conda-forge/linux-64/blas-2.122-openblas.conda#5065468105542a8b23ea47bd8b6fa55f +https://conda.anaconda.org/conda-forge/noarch/importlib-resources-6.4.0-pyhd8ed1ab_0.conda#dcbadab7a68738a028e195ab68ab2d2e +https://conda.anaconda.org/conda-forge/noarch/meson-python-0.16.0-pyh0c530f3_0.conda#e16f0dbf502da873be9f9adb0dc52547 +https://conda.anaconda.org/conda-forge/linux-64/pyamg-5.1.0-py39h3c335be_1.conda#7278eb55a7e97a0ba2376a6c608e7c46 +https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.5.0-pyhd8ed1ab_0.conda#d5f595da2daead898ca958ac62f0307b +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.8.4-py39h6fb8a73_2.conda#3212f51613e10b3ee319f3f2bf8ee5a8 +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.8.4-py39h4162558_2.conda#05babd7bae196648bfc6b7e3d9ea7630 diff --git a/build_tools/azure/pytest-pyodide.js b/build_tools/azure/pytest-pyodide.js new file mode 100644 index 0000000000000..c195940ce3b5b --- /dev/null +++ b/build_tools/azure/pytest-pyodide.js @@ -0,0 +1,53 @@ +const { opendir } = require('node:fs/promises'); +const { loadPyodide } = require("pyodide"); + +async function main() { + let exit_code = 0; + try { + global.pyodide = await loadPyodide(); + let pyodide = global.pyodide; + const FS = pyodide.FS; + const NODEFS = FS.filesystems.NODEFS; + + let mountDir = "/mnt"; + pyodide.FS.mkdir(mountDir); + pyodide.FS.mount(pyodide.FS.filesystems.NODEFS, { root: "." }, mountDir); + + await pyodide.loadPackage(["micropip"]); + await pyodide.runPythonAsync(` + import glob + import micropip + + wheels = glob.glob('/mnt/dist/*.whl') + wheels = [f'emfs://{wheel}' for wheel in wheels] + print(f'installing wheels: {wheels}') + await micropip.install(wheels); + + pkg_list = micropip.list() + print(pkg_list) + `); + + // Pyodide is built without OpenMP, need to set environment variable to + // skip related test + await pyodide.runPythonAsync(` + import os + os.environ['SKLEARN_SKIP_OPENMP_TEST'] = 'true' + `); + + await pyodide.runPythonAsync("import micropip; micropip.install('pytest')"); + let pytest = pyodide.pyimport("pytest"); + let args = process.argv.slice(2); + console.log('pytest args:', args); + exit_code = pytest.main(pyodide.toPy(args)); + } catch (e) { + console.error(e); + // Arbitrary exit code here. 
I have seen this code reached instead of a + // Pyodide fatal error sometimes + exit_code = 66; + + } finally { + process.exit(exit_code); + } +} + +main(); diff --git a/build_tools/azure/python_nogil_lock.txt b/build_tools/azure/python_nogil_lock.txt index cd44de206adb4..7f67a48842dea 100644 --- a/build_tools/azure/python_nogil_lock.txt +++ b/build_tools/azure/python_nogil_lock.txt @@ -7,55 +7,67 @@ --index-url https://d1yxz45j0ypngg.cloudfront.net/ --extra-index-url https://pypi.org/simple -contourpy==1.0.7 +contourpy==1.1.1 # via matplotlib -cycler==0.11.0 +cycler==0.12.1 # via matplotlib -cython==0.29.33 +cython==3.0.10 # via -r /scikit-learn/build_tools/azure/python_nogil_requirements.txt -exceptiongroup==1.1.1 +exceptiongroup==1.2.1 # via pytest -execnet==1.9.0 +execnet==2.1.1 # via pytest-xdist -fonttools==4.39.4 +fonttools==4.51.0 # via matplotlib iniconfig==2.0.0 # via pytest -joblib==1.2.0 +joblib==1.4.2 # via -r /scikit-learn/build_tools/azure/python_nogil_requirements.txt kiwisolver==1.4.4 # via matplotlib matplotlib==3.6.2 # via -r /scikit-learn/build_tools/azure/python_nogil_requirements.txt +meson==1.4.0 + # via meson-python +meson-python==0.16.0 + # via -r /scikit-learn/build_tools/azure/python_nogil_requirements.txt +ninja==1.11.1.1 + # via -r /scikit-learn/build_tools/azure/python_nogil_requirements.txt numpy==1.24.0 # via # -r /scikit-learn/build_tools/azure/python_nogil_requirements.txt # contourpy # matplotlib # scipy -packaging==23.1 +packaging==24.0 # via # matplotlib + # meson-python + # pyproject-metadata # pytest pillow==9.5.0 # via matplotlib -pluggy==1.0.0 +pluggy==1.5.0 # via pytest -pyparsing==3.0.9 +pyparsing==3.1.2 # via matplotlib -pytest==7.3.1 +pyproject-metadata==0.8.0 + # via meson-python +pytest==7.4.4 # via # -r /scikit-learn/build_tools/azure/python_nogil_requirements.txt # pytest-xdist -pytest-xdist==3.3.0 +pytest-xdist==3.6.1 # via -r /scikit-learn/build_tools/azure/python_nogil_requirements.txt -python-dateutil==2.8.2 +python-dateutil==2.9.0.post0 # via matplotlib scipy==1.9.3 # via -r /scikit-learn/build_tools/azure/python_nogil_requirements.txt six==1.16.0 # via python-dateutil -threadpoolctl==3.1.0 +threadpoolctl==3.5.0 # via -r /scikit-learn/build_tools/azure/python_nogil_requirements.txt tomli==2.0.1 - # via pytest + # via + # meson-python + # pytest diff --git a/build_tools/azure/python_nogil_requirements.txt b/build_tools/azure/python_nogil_requirements.txt index 970059ede81aa..2cebad9a03b25 100644 --- a/build_tools/azure/python_nogil_requirements.txt +++ b/build_tools/azure/python_nogil_requirements.txt @@ -11,5 +11,10 @@ scipy cython joblib threadpoolctl -pytest +# TODO: somehow pytest 8 does not seem to work with meson editable +# install. Exit code is 5, i.e. 
no test collected +# This would be fixed by https://github.com/mesonbuild/meson-python/pull/569 +pytest<8 pytest-xdist +meson-python +ninja diff --git a/build_tools/azure/test_script.sh b/build_tools/azure/test_script.sh index 98ac2e797b73c..faf48e27efefb 100755 --- a/build_tools/azure/test_script.sh +++ b/build_tools/azure/test_script.sh @@ -48,37 +48,11 @@ if [[ "$COVERAGE" == "true" ]]; then TEST_CMD="$TEST_CMD --cov-config='$COVERAGE_PROCESS_START' --cov sklearn --cov-report=" fi -if [[ -n "$CHECK_WARNINGS" ]]; then - TEST_CMD="$TEST_CMD -Werror::DeprecationWarning -Werror::FutureWarning -Werror::numpy.VisibleDeprecationWarning" - - # numpy's 1.19.0's tostring() deprecation is ignored until scipy and joblib - # removes its usage - TEST_CMD="$TEST_CMD -Wignore:tostring:DeprecationWarning" - - # Ignore distutils deprecation warning, used by joblib internally - TEST_CMD="$TEST_CMD -Wignore:distutils\ Version\ classes\ are\ deprecated:DeprecationWarning" - - # Ignore pkg_resources deprecation warnings triggered by pyamg - TEST_CMD="$TEST_CMD -W 'ignore:pkg_resources is deprecated as an API:DeprecationWarning'" - TEST_CMD="$TEST_CMD -W 'ignore:Deprecated call to \`pkg_resources:DeprecationWarning'" - - # In some case, exceptions are raised (by bug) in tests, and captured by pytest, - # but not raised again. This is for instance the case when Cython directives are - # activated: IndexErrors (which aren't fatal) are raised on out-of-bound accesses. - # In those cases, pytest instead raises pytest.PytestUnraisableExceptionWarnings, - # which we must treat as errors on the CI. - TEST_CMD="$TEST_CMD -Werror::pytest.PytestUnraisableExceptionWarning" -fi - if [[ "$PYTEST_XDIST_VERSION" != "none" ]]; then XDIST_WORKERS=$(python -c "import joblib; print(joblib.cpu_count(only_physical_cores=True))") TEST_CMD="$TEST_CMD -n$XDIST_WORKERS" fi -if [[ "$SHOW_SHORT_SUMMARY" == "true" ]]; then - TEST_CMD="$TEST_CMD -ra" -fi - if [[ -n "$SELECTED_TESTS" ]]; then TEST_CMD="$TEST_CMD -k $SELECTED_TESTS" @@ -86,6 +60,15 @@ if [[ -n "$SELECTED_TESTS" ]]; then export SKLEARN_TESTS_GLOBAL_RANDOM_SEED="all" fi +TEST_CMD="$TEST_CMD --pyargs sklearn" +if [[ "$DISTRIB" == "conda-pypy3" ]]; then + # Run only common tests for PyPy. Running the full test suite uses too + # much memory and causes the test to time out sometimes. See + # https://github.com/scikit-learn/scikit-learn/issues/27662 for more + # details. + TEST_CMD="$TEST_CMD.tests.test_common" +fi + set -x -eval "$TEST_CMD --pyargs sklearn" +eval "$TEST_CMD" set +x diff --git a/build_tools/azure/test_script_pyodide.sh b/build_tools/azure/test_script_pyodide.sh index 69dea9c41eaf5..d1aa207f864a2 100644 --- a/build_tools/azure/test_script_pyodide.sh +++ b/build_tools/azure/test_script_pyodide.sh @@ -2,14 +2,8 @@ set -e -source pyodide-venv/bin/activate - -pip list - -# Need to be outside of the git clone otherwise finds non build sklearn folder -cd /tmp - -# TODO for now only testing sklearn import to make sure the wheel is not badly -# broken. When Pyodide 0.24 is released we should run the full test suite and -# xfail tests that fail due to Pyodide limitations -python -c 'import sklearn' +# We are using a pytest js wrapper script to run tests inside Pyodide. Maybe +# one day we can use a Pyodide venv instead but at the time of writing +# (2023-09-27) there is an issue with scipy.linalg in a Pyodide venv, see +# https://github.com/pyodide/pyodide/issues/3865 for more details. 
+node build_tools/azure/pytest-pyodide.js --pyargs sklearn --durations 20 --showlocals diff --git a/build_tools/azure/ubuntu_atlas_lock.txt b/build_tools/azure/ubuntu_atlas_lock.txt index 255d037ccbaee..3fa5995aafd1e 100644 --- a/build_tools/azure/ubuntu_atlas_lock.txt +++ b/build_tools/azure/ubuntu_atlas_lock.txt @@ -4,32 +4,40 @@ # # pip-compile --output-file=build_tools/azure/ubuntu_atlas_lock.txt build_tools/azure/ubuntu_atlas_requirements.txt # -cython==0.29.35 +cython==3.0.10 # via -r build_tools/azure/ubuntu_atlas_requirements.txt -exceptiongroup==1.1.1 +exceptiongroup==1.2.1 # via pytest -execnet==1.9.0 +execnet==2.1.1 # via pytest-xdist iniconfig==2.0.0 # via pytest -joblib==1.1.1 +joblib==1.2.0 # via -r build_tools/azure/ubuntu_atlas_requirements.txt -packaging==23.1 - # via pytest -pluggy==1.0.0 +meson==1.4.1 + # via meson-python +meson-python==0.16.0 + # via -r build_tools/azure/ubuntu_atlas_requirements.txt +ninja==1.11.1.1 + # via -r build_tools/azure/ubuntu_atlas_requirements.txt +packaging==24.1 + # via + # meson-python + # pyproject-metadata + # pytest +pluggy==1.5.0 # via pytest -py==1.11.0 - # via pytest-forked -pytest==7.3.1 +pyproject-metadata==0.8.0 + # via meson-python +pytest==8.2.2 # via # -r build_tools/azure/ubuntu_atlas_requirements.txt - # pytest-forked # pytest-xdist -pytest-forked==1.6.0 - # via pytest-xdist -pytest-xdist==2.5.0 +pytest-xdist==3.6.1 # via -r build_tools/azure/ubuntu_atlas_requirements.txt -threadpoolctl==2.0.0 +threadpoolctl==3.1.0 # via -r build_tools/azure/ubuntu_atlas_requirements.txt tomli==2.0.1 - # via pytest + # via + # meson-python + # pytest diff --git a/build_tools/azure/ubuntu_atlas_requirements.txt b/build_tools/azure/ubuntu_atlas_requirements.txt index 57413851e5329..dfb0cfebc54d1 100644 --- a/build_tools/azure/ubuntu_atlas_requirements.txt +++ b/build_tools/azure/ubuntu_atlas_requirements.txt @@ -1,8 +1,10 @@ # DO NOT EDIT: this file is generated from the specification found in the # following script to centralize the configuration for CI builds: # build_tools/update_environments_and_lock_files.py -cython -joblib==1.1.1 # min -threadpoolctl==2.0.0 # min +cython==3.0.10 # min +joblib==1.2.0 # min +threadpoolctl==3.1.0 # min pytest -pytest-xdist==2.5.0 +pytest-xdist +ninja +meson-python diff --git a/build_tools/azure/upload_codecov.sh b/build_tools/azure/upload_codecov.sh index ab6f55cf3b6ef..0e87b2dafc8b4 100755 --- a/build_tools/azure/upload_codecov.sh +++ b/build_tools/azure/upload_codecov.sh @@ -10,7 +10,7 @@ fi # When we update the codecov uploader version, we need to update the checksums. # The checksum for each codecov binary is available at # https://uploader.codecov.io e.g. for linux -# https://uploader.codecov.io/v0.4.1/linux/codecov.SHA256SUM. +# https://uploader.codecov.io/v0.7.1/linux/codecov.SHA256SUM. # Instead of hardcoding a specific version and signature in this script, it # would be possible to use the "latest" symlink URL but then we need to @@ -20,7 +20,7 @@ fi # However this approach would yield a larger number of downloads from # codecov.io and keybase.io, therefore increasing the risk of running into # network failures. 
-CODECOV_UPLOADER_VERSION=0.4.1 +CODECOV_UPLOADER_VERSION=0.7.1 CODECOV_BASE_URL="https://uploader.codecov.io/v$CODECOV_UPLOADER_VERSION" @@ -39,19 +39,19 @@ fi if [[ $OSTYPE == *"linux"* ]]; then curl -Os "$CODECOV_BASE_URL/linux/codecov" - SHA256SUM="32cb14b5f3aaacd67f4c1ff55d82f037d3cd10c8e7b69c051f27391d2e66e15c codecov" + SHA256SUM="b9282b8b43eef83f722646d8992c4dd36563046afe0806722184e7e9923a6d7b codecov" echo "$SHA256SUM" | shasum -a256 -c chmod +x codecov - ./codecov -t ${CODECOV_TOKEN} -R $BUILD_REPOSITORY_LOCALPATH -f coverage.xml -Z + ./codecov -t ${CODECOV_TOKEN} -R $BUILD_REPOSITORY_LOCALPATH -f coverage.xml -Z --verbose elif [[ $OSTYPE == *"darwin"* ]]; then curl -Os "$CODECOV_BASE_URL/macos/codecov" - SHA256SUM="4ab0f06f06e9c4d25464f155b0aff36bfc1e8dbcdb19bfffd586beed1269f3af codecov" + SHA256SUM="e4ce34c144d3195eccb7f8b9ca8de092d2a4be114d927ca942500f3a6326225c codecov" echo "$SHA256SUM" | shasum -a256 -c chmod +x codecov - ./codecov -t ${CODECOV_TOKEN} -R $BUILD_REPOSITORY_LOCALPATH -f coverage.xml -Z + ./codecov -t ${CODECOV_TOKEN} -R $BUILD_REPOSITORY_LOCALPATH -f coverage.xml -Z --verbose else curl -Os "$CODECOV_BASE_URL/windows/codecov.exe" - SHA256SUM="e0cda212aeaebe695509ce8fa2d608760ff70bc932003f544f1ad368ac5450a8 codecov.exe" + SHA256SUM="f5de88026f061ff08b88a5895f9c11855523924ceb8174e027403dd20fa5e4d6 codecov.exe" echo "$SHA256SUM" | sha256sum -c - ./codecov.exe -t ${CODECOV_TOKEN} -R $BUILD_REPOSITORY_LOCALPATH -f coverage.xml -Z + ./codecov.exe -t ${CODECOV_TOKEN} -R $BUILD_REPOSITORY_LOCALPATH -f coverage.xml -Z --verbose fi diff --git a/build_tools/azure/windows.yml b/build_tools/azure/windows.yml index fbeed3de51c4a..1727da4138f07 100644 --- a/build_tools/azure/windows.yml +++ b/build_tools/azure/windows.yml @@ -37,7 +37,7 @@ jobs: architecture: 'x86' displayName: Use 32 bit System Python condition: and(succeeded(), eq(variables['PYTHON_ARCH'], '32')) - - bash: ./build_tools/azure/install_win.sh + - bash: ./build_tools/azure/install.sh displayName: 'Install' - bash: ./build_tools/azure/test_script.sh displayName: 'Test Library' diff --git a/build_tools/circle/build_doc.sh b/build_tools/circle/build_doc.sh index 13be474ef4e28..014ac0fac8d7a 100755 --- a/build_tools/circle/build_doc.sh +++ b/build_tools/circle/build_doc.sh @@ -148,8 +148,6 @@ else make_args=html fi -make_args="SPHINXOPTS=-T $make_args" # show full traceback on exception - # Installing required system packages to support the rendering of math # notation in the HTML documentation and to optimize the image files sudo -E apt-get -yq update --allow-releaseinfo-change @@ -161,21 +159,20 @@ if [[ `type -t deactivate` ]]; then deactivate fi -MAMBAFORGE_PATH=$HOME/mambaforge -# Install dependencies with mamba -wget -q https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-x86_64.sh \ - -O mambaforge.sh -chmod +x mambaforge.sh && ./mambaforge.sh -b -p $MAMBAFORGE_PATH -export PATH="/usr/lib/ccache:$MAMBAFORGE_PATH/bin:$PATH" +# Install Miniforge +MINIFORGE_URL="https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh" +curl -L --retry 10 $MINIFORGE_URL -o miniconda.sh +MINIFORGE_PATH=$HOME/miniforge3 +bash ./miniconda.sh -b -p $MINIFORGE_PATH +source $MINIFORGE_PATH/etc/profile.d/conda.sh +conda activate +export PATH="/usr/lib/ccache:$PATH" ccache -M 512M export CCACHE_COMPRESS=1 -# pin conda-lock to latest released version (needs manual update from time to time) -mamba install "$(get_dep conda-lock min)" -y - -conda-lock install --log-level 
DEBUG --name $CONDA_ENV_NAME $LOCK_FILE -source activate $CONDA_ENV_NAME +create_conda_environment_from_lock_file $CONDA_ENV_NAME $LOCK_FILE +conda activate $CONDA_ENV_NAME show_installed_libraries @@ -192,17 +189,13 @@ export OMP_NUM_THREADS=1 if [[ "$CIRCLE_BRANCH" =~ ^main$ && -z "$CI_PULL_REQUEST" ]] then # List available documentation versions if on main - python build_tools/circle/list_versions.py > doc/versions.rst + python build_tools/circle/list_versions.py --json doc/js/versions.json --rst doc/versions.rst fi # The pipefail is requested to propagate exit code set -o pipefail && cd doc && make $make_args 2>&1 | tee ~/log.txt -# Insert the version warning for deployment -find _build/html/stable -name "*.html" | xargs sed -i '/<\/body>/ i \ -\ ' - cd - set +o pipefail @@ -246,7 +239,7 @@ then ( echo '
…'
echo "$affected" | sed 's|.*|… & [dev, stable] …|'
- echo '… General: Home | API Reference | Examples …'
+ echo '… General: Home | API Reference | Examples …'
echo '… Sphinx Warnings in affected files …'
echo "$warnings" | sed 's/\/home\/circleci\/project\//…/g'
echo '…
' diff --git a/build_tools/circle/doc_environment.yml b/build_tools/circle/doc_environment.yml index 84be13dfa5218..ea930fadcb528 100644 --- a/build_tools/circle/doc_environment.yml +++ b/build_tools/circle/doc_environment.yml @@ -15,21 +15,29 @@ dependencies: - pandas - pyamg - pytest - - pytest-xdist=2.5.0 + - pytest-xdist - pillow - - setuptools + - pip + - ninja + - meson-python - scikit-image - seaborn - memory_profiler - compilers - - sphinx=6.0.0 + - sphinx - sphinx-gallery + - sphinx-copybutton - numpydoc - sphinx-prompt - plotly + - polars - pooch - sphinxext-opengraph + - sphinx-remove-toctrees + - sphinx-design + - pydata-sphinx-theme - pip - pip: - jupyterlite-sphinx - jupyterlite-pyodide-kernel + - sphinxcontrib-sass diff --git a/build_tools/circle/doc_linux-64_conda.lock b/build_tools/circle/doc_linux-64_conda.lock index 76113302d2a0f..57908c8c4026f 100644 --- a/build_tools/circle/doc_linux-64_conda.lock +++ b/build_tools/circle/doc_linux-64_conda.lock @@ -1,72 +1,74 @@ # Generated by conda-lock. # platform: linux-64 -# input_hash: 936006a8395a70f77e3b4ebe07bd10d013d2e2d13b6042ce96f73632d466d840 +# input_hash: f6f3862aafcafa139a322e498517c3db58e1b8db95f1b1ca8c18f5b70d446dc9 @EXPLICIT https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 -https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2023.5.7-hbcca054_0.conda#f5c65075fc34438d5b456c7f3f5ab695 +https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2024.6.2-hbcca054_0.conda#847c3c2905cc467cea52c24f9cfa8080 https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2#0c96522c6bdaed4b1566d11387caaf45 https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2#34893075a5c9e55cdafac56607368fc6 https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2#4d59c254e01d9cde7957100457e2d5fb -https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-hab24e00_0.tar.bz2#19410c3df09dfb12d1206132a1d357c5 -https://conda.anaconda.org/conda-forge/noarch/kernel-headers_linux-64-2.6.32-he073ed8_15.tar.bz2#5dd5127afd710f91f6a75821bac0a4f0 -https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.39-hcc3a1bd_1.conda#737be0d34c22d24432049ab7a3214de4 -https://conda.anaconda.org/conda-forge/linux-64/libgcc-devel_linux-64-11.3.0-h210ce93_19.tar.bz2#9b7bdb0b42ce4e4670d32bfe0532b56a -https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-13.1.0-h15d22d2_0.conda#afb656a334c409dd9805508af1c89c7a -https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-devel_linux-64-11.3.0-h210ce93_19.tar.bz2#8aee006c0662f551f3acef9a7077a5b9 -https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.1.0-hfd8a6a1_0.conda#067bcc23164642f4c226da631f2a2e1d -https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.9-3_cp39.conda#0dd193187d54e585cac7eab942a8847e -https://conda.anaconda.org/conda-forge/noarch/tzdata-2023c-h71feb2d_0.conda#939e3e74d8be4dac89ce83b20de2492a +https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_2.conda#cbbe59391138ea5ad3658c76912e147f +https://conda.anaconda.org/conda-forge/noarch/kernel-headers_linux-64-2.6.32-he073ed8_17.conda#d731b543793afc0433c4fd593e693fce +https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.40-hf3520f5_3.conda#7c1062eaa78dec4ea8a9a988dbda6045 
+https://conda.anaconda.org/conda-forge/noarch/libgcc-devel_linux-64-12.3.0-h0223996_107.conda#851e9651c9e4cd5dc19f80398eba9a1c +https://conda.anaconda.org/conda-forge/noarch/libstdcxx-devel_linux-64-12.3.0-h0223996_107.conda#167a1f5d77d8f3c2a638f7eb418429f1 +https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.2.0-hc0a3c3a_7.conda#53ebd4c833fa01cb2c6353e99f905406 +https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.9-4_cp39.conda#bfe4b3259a8ac6cdf0037752904da6a7 +https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda#161081fc7cec0bfda0d86d7cb595f8d8 https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-0.tar.bz2#f766549260d6815b0c52253f1fb1bb29 -https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-13.1.0-h69a702a_0.conda#506dc07710dd5b0ba63cbf134897fc10 -https://conda.anaconda.org/conda-forge/linux-64/libgomp-13.1.0-he5830b7_0.conda#56ca14d57ac29a75d23a39eb3ee0ddeb -https://conda.anaconda.org/conda-forge/noarch/sysroot_linux-64-2.12-he073ed8_15.tar.bz2#66c192522eacf5bb763568b4e415d133 -https://conda.anaconda.org/conda-forge/linux-64/binutils_impl_linux-64-2.39-he00db2b_1.conda#3d726e8b51a1f5bfd66892a2b7d9db2d +https://conda.anaconda.org/conda-forge/linux-64/libgomp-13.2.0-h77fa898_7.conda#abf3fec87c2563697defa759dec3d639 +https://conda.anaconda.org/conda-forge/noarch/sysroot_linux-64-2.12-he073ed8_17.conda#595db67e32b276298ff3d94d07d47fbf +https://conda.anaconda.org/conda-forge/linux-64/binutils_impl_linux-64-2.40-ha1999f0_3.conda#2b0c0d451353cde73295d799ea8886b1 https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2#fee5683a3f04bd15cbd8318b096a27ab -https://conda.anaconda.org/conda-forge/linux-64/binutils-2.39-hdd6e379_1.conda#1276c18b0a562739185dbf5bd14b57b2 -https://conda.anaconda.org/conda-forge/linux-64/binutils_linux-64-2.39-h5fc0e48_13.conda#7f25a524665e4e2f8a5f86522f8d0e31 +https://conda.anaconda.org/conda-forge/linux-64/binutils-2.40-h4852527_3.conda#abcdaa12050c40a74330701f22418e5a +https://conda.anaconda.org/conda-forge/linux-64/binutils_linux-64-2.40-hb3c18ed_4.conda#19286994c03c5207a70c7cfabe294570 https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_kmp_llvm.tar.bz2#562b26ba2e19059551a811e72ab7f793 -https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.1.0-he5830b7_0.conda#cd93f779ff018dd85c7544c015c9db3c -https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.8-h166bdaf_0.tar.bz2#be733e69048951df1e4b4b7bb8c7666f -https://conda.anaconda.org/conda-forge/linux-64/aom-3.5.0-h27087fc_0.tar.bz2#a08150fd2298460cd1fcccf626305642 +https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.2.0-h77fa898_7.conda#72ec1b1b04c4d15d4204ece1ecea5978 +https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.11-hd590300_1.conda#0bb492cca54017ea314b809b1ee3a176 +https://conda.anaconda.org/conda-forge/linux-64/aom-3.9.0-hac33072_0.conda#93a3bf248e5bc729807db198a9c89f07 https://conda.anaconda.org/conda-forge/linux-64/attr-2.5.1-h166bdaf_1.tar.bz2#d9c69a24ad678ffce24c6543a0176b00 -https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h7f98852_4.tar.bz2#a1fd65c7ccbf10880423d82bca54eb54 -https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.19.1-hd590300_0.conda#e8c18d865be43e2fb3f7a145b6adf1f5 +https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hd590300_5.conda#69b8b6202a07720f448be700e300ccf4 https://conda.anaconda.org/conda-forge/linux-64/charls-2.4.2-h59595ed_0.conda#4336bd67920dd504cd8c6761d6a99645 
https://conda.anaconda.org/conda-forge/linux-64/dav1d-1.2.1-hd590300_0.conda#418c6ca5929a611cbd69204907a83995 -https://conda.anaconda.org/conda-forge/linux-64/gettext-0.21.1-h27087fc_0.tar.bz2#14947d8770185e5153fdd04d4673ed37 -https://conda.anaconda.org/conda-forge/linux-64/giflib-5.2.1-h0b41bf4_3.conda#96f3b11872ef6fad973eac856cd2624f -https://conda.anaconda.org/conda-forge/linux-64/graphite2-1.3.13-h58526e2_1001.tar.bz2#8c54672728e8ec6aa6db90cf2806d220 -https://conda.anaconda.org/conda-forge/linux-64/icu-72.1-hcb278e6_0.conda#7c8d20d847bb45f56bd941578fcfa146 -https://conda.anaconda.org/conda-forge/linux-64/jxrlib-1.1-h7f98852_2.tar.bz2#8e787b08fe19986d99d034b839df2961 +https://conda.anaconda.org/conda-forge/linux-64/gettext-tools-0.22.5-h59595ed_2.conda#985f2f453fb72408d6b6f1be0f324033 +https://conda.anaconda.org/conda-forge/linux-64/giflib-5.2.2-hd590300_0.conda#3bf7b9fd5a7136126e0234db4b87c8b6 +https://conda.anaconda.org/conda-forge/linux-64/graphite2-1.3.13-h59595ed_1003.conda#f87c7b7c2cb45f323ffbce941c78ab7c +https://conda.anaconda.org/conda-forge/linux-64/icu-73.2-h59595ed_0.conda#cc47e1facc155f91abd89b11e48e72ff +https://conda.anaconda.org/conda-forge/linux-64/jxrlib-1.1-hd590300_3.conda#5aeabe88534ea4169d4c49998f293d6c https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2#30186d27e2c9fa62b45fb1476b7200e3 https://conda.anaconda.org/conda-forge/linux-64/lame-3.100-h166bdaf_1003.tar.bz2#a8832b479f93521a9e7b5b743803be51 https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h27087fc_0.tar.bz2#76bbff344f0134279f225174e9064c8f -https://conda.anaconda.org/conda-forge/linux-64/libaec-1.0.6-hcb278e6_1.conda#0f683578378cddb223e7fd24f785ab2a -https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.0.9-h166bdaf_8.tar.bz2#9194c9bf9428035a05352d031462eae4 -https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.18-h0b41bf4_0.conda#6aa9c9de5542ecb07fdda9ca626252d8 -https://conda.anaconda.org/conda-forge/linux-64/libev-4.33-h516909a_1.tar.bz2#6f8720dff19e17ce5d48cfe7f3d2f0a3 -https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.5.0-hcb278e6_1.conda#6305a3dd2752c76335295da4e581f2fd +https://conda.anaconda.org/conda-forge/linux-64/libaec-1.1.3-h59595ed_0.conda#5e97e271911b8b2001a8b71860c32faa +https://conda.anaconda.org/conda-forge/linux-64/libasprintf-0.22.5-h661eb56_2.conda#dd197c968bf9760bba0031888d431ede +https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.1.0-hd590300_1.conda#aec6c91c7371c26392a06708a73c70e5 +https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.20-hd590300_0.conda#8e88f9389f1165d7c0936fe40d9a9a79 +https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.6.2-h59595ed_0.conda#e7ba12deb7020dd080c6c70e7b6f6a3d https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3 -https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.17-h166bdaf_0.tar.bz2#b62b52da46c39ee2bc3c162ac7f1804d -https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-2.1.5.1-h0b41bf4_0.conda#1edd9e67bdb90d78cea97733ff6b54e6 -https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.0-h7f98852_0.tar.bz2#39b1328babf85c7c3a61636d9cd50206 +https://conda.anaconda.org/conda-forge/linux-64/libgettextpo-0.22.5-h59595ed_2.conda#172bcc51059416e7ce99e7b528cede83 +https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-13.2.0-hca663fb_7.conda#c0bd771f09a326fdcd95a60b617795bf 
+https://conda.anaconda.org/conda-forge/linux-64/libhwy-1.1.0-h00ab1b0_0.conda#88928158ccfe797eac29ef5e03f7d23d +https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.17-hd590300_2.conda#d66573916ffcf376178462f1b61c941e +https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.0.0-hd590300_1.conda#ea25936bb4080d843790b586850f82b8 +https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hd590300_0.conda#30fd6e37fe21f86f4bd26d6ee73eeec7 https://conda.anaconda.org/conda-forge/linux-64/libogg-1.3.4-h7f98852_1.tar.bz2#6e8cc2173440d77708196c5b93771680 -https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.23-pthreads_h80387f5_0.conda#9c5ea51ccb8ffae7d06c645869d24ce6 https://conda.anaconda.org/conda-forge/linux-64/libopus-1.3.1-h7f98852_1.tar.bz2#15345e56d527b330e1cacbdf58676e8f -https://conda.anaconda.org/conda-forge/linux-64/libsanitizer-11.3.0-h239ccf8_19.tar.bz2#d17fd55aed84ab6592c5419b6600501c +https://conda.anaconda.org/conda-forge/linux-64/libsanitizer-12.3.0-hb8811af_7.conda#ee573415c47ce17f65101d0b3fba396d https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda#40b61aab5c7ba9ff276c41cfffe6b80b -https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.3.0-h0b41bf4_0.conda#0d4a7508d8c6c65314f2b9c1f56ad408 -https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-h166bdaf_4.tar.bz2#f3f9de449d32ca9b9c66a22863c96f41 +https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.4.0-hd590300_0.conda#b26e8aa824079e1be0294e7152ca4559 +https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda#5aa797f8787fe7a17d1b0821485b5adc +https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-h4ab18f5_1.conda#57d7dc60e9325e3de37ff8dffd18e814 https://conda.anaconda.org/conda-forge/linux-64/libzopfli-1.0.3-h9c3ff4c_0.tar.bz2#c66fe2d123249af7651ebde8984c51c2 https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.9.4-hcb278e6_0.conda#318b08df404f9c9be5712aaa5a6f0bb0 -https://conda.anaconda.org/conda-forge/linux-64/mpg123-1.31.3-hcb278e6_0.conda#141a126675b6d1a4eabb111a4a353898 -https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.4-hcb278e6_0.conda#681105bccc2a3f7f1a837d47d39c9179 +https://conda.anaconda.org/conda-forge/linux-64/mpg123-1.32.6-h59595ed_0.conda#9160cdeb523a1b20cf8d2a0bf821f45d +https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h59595ed_0.conda#fcea371545eda051b6deafb24889fc69 +https://conda.anaconda.org/conda-forge/linux-64/ninja-1.12.1-h297d8ca_0.conda#3aa1c7e292afeff25a0091ddd7c69b72 https://conda.anaconda.org/conda-forge/linux-64/nspr-4.35-h27087fc_0.conda#da0ec11a6454ae19bff5b02ed881a2b1 -https://conda.anaconda.org/conda-forge/linux-64/openssl-3.1.1-hd590300_1.conda#2e1d7b458ac8f1e3ca4e18b77add6277 -https://conda.anaconda.org/conda-forge/linux-64/pixman-0.40.0-h36c2ea0_0.tar.bz2#660e72c82f2e75a6b3fe6a6e75c79f19 +https://conda.anaconda.org/conda-forge/linux-64/openssl-3.3.1-h4ab18f5_0.conda#a41fa0e391cc9e0d6b78ac69ca047a6c +https://conda.anaconda.org/conda-forge/linux-64/pixman-0.43.2-h59595ed_0.conda#71004cbf7924e19c02746ccde9fd7123 https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-h36c2ea0_1001.tar.bz2#22dad4df6e8630e8dff2428f6f6a7036 -https://conda.anaconda.org/conda-forge/linux-64/snappy-1.1.10-h9fff704_0.conda#e6d228cd0bb74a51dd18f5bfce0b4115 -https://conda.anaconda.org/conda-forge/linux-64/xkeyboard-config-2.38-h0b41bf4_0.conda#9ac34337e5101a87e5d91da05d84aa48 
+https://conda.anaconda.org/conda-forge/linux-64/rav1e-0.6.6-he8a937b_2.conda#77d9955b4abddb811cb8ab1aa7d743e4 +https://conda.anaconda.org/conda-forge/linux-64/snappy-1.2.0-hdb0a2a9_1.conda#843bbb8ace1d64ac50d64639ff38b014 +https://conda.anaconda.org/conda-forge/linux-64/svt-av1-2.1.0-hac33072_0.conda#2a08edb7cd75e56623f2712292a97325 https://conda.anaconda.org/conda-forge/linux-64/xorg-kbproto-1.0.7-h7f98852_1002.tar.bz2#4b230e8381279d76131116660f5a241a https://conda.anaconda.org/conda-forge/linux-64/xorg-libice-1.1.1-hd590300_0.conda#b462a33c0be1421532f28bfe8f4a7514 https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.11-hd590300_0.conda#2c80dc38fface310c9bd81b17037fee5 @@ -76,242 +78,251 @@ https://conda.anaconda.org/conda-forge/linux-64/xorg-xextproto-7.3.0-h0b41bf4_10 https://conda.anaconda.org/conda-forge/linux-64/xorg-xf86vidmodeproto-2.3.1-h7f98852_1002.tar.bz2#3ceea9668625c18f19530de98b15d5b0 https://conda.anaconda.org/conda-forge/linux-64/xorg-xproto-7.0.31-h7f98852_1007.tar.bz2#b4a4381d54784606820704f7b5f05a15 https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0 -https://conda.anaconda.org/conda-forge/linux-64/yaml-0.2.5-h7f98852_2.tar.bz2#4cb3ad778ec2d5a7acbdf254eb1c42ae -https://conda.anaconda.org/conda-forge/linux-64/zfp-1.0.0-h27087fc_3.tar.bz2#0428af0510c3fafedf1c66b43102a34b +https://conda.anaconda.org/conda-forge/linux-64/zfp-1.0.1-h59595ed_0.conda#fd486bffbf0d6841cf1456a8f2e3a995 https://conda.anaconda.org/conda-forge/linux-64/zlib-ng-2.0.7-h0b41bf4_0.conda#49e8329110001f04923fe7e864990b0c -https://conda.anaconda.org/conda-forge/linux-64/expat-2.5.0-hcb278e6_1.conda#8b9b5aca60558d02ddaa09d599e55920 -https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-11.3.0-hab1b70f_19.tar.bz2#89ac16d36e66ccb9ca5d34c9217e5799 -https://conda.anaconda.org/conda-forge/linux-64/libavif-0.11.1-h8182462_2.conda#41c399ed4c439e37b844c24ab5621b5a -https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-17_linux64_openblas.conda#57fb44770b1bc832fb2dbefa1bd502de -https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.0.9-h166bdaf_8.tar.bz2#4ae4d7795d33e02bd20f6b23d91caf82 -https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.0.9-h166bdaf_8.tar.bz2#04bac51ba35ea023dc48af73c1c88c25 -https://conda.anaconda.org/conda-forge/linux-64/libcap-2.67-he9d0100_0.conda#d05556c80caffff164d17bdea0105a1a +https://conda.anaconda.org/conda-forge/linux-64/expat-2.6.2-h59595ed_0.conda#53fb86322bdb89496d7579fe3f02fd61 +https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-12.3.0-h58ffeeb_7.conda#95f78565a09852783d3e90e0389cfa5f +https://conda.anaconda.org/conda-forge/linux-64/libasprintf-devel-0.22.5-h661eb56_2.conda#02e41ab5834dcdcc8590cf29d9526f50 +https://conda.anaconda.org/conda-forge/linux-64/libavif16-1.0.4-hd2f8ffe_4.conda#cb911b3e0d863ca9caafd767525f7cac +https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.1.0-hd590300_1.conda#f07002e225d7a60a694d42a7bf5ff53f +https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.1.0-hd590300_1.conda#5fc11c6020d421960607d821310fcd4d +https://conda.anaconda.org/conda-forge/linux-64/libcap-2.69-h0f662aa_0.conda#25cb5999faa414e5ccb2c1388f62d3d5 https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2.tar.bz2#4d331e44109e3f0e19b4cb8f9b82f3e1 https://conda.anaconda.org/conda-forge/linux-64/libevent-2.1.12-hf998b51_1.conda#a1cfcc585f0c42bf8d5546bb1dfb668d 
-https://conda.anaconda.org/conda-forge/linux-64/libflac-1.4.2-h27087fc_0.tar.bz2#7daf72d8e2a8e848e11d63ed6d1026e0 -https://conda.anaconda.org/conda-forge/linux-64/libgpg-error-1.46-h620e276_0.conda#27e745f6f2e4b757e95dd7225fbe6bdb -https://conda.anaconda.org/conda-forge/linux-64/libnghttp2-1.52.0-h61bc06f_0.conda#613955a50485812985c059e7b269f42e -https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.39-h753d276_0.conda#e1c890aebdebbfbf87e2c917187b4416 -https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.42.0-h2797004_0.conda#fdaae20a1cf7cd62130a0973190a31b7 -https://conda.anaconda.org/conda-forge/linux-64/libssh2-1.11.0-h0841786_0.conda#1f5a58e686b13bcfde88b93f547d23fe +https://conda.anaconda.org/conda-forge/linux-64/libgettextpo-devel-0.22.5-h59595ed_2.conda#b63d9b6da3653179a278077f0de20014 +https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-13.2.0-h69a702a_7.conda#1b84f26d9f4f6026e179e7805d5a15cd +https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.43-h2797004_0.conda#009981dd9cfcaa4dbfa25ffaed86bcae +https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.45.3-h2797004_0.conda#b3316cbe90249da4f8e84cd66e1cc55b https://conda.anaconda.org/conda-forge/linux-64/libvorbis-1.3.7-h9c3ff4c_0.tar.bz2#309dec04b70a3cc0f1e84a4013683bc0 https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.15-h0b41bf4_0.conda#33277193f5b92bad9fdd230eb700929c -https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.11.4-h0d562d8_0.conda#e46fad17d5fb57316b956f88dca765e4 -https://conda.anaconda.org/conda-forge/linux-64/mysql-common-8.0.32-hf1915f5_2.conda#cf4a8f520fdad3a63bb2bce74576cd2d -https://conda.anaconda.org/conda-forge/linux-64/openblas-0.3.23-pthreads_h855a84d_0.conda#ba8810202f8879562f01b4f9957c1ada -https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.40-hc3806b6_0.tar.bz2#69e2c796349cd9b273890bee0febfe1b +https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.12.7-hc051c1a_1.conda#340278ded8b0dc3a73f3660bbb0adbc6 +https://conda.anaconda.org/conda-forge/linux-64/mysql-common-8.3.0-hf1915f5_4.conda#784a4df6676c581ca624fbe460703a6d +https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.43-hcad00b1_0.conda#8292dea9e022d9610a11fce5e0896ed8 https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4 -https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.12-h27826a3_0.tar.bz2#5b8c42eb62e9fc961af70bdd6a26e168 +https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda#d453b98d9c83e71da0741bb0ff4d76bc https://conda.anaconda.org/conda-forge/linux-64/xorg-libsm-1.2.4-h7391055_0.conda#93ee23f12bc2e684548181256edd2cf6 -https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.13-h166bdaf_4.tar.bz2#4b11e365c0275b808be78b30f904e295 -https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.2-h3eb15da_6.conda#6b63daed8feeca47be78f323e793d555 -https://conda.anaconda.org/conda-forge/linux-64/blosc-1.21.4-h0f2a231_0.conda#876286b5941933a0f558777e57d883cc -https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.0.9-h166bdaf_8.tar.bz2#e5613f2bc717e9945840ff474419b8e4 -https://conda.anaconda.org/conda-forge/linux-64/c-blosc2-2.9.2-hb4ffafa_0.conda#e029f773ae3355c8a05ad7c3db2f8a4b -https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-hca18f0e_1.conda#e1232042de76d24539a436d37597eb06 -https://conda.anaconda.org/conda-forge/linux-64/gcc-11.3.0-h02d0930_13.conda#ead4470a123fb664e358d02a333676ba 
-https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-11.3.0-he6f903b_13.conda#90a9fa7151e709ba224232ea9bfa4fea -https://conda.anaconda.org/conda-forge/linux-64/gfortran_impl_linux-64-11.3.0-he34c6f7_19.tar.bz2#3de873ee757f1a2e583416a3583f84c4 -https://conda.anaconda.org/conda-forge/linux-64/gxx_impl_linux-64-11.3.0-hab1b70f_19.tar.bz2#b73564a352e64bb5f2c9bfd3cd6dd127 -https://conda.anaconda.org/conda-forge/linux-64/krb5-1.20.1-h81ceb04_0.conda#89a41adce7106749573d883b2f657d78 -https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-17_linux64_openblas.conda#7ef0969b00fe3d6eef56a8151d3afb29 -https://conda.anaconda.org/conda-forge/linux-64/libgcrypt-1.10.1-h166bdaf_0.tar.bz2#f967fc95089cd247ceed56eda31de3a9 -https://conda.anaconda.org/conda-forge/linux-64/libglib-2.76.3-hebfc3b9_0.conda#a64f11b244b2c112cd3fa1cbe9493999 -https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-17_linux64_openblas.conda#a2103882c46492e26500fcb56c03de8b -https://conda.anaconda.org/conda-forge/linux-64/libllvm15-15.0.7-h5cf9203_2.conda#5c0a511fa7d223d8661fefcf77b2a877 -https://conda.anaconda.org/conda-forge/linux-64/libsndfile-1.2.0-hb75c966_0.conda#c648d19cd9c8625898d5d370414de7c7 -https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.5.0-ha587672_6.conda#4e5ee4b062c21519efbee7e2ae608748 -https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.5.0-h5d7e998_3.conda#c91ea308d7bf70b62ddda568478aa03b -https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-16.0.5-h4dfa4b3_0.conda#9441a97b74c692d969ff465ac6c0ccea -https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-8.0.32-hca2cd23_2.conda#20b4708cd04bdc8138d03314ddd97885 -https://conda.anaconda.org/conda-forge/linux-64/nss-3.89-he45b914_0.conda#2745719a58eeaab6657256a3f142f099 -https://conda.anaconda.org/conda-forge/linux-64/python-3.9.16-h2782a2a_0_cpython.conda#95c9b7c96a7fd7342e0c9d0a917b8f78 +https://conda.anaconda.org/conda-forge/linux-64/zlib-1.3.1-h4ab18f5_1.conda#9653f1bf3766164d0e65fa723cabbc54 +https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.6-ha6fb4c9_0.conda#4d056880988120e29d75bfff282e0f45 +https://conda.anaconda.org/conda-forge/linux-64/blosc-1.21.5-hc2324a3_1.conda#11d76bee958b1989bd1ac6ee7372ea6d +https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.1.0-hd590300_1.conda#39f910d205726805a958da408ca194ba +https://conda.anaconda.org/conda-forge/linux-64/c-blosc2-2.14.4-hb4ffafa_1.conda#84eb54e92644c328e087e1c725773317 +https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-h267a509_2.conda#9ae35c3d96db2c94ce0cef86efdfa2cb +https://conda.anaconda.org/conda-forge/linux-64/gcc-12.3.0-h915e2ae_7.conda#84b1c5cebd0a0443f3d7f90a4be93fc6 +https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-12.3.0-h6b3dd4b_4.conda#4b76ee727fca36fd83ef58586516c46a +https://conda.anaconda.org/conda-forge/linux-64/gettext-0.22.5-h59595ed_2.conda#219ba82e95d7614cf7140d2a4afc0926 +https://conda.anaconda.org/conda-forge/linux-64/gfortran_impl_linux-64-12.3.0-h1645026_7.conda#2d9d4058c433c9ce2a811c76658c4efd +https://conda.anaconda.org/conda-forge/linux-64/gxx_impl_linux-64-12.3.0-h2a574ab_7.conda#265caa78b979f112fc241cecd0015c91 +https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.2-h659d440_0.conda#cd95826dbd331ed1be26bdf401432844 +https://conda.anaconda.org/conda-forge/linux-64/libglib-2.80.2-hf974151_0.conda#72724f6a78ecb15559396966226d5838 +https://conda.anaconda.org/conda-forge/linux-64/libjxl-0.10.2-hcae5a98_0.conda#901db891e1e21afd8524cd636a8c8e3b 
+https://conda.anaconda.org/conda-forge/linux-64/libllvm15-15.0.7-hb3ce162_4.conda#8a35df3cbc0c8b12cc8af9473ae75eef +https://conda.anaconda.org/conda-forge/linux-64/libllvm18-18.1.7-hb77312f_0.conda#bc0ea7e1f75a9b1c8467597fbbd9f86b +https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.27-pthreads_h413a1c8_0.conda#a356024784da6dfd4683dc5ecf45b155 +https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.6.0-h1dd3fc0_3.conda#66f03896ffbe1a110ffda05c7a856504 +https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-18.1.7-ha31de31_0.conda#7234f31acd176e402e91e03feba90f7d +https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-8.3.0-hca2cd23_4.conda#1b50eebe2a738a3146c154d2eceaa8b6 +https://conda.anaconda.org/conda-forge/linux-64/nss-3.100-hca3bf56_0.conda#949c4a82290ee58b3c970cef4bcfd4ad +https://conda.anaconda.org/conda-forge/linux-64/python-3.9.19-h0755675_0_cpython.conda#d9ee3647fbd9e8595b8df759b2bbefb8 https://conda.anaconda.org/conda-forge/linux-64/xcb-util-0.4.0-hd590300_1.conda#9bfac7ccd94d54fd21a0501296d60424 https://conda.anaconda.org/conda-forge/linux-64/xcb-util-keysyms-0.4.0-h8ee46fc_1.conda#632413adcd8bc16b515cab87a2932913 https://conda.anaconda.org/conda-forge/linux-64/xcb-util-renderutil-0.3.9-hd590300_1.conda#e995b155d938b6779da6ace6c6b13816 https://conda.anaconda.org/conda-forge/linux-64/xcb-util-wm-0.4.1-h8ee46fc_1.conda#90108a432fb5c6150ccfee3f03388656 -https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.4-h8ee46fc_1.conda#52d09ea80a42c0466214609ef0a2d62d -https://conda.anaconda.org/conda-forge/noarch/alabaster-0.7.13-pyhd8ed1ab_0.conda#06006184e203b61d3525f90de394471e -https://conda.anaconda.org/conda-forge/linux-64/brotli-1.0.9-h166bdaf_8.tar.bz2#2ff08978892a3e8b954397c461f18418 -https://conda.anaconda.org/conda-forge/linux-64/c-compiler-1.5.2-h0b41bf4_0.conda#69afb4e35be6366c2c1f9ed7f49bc3e6 -https://conda.anaconda.org/conda-forge/noarch/certifi-2023.5.7-pyhd8ed1ab_0.conda#5d1b71c942b8421285934dad1d891ebc -https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.1.0-pyhd8ed1ab_0.conda#7fcff9f6f123696e940bda77bd4d6551 -https://conda.anaconda.org/conda-forge/noarch/click-8.1.3-unix_pyhd8ed1ab_2.tar.bz2#20e4087407c7cb04a40817114b333dbf -https://conda.anaconda.org/conda-forge/noarch/cloudpickle-2.2.1-pyhd8ed1ab_0.conda#b325bfc4cff7d7f8a868f1f7ecc4ed16 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.9-h8ee46fc_0.conda#077b6e8ad6a3ddb741fce2496dd01bec +https://conda.anaconda.org/conda-forge/noarch/alabaster-0.7.16-pyhd8ed1ab_0.conda#def531a3ac77b7fb8c21d17bb5d0badb +https://conda.anaconda.org/conda-forge/linux-64/brotli-1.1.0-hd590300_1.conda#f27a24d46e3ea7b70a1f98e50c62508f +https://conda.anaconda.org/conda-forge/linux-64/brotli-python-1.1.0-py39h3d6467e_1.conda#c48418c8b35f1d59ae9ae1174812b40a +https://conda.anaconda.org/conda-forge/linux-64/c-compiler-1.7.0-hd590300_1.conda#e9dffe1056994133616378309f932d77 +https://conda.anaconda.org/conda-forge/noarch/certifi-2024.2.2-pyhd8ed1ab_0.conda#0876280e409658fc6f9e75d035960333 +https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.3.2-pyhd8ed1ab_0.conda#7f4a9e3fcff3f6356ae99244a014da6a https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 -https://conda.anaconda.org/conda-forge/noarch/cycler-0.11.0-pyhd8ed1ab_0.tar.bz2#a50559fad0affdbb33729a68669ca1cb -https://conda.anaconda.org/conda-forge/linux-64/cython-0.29.35-py39h3d6467e_0.conda#019c9509764e66c9d9d38b5ca365a9f4 
+https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhd8ed1ab_0.conda#5cd86562580f274031ede6aa6aa24441 +https://conda.anaconda.org/conda-forge/linux-64/cython-3.0.10-py39h3d6467e_0.conda#76b5d215fb735a6dc43010ffbe78040e https://conda.anaconda.org/conda-forge/linux-64/dbus-1.13.6-h5008d03_3.tar.bz2#ecfff944ba3960ecb334b9a2663d708d -https://conda.anaconda.org/conda-forge/linux-64/docutils-0.19-py39hf3d152e_1.tar.bz2#adb733ec2ee669f6d010758d054da60f -https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.1.1-pyhd8ed1ab_0.conda#7312299d7a0ea4993159229b7d2dceb2 -https://conda.anaconda.org/conda-forge/noarch/execnet-1.9.0-pyhd8ed1ab_0.tar.bz2#0e521f7a5e60d508b121d38b04874fb2 +https://conda.anaconda.org/conda-forge/noarch/docutils-0.21.2-pyhd8ed1ab_0.conda#e8cd5d629f65bdf0f3bb312cde14659e +https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.0-pyhd8ed1ab_2.conda#8d652ea2ee8eaee02ed8dc820bc794aa +https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_0.conda#15dda3cdbf330abfe9f555d22f66db46 https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.14.2-h14ed4e7_0.conda#0f69b688f52ff6da70bccb7ff7001d1d -https://conda.anaconda.org/conda-forge/noarch/fsspec-2023.5.0-pyh1a96a4e_0.conda#20edd290b319aa0eff3e9055375756dc -https://conda.anaconda.org/conda-forge/linux-64/gfortran-11.3.0-ha859ce3_13.conda#dd92c047f03f5288b111117b47fdff3c -https://conda.anaconda.org/conda-forge/linux-64/gfortran_linux-64-11.3.0-h3c55166_13.conda#cc56575e38eb6bf082654de641476b15 -https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.76.3-hfc55251_0.conda#8951eedf3cdf94dd733c1b5eee1f4880 -https://conda.anaconda.org/conda-forge/linux-64/gxx-11.3.0-h02d0930_13.conda#b8882bac01c133f6f8ac86193c6c00a7 -https://conda.anaconda.org/conda-forge/linux-64/gxx_linux-64-11.3.0-hc203a17_13.conda#c22e035729c5d224dd875274c92a0522 -https://conda.anaconda.org/conda-forge/noarch/idna-3.4-pyhd8ed1ab_0.tar.bz2#34272b248891bddccc64479f9a7fffed +https://conda.anaconda.org/conda-forge/linux-64/gfortran-12.3.0-h915e2ae_7.conda#8efa768f7f74085629f3e1090e7f0569 +https://conda.anaconda.org/conda-forge/linux-64/gfortran_linux-64-12.3.0-h5877db1_4.conda#aa5735fc506449d6bc2ec27bc066364e +https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.80.2-hb6ce0ca_0.conda#a965aeaf060289528a3fbe09326edae2 +https://conda.anaconda.org/conda-forge/linux-64/gxx-12.3.0-h915e2ae_7.conda#721c5433122a02bf3a081db10a2e68e2 +https://conda.anaconda.org/conda-forge/linux-64/gxx_linux-64-12.3.0-ha28b414_4.conda#a2046880d7a1a377824bac6538d85aff +https://conda.anaconda.org/conda-forge/noarch/idna-3.7-pyhd8ed1ab_0.conda#c0cc1420498b17414d8617d0b9f506ca https://conda.anaconda.org/conda-forge/noarch/imagesize-1.4.1-pyhd8ed1ab_0.tar.bz2#7de5386c8fea29e76b303f37dde4c352 https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5 -https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.4-py39hf939315_1.tar.bz2#41679a052a8ce841c74df1ebc802e411 -https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.15-haa2dc70_1.conda#980d8aca0bc23ca73fa8caa3e7c84c28 -https://conda.anaconda.org/conda-forge/linux-64/libclang13-15.0.7-default_h9986a30_2.conda#907344cee64101d44d806bbe0fccb01d -https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h36d4200_3.conda#c9f4416a34bc91e0eb029f912c68f81f -https://conda.anaconda.org/conda-forge/linux-64/libcurl-8.1.2-h409715c_0.conda#50c873c9660ed116707ae15b663928d8 
-https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.9.0-17_linux64_openblas.conda#949709aa6ee6a2dcdb3de6dd99147d17 -https://conda.anaconda.org/conda-forge/linux-64/libpq-15.3-hbcd7760_1.conda#8afb2a97d256ffde95b91a6283bc598c -https://conda.anaconda.org/conda-forge/linux-64/libsystemd0-253-h8c4010b_1.conda#9176b1e2cb8beca37a7510b0e801e38f -https://conda.anaconda.org/conda-forge/noarch/locket-1.0.0-pyhd8ed1ab_0.tar.bz2#91e27ef3d05cc772ce627e51cff111c4 -https://conda.anaconda.org/conda-forge/linux-64/markupsafe-2.1.3-py39hd1e30aa_0.conda#9c858d105816f454c6b64f3e19184b60 +https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.5-py39h7633fee_1.conda#c9f74d717e5a2847a9f8b779c54130f2 +https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.16-hb7c19ff_0.conda#51bb7010fc86f70eee639b4bb7a894f5 +https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-22_linux64_openblas.conda#1a2a0cd3153464fee6646f3dd6dad9b8 +https://conda.anaconda.org/conda-forge/linux-64/libclang-cpp15-15.0.7-default_h127d8a8_5.conda#d0a9633b53cdc319b8a1a532ae7822b8 +https://conda.anaconda.org/conda-forge/linux-64/libclang13-18.1.7-default_h087397f_0.conda#536526073c2e7f9056fdce8584da779e +https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h4637d8d_4.conda#d4529f4dff3057982a7617c7ac58fde3 +https://conda.anaconda.org/conda-forge/linux-64/libflac-1.4.3-h59595ed_0.conda#ee48bf17cc83a00f59ca1494d5646869 +https://conda.anaconda.org/conda-forge/linux-64/libgpg-error-1.49-h4f305b6_0.conda#dfcfd72c7a430d3616763ecfbefe4ca9 +https://conda.anaconda.org/conda-forge/linux-64/libpq-16.3-ha72fbe1_0.conda#bac737ae28b79cfbafd515258d97d29e +https://conda.anaconda.org/conda-forge/linux-64/markupsafe-2.1.5-py39hd1e30aa_0.conda#9a9a22eb1f83c44953319ee3b027769f https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19 -https://conda.anaconda.org/conda-forge/noarch/networkx-3.1-pyhd8ed1ab_0.conda#254f787d5068bc89f578bf63893ce8b4 -https://conda.anaconda.org/conda-forge/linux-64/numpy-1.24.3-py39h6183b62_0.conda#8626d6d5169950ce4b99b082667773f7 -https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.0-hfec8fc6_2.conda#5ce6a42505c6e9e6151c54c3ec8d68ea -https://conda.anaconda.org/conda-forge/noarch/packaging-23.1-pyhd8ed1ab_0.conda#91cda59e66e1e4afe9476f8ef98f5c30 -https://conda.anaconda.org/conda-forge/noarch/pluggy-1.0.0-pyhd8ed1ab_5.tar.bz2#7d301a0d25f424d96175f810935f0da9 -https://conda.anaconda.org/conda-forge/noarch/ply-3.11-py_1.tar.bz2#7205635cd71531943440fbfe3b6b5727 -https://conda.anaconda.org/conda-forge/linux-64/psutil-5.9.5-py39h72bdee0_0.conda#1d54d3a75c3192ab7655d9c3d16809f1 -https://conda.anaconda.org/conda-forge/noarch/py-1.11.0-pyh6c4a22f_0.tar.bz2#b4613d7e7a493916d867842a6a148054 -https://conda.anaconda.org/conda-forge/noarch/pygments-2.15.1-pyhd8ed1ab_0.conda#d316679235612869eba305aa7d41d9bf -https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.0.9-pyhd8ed1ab_0.tar.bz2#e8fbc1b54b25f4b08281467bc13b70cc +https://conda.anaconda.org/conda-forge/noarch/networkx-3.2.1-pyhd8ed1ab_0.conda#425fce3b531bed6ec3c74fab3e5f0a1c +https://conda.anaconda.org/conda-forge/linux-64/openblas-0.3.27-pthreads_h7a3da1a_0.conda#4b422ebe8fc6a5320d0c1c22e5a46032 +https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.2-h488ebb8_0.conda#7f2e286780f072ed750df46dc2631138 +https://conda.anaconda.org/conda-forge/noarch/packaging-24.0-pyhd8ed1ab_0.conda#248f521b64ce055e7feae3105e7abeb8 
+https://conda.anaconda.org/conda-forge/noarch/platformdirs-4.2.2-pyhd8ed1ab_0.conda#6f6cf28bf8e021933869bae3f84b8fc9 +https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_0.conda#d3483c8fc2dc2cc3f5cf43e26d60cabf +https://conda.anaconda.org/conda-forge/noarch/ply-3.11-pyhd8ed1ab_2.conda#18c6deb6f9602e32446398203c8f0e91 +https://conda.anaconda.org/conda-forge/linux-64/psutil-5.9.8-py39hd1e30aa_0.conda#ec86403fde8793ac1c36f8afa3d15902 +https://conda.anaconda.org/conda-forge/noarch/pygments-2.18.0-pyhd8ed1ab_0.conda#b7f5c092b8f9800150d998a71b76d5a1 +https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.2-pyhd8ed1ab_0.conda#b9a4dacf97241704529131a0dfc0494f https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha2e5f31_6.tar.bz2#2a7de29fb590ca14b5243c4c812c8025 -https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2023.3-pyhd8ed1ab_0.conda#2590495f608a63625e165915fb4e2e34 -https://conda.anaconda.org/conda-forge/noarch/pytz-2023.3-pyhd8ed1ab_0.conda#d3076b483092a435832603243567bc31 -https://conda.anaconda.org/conda-forge/linux-64/pyyaml-6.0-py39hb9d737c_5.tar.bz2#ef9db3c38ae7275f6b14491cfe61a248 -https://conda.anaconda.org/conda-forge/noarch/setuptools-67.7.2-pyhd8ed1ab_0.conda#3b68bc43ec6baa48f7354a446267eefe +https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2024.1-pyhd8ed1ab_0.conda#98206ea9954216ee7540f0c773f2104d +https://conda.anaconda.org/conda-forge/noarch/pytz-2024.1-pyhd8ed1ab_0.conda#3eeeeb9e4827ace8c0c1419c85d590ad +https://conda.anaconda.org/conda-forge/noarch/setuptools-70.0.0-pyhd8ed1ab_0.conda#c8ddb4f34a208df4dd42509a0f6a1c89 https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 https://conda.anaconda.org/conda-forge/noarch/snowballstemmer-2.2.0-pyhd8ed1ab_0.tar.bz2#4d22a9315e78c6827f806065957d566e -https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-applehelp-1.0.4-pyhd8ed1ab_0.conda#5a31a7d564f551d0e6dff52fd8cb5b16 -https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-devhelp-1.0.2-py_0.tar.bz2#68e01cac9d38d0e717cd5c87bc3d2cc9 -https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-htmlhelp-2.0.1-pyhd8ed1ab_0.conda#6c8c4d6eb2325e59290ac6dbbeacd5f0 -https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-jsmath-1.0.1-py_0.tar.bz2#67cd9d9c0382d37479b4d306c369a2d4 -https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-qthelp-1.0.3-py_0.tar.bz2#d01180388e6d1838c3e1ad029590aa7a -https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-serializinghtml-1.1.5-pyhd8ed1ab_2.tar.bz2#9ff55a0901cf952f05c654394de76bf7 -https://conda.anaconda.org/conda-forge/noarch/tenacity-8.2.2-pyhd8ed1ab_0.conda#7b39e842b52966a99e229739cd4dc36e -https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.1.0-pyh8a188c0_0.tar.bz2#a2995ee828f65687ac5b1e71a2ab1e0c +https://conda.anaconda.org/conda-forge/noarch/soupsieve-2.5-pyhd8ed1ab_1.conda#3f144b2c34f8cb5a9abd9ed23a39c561 +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-jsmath-1.0.1-pyhd8ed1ab_0.conda#da1d979339e2714c30a8e806a33ec087 +https://conda.anaconda.org/conda-forge/noarch/tabulate-0.9.0-pyhd8ed1ab_1.tar.bz2#4759805cce2d914c38472f70bf4d8bcb +https://conda.anaconda.org/conda-forge/noarch/tenacity-8.3.0-pyhd8ed1ab_0.conda#216cfa8e32bcd1447646768351df6059 +https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.5.0-pyhc1e730c_0.conda#df68d78237980a159bd7149f33c0e8fd https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095 
https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 -https://conda.anaconda.org/conda-forge/noarch/toolz-0.12.0-pyhd8ed1ab_0.tar.bz2#92facfec94bc02d6ccf42e7173831a36 -https://conda.anaconda.org/conda-forge/linux-64/tornado-6.3.2-py39hd1e30aa_0.conda#da334eecb1ea2248e28294c49e6f6d89 -https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.6.3-pyha770c72_0.conda#4a3014a4d107d15475d106b751c4e352 -https://conda.anaconda.org/conda-forge/linux-64/unicodedata2-15.0.0-py39hb9d737c_0.tar.bz2#230d65004135bf312504a1bbcb0c7a08 -https://conda.anaconda.org/conda-forge/noarch/wheel-0.40.0-pyhd8ed1ab_0.conda#49bb0d9e60ce1db25e151780331bb5f3 +https://conda.anaconda.org/conda-forge/linux-64/tornado-6.4.1-py39hd3abc70_0.conda#c183e99f9320e5e2d0f9c43efcb3fb22 +https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.12.2-pyha770c72_0.conda#ebe6952715e1d5eb567eeebf25250fa7 +https://conda.anaconda.org/conda-forge/linux-64/unicodedata2-15.1.0-py39hd1e30aa_0.conda#1da984bbb6e765743e13388ba7b7b2c8 +https://conda.anaconda.org/conda-forge/noarch/wheel-0.43.0-pyhd8ed1ab_1.conda#0b5293a157c2b5cd513dd1b03d8d3aae https://conda.anaconda.org/conda-forge/linux-64/xcb-util-image-0.4.0-h8ee46fc_1.conda#9d7bcddf49cbf727730af10e71022c73 +https://conda.anaconda.org/conda-forge/linux-64/xkeyboard-config-2.42-h4ab18f5_0.conda#b193af204da1bfb8c13882d131a14bd2 https://conda.anaconda.org/conda-forge/linux-64/xorg-libxext-1.3.4-h0b41bf4_2.conda#82b6df12252e6f32402b96dacc656fec -https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.10-h7f98852_1003.tar.bz2#f59c1242cc1dd93e72c2ee2b360979eb -https://conda.anaconda.org/conda-forge/noarch/zipp-3.15.0-pyhd8ed1ab_0.conda#13018819ca8f5b7cc675a8faf1f5fedf -https://conda.anaconda.org/conda-forge/noarch/babel-2.12.1-pyhd8ed1ab_1.conda#ac432e732804a81ddcf29c92ead57cde -https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-17_linux64_openblas.conda#fde382e41d77b65315fab79ab93a20ab +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.11-hd590300_0.conda#ed67c36f215b310412b2af935bf3e530 +https://conda.anaconda.org/conda-forge/noarch/zipp-3.18.1-pyhd8ed1ab_0.conda#c77c4aabc01b156a8cb4395f0233d335 +https://conda.anaconda.org/conda-forge/noarch/accessible-pygments-0.0.4-pyhd8ed1ab_0.conda#46a2e6e3dfa718ce3492018d5a110dd6 +https://conda.anaconda.org/conda-forge/noarch/babel-2.14.0-pyhd8ed1ab_0.conda#9669586875baeced8fc30c0826c3270e +https://conda.anaconda.org/conda-forge/noarch/beautifulsoup4-4.12.3-pyha770c72_0.conda#332493000404d8411859539a5a630865 https://conda.anaconda.org/conda-forge/linux-64/brunsli-0.1-h9c3ff4c_0.tar.bz2#c1ac6229d0bfd14f8354ff9ad2a26cad -https://conda.anaconda.org/conda-forge/linux-64/cairo-1.16.0-hbbf8b49_1016.conda#c1dd96500b9b1a75e9e511931f415cbc -https://conda.anaconda.org/conda-forge/linux-64/cfitsio-4.2.0-hd9d235c_0.conda#8c57a9adbafd87f5eff842abde599cb4 -https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.0.7-py39h4b4f3f3_0.conda#c5387f3fb1f5b8b71e1c865fc55f4951 -https://conda.anaconda.org/conda-forge/linux-64/cxx-compiler-1.5.2-hf52228f_0.conda#6b3b19e359824b97df7145c8c878c8be -https://conda.anaconda.org/conda-forge/linux-64/cytoolz-0.12.0-py39hb9d737c_1.tar.bz2#eb31327ace8dac15c2df243d9505a132 -https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.39.4-py39hd1e30aa_0.conda#80605b792f58cf5c78a5b7e20cef1e35 -https://conda.anaconda.org/conda-forge/linux-64/fortran-compiler-1.5.2-hdb1a99f_0.conda#265323e1bd53709aeb739c9b1794b398 
-https://conda.anaconda.org/conda-forge/linux-64/glib-2.76.3-hfc55251_0.conda#950e02f5665f5f4ff0437a6acba58798 -https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-6.6.0-pyha770c72_0.conda#f91a5d5175fb7ff2a91952ec7da59cb9 -https://conda.anaconda.org/conda-forge/noarch/importlib_resources-5.12.0-pyhd8ed1ab_0.conda#e5fd2260a231ee63b6969f4801082f2b -https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.2-pyhd8ed1ab_1.tar.bz2#c8490ed5c70966d232fdd389d0dbed37 -https://conda.anaconda.org/conda-forge/noarch/joblib-1.2.0-pyhd8ed1ab_0.tar.bz2#7583652522d71ad78ba536bba06940eb -https://conda.anaconda.org/conda-forge/linux-64/libclang-15.0.7-default_h7634d5b_2.conda#1a4fe5162abe4a19b5a9dedf158a0ff9 +https://conda.anaconda.org/conda-forge/linux-64/cairo-1.18.0-h3faef2a_0.conda#f907bb958910dc404647326ca80c263e +https://conda.anaconda.org/conda-forge/linux-64/cxx-compiler-1.7.0-h00ab1b0_1.conda#28de2e073db9ca9b72858bee9fb6f571 +https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.53.0-py39hd3abc70_0.conda#9dae301603c88aef61dba733e8931cdd +https://conda.anaconda.org/conda-forge/linux-64/fortran-compiler-1.7.0-heb67821_1.conda#cf4b0e7c4c78bb0662aed9b27c414a3c +https://conda.anaconda.org/conda-forge/linux-64/glib-2.80.2-hf974151_0.conda#d427988dc3dbd0a4c136f52db356cc6a +https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-7.1.0-pyha770c72_0.conda#0896606848b2dc5cebdf111b6543aa04 +https://conda.anaconda.org/conda-forge/noarch/importlib_resources-6.4.0-pyhd8ed1ab_0.conda#c5d3907ad8bd7bf557521a1833cf7e6d +https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.4-pyhd8ed1ab_0.conda#7b86ecb7d3557821c649b3c31e3eb9f2 +https://conda.anaconda.org/conda-forge/noarch/joblib-1.4.2-pyhd8ed1ab_0.conda#25df261d4523d9f9783bcdb7208d872f +https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-22_linux64_openblas.conda#4b31699e0ec5de64d5896e580389c9a1 +https://conda.anaconda.org/conda-forge/linux-64/libgcrypt-1.10.3-hd590300_0.conda#32d16ad533c59bb0a3c5ffaf16110829 +https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-22_linux64_openblas.conda#b083767b6c877e24ee597d93b87ab838 +https://conda.anaconda.org/conda-forge/linux-64/libsndfile-1.2.2-hc60ed4a_1.conda#ef1910918dd895516a769ed36b5b3a4e +https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.7.0-h662e7e4_0.conda#b32c0da42b1f24a98577bb3d7fc0b995 https://conda.anaconda.org/conda-forge/noarch/memory_profiler-0.61.0-pyhd8ed1ab_0.tar.bz2#8b45f9f2b2f7a98b0ec179c8991a4a9b -https://conda.anaconda.org/conda-forge/noarch/partd-1.4.0-pyhd8ed1ab_0.conda#721dab5803ea92ce02ddc4ee50aa0c48 -https://conda.anaconda.org/conda-forge/linux-64/pillow-9.5.0-py39haaeba84_1.conda#d7aa9b99ed6ade75fbab1e4cedcb3ce2 -https://conda.anaconda.org/conda-forge/noarch/pip-23.1.2-pyhd8ed1ab_0.conda#7288da0d36821349cf1126e8670292df -https://conda.anaconda.org/conda-forge/noarch/plotly-5.14.1-pyhd8ed1ab_0.conda#f64bedfdb8e3f93ac69b84f530397d0e -https://conda.anaconda.org/conda-forge/linux-64/pulseaudio-client-16.1-hb77b528_4.conda#8f349ca16d30950aa00870484d9d30c4 -https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.2-pyhd8ed1ab_0.tar.bz2#dd999d1cc9f79e67dbb855c8924c7984 -https://conda.anaconda.org/conda-forge/linux-64/pywavelets-1.4.1-py39h389d5f1_0.conda#9eeb2b2549f836ca196c6cbd22344122 -https://conda.anaconda.org/conda-forge/linux-64/sip-6.7.9-py39h3d6467e_0.conda#6d990f672cc70e5c480ddb74b789a17c -https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.6.3-hd8ed1ab_0.conda#3876f650ed7d0f95d70fa4b647621909 
-https://conda.anaconda.org/conda-forge/noarch/urllib3-2.0.2-pyhd8ed1ab_0.conda#81a763f3c64fe6d5f32e033b0325265d -https://conda.anaconda.org/conda-forge/linux-64/blas-2.117-openblas.conda#54b4b02b897156056f3056f992261d0c -https://conda.anaconda.org/conda-forge/linux-64/compilers-1.5.2-ha770c72_0.conda#f95226244ee1c487cf53272f971323f4 -https://conda.anaconda.org/conda-forge/linux-64/gstreamer-1.22.3-h977cf35_1.conda#410ed3b168e5a139d12ebaf4143072cd -https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-7.3.0-hdb3a94d_0.conda#765bc76c0dfaf24ff9d8a2935b2510df -https://conda.anaconda.org/conda-forge/linux-64/imagecodecs-2023.1.23-py39h9e8eca3_2.conda#32dec18d45c74c2405021eb8dc42a521 -https://conda.anaconda.org/conda-forge/noarch/imageio-2.28.1-pyh24c5eb1_0.conda#ef3541a8cd9a55879932486a097b7fed -https://conda.anaconda.org/conda-forge/noarch/importlib-resources-5.12.0-pyhd8ed1ab_0.conda#3544c818f0720c89eb16ae6940ab440b -https://conda.anaconda.org/conda-forge/noarch/importlib_metadata-6.6.0-hd8ed1ab_0.conda#3cbc9615f10a3d471532b83e4250b971 -https://conda.anaconda.org/conda-forge/linux-64/pandas-2.0.2-py39h40cae4c_0.conda#de99b3f807c0b295a7df94623df0fb4c -https://conda.anaconda.org/conda-forge/noarch/platformdirs-3.5.1-pyhd8ed1ab_0.conda#e2be672aece1f060adf7154f76531a35 -https://conda.anaconda.org/conda-forge/linux-64/pyqt5-sip-12.11.0-py39h227be39_3.conda#9e381db00691e26bcf670c3586397be1 -https://conda.anaconda.org/conda-forge/noarch/pytest-7.3.1-pyhd8ed1ab_0.conda#547c7de697ec99b494a28ddde185b5a4 -https://conda.anaconda.org/conda-forge/noarch/requests-2.31.0-pyhd8ed1ab_0.conda#a30144e4156cdbb236f99ebb49828f8b -https://conda.anaconda.org/conda-forge/noarch/dask-core-2023.5.1-pyhd8ed1ab_0.conda#b90a2dec6d308d71649dbe58dc32c337 -https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.22.3-h938bd60_1.conda#1f317eb7f00db75f4112a07476345376 -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.7.1-py39he190548_0.conda#f2a931db797bb58bd335f4a857b4c898 -https://conda.anaconda.org/conda-forge/noarch/pooch-1.7.0-pyha770c72_3.conda#5936894aade8240c867d292aa0d980c6 -https://conda.anaconda.org/conda-forge/noarch/pytest-forked-1.6.0-pyhd8ed1ab_0.conda#a46947638b6e005b63d2d6271da529b0 -https://conda.anaconda.org/conda-forge/noarch/sphinx-6.0.0-pyhd8ed1ab_2.conda#ac1d3b55da1669ee3a56973054fd7efb -https://conda.anaconda.org/conda-forge/noarch/tifffile-2023.4.12-pyhd8ed1ab_0.conda#b2ade33a630dada190c1220f3515fc5c -https://conda.anaconda.org/conda-forge/noarch/numpydoc-1.5.0-pyhd8ed1ab_0.tar.bz2#3c275d7168a6a135329f4acb364c229a -https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-2.5.0-pyhd8ed1ab_0.tar.bz2#1fdd1f3baccf0deb647385c677a1a48e -https://conda.anaconda.org/conda-forge/linux-64/qt-main-5.15.8-h01ceb2d_13.conda#99ca83a166224f46a62c9545b8d66401 -https://conda.anaconda.org/conda-forge/linux-64/scipy-1.10.1-py39h6183b62_3.conda#84c4007675da392fdb99faeefda69552 -https://conda.anaconda.org/conda-forge/noarch/sphinx-gallery-0.13.0-pyhd8ed1ab_0.conda#26c51b97ce59bbcce6a35ff45bc5c900 +https://conda.anaconda.org/conda-forge/noarch/meson-1.4.0-pyhd8ed1ab_0.conda#52a0660cfa40b45bf254ecc3374cb2e0 +https://conda.anaconda.org/conda-forge/linux-64/pillow-10.3.0-py39h90c7501_0.conda#1e3b6af9592be71ce19f0a6aae05d97b +https://conda.anaconda.org/conda-forge/noarch/pip-24.0-pyhd8ed1ab_0.conda#f586ac1e56c8638b64f9c8122a7b8a67 +https://conda.anaconda.org/conda-forge/noarch/plotly-5.22.0-pyhd8ed1ab_0.conda#5b409a5f738e7d76c2b426eddb7e9956 
+https://conda.anaconda.org/conda-forge/noarch/pyproject-metadata-0.8.0-pyhd8ed1ab_0.conda#573fe09d7bd0cd4bcc210d8369b5ca47 +https://conda.anaconda.org/conda-forge/noarch/pytest-8.2.2-pyhd8ed1ab_0.conda#0f3f49c22c7ef3a1195fa61dad3c43be +https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0-pyhd8ed1ab_0.conda#2cf4264fffb9e6eff6031c5b6884d61c +https://conda.anaconda.org/conda-forge/linux-64/sip-6.7.12-py39h3d6467e_0.conda#e667a3ab0df62c54e60e1843d2e6defb +https://conda.anaconda.org/conda-forge/noarch/urllib3-2.2.1-pyhd8ed1ab_0.conda#08807a87fa7af10754d46f63b368e016 +https://conda.anaconda.org/conda-forge/linux-64/compilers-1.7.0-ha770c72_1.conda#d8d07866ac3b5b6937213c89a1874f08 +https://conda.anaconda.org/conda-forge/linux-64/gstreamer-1.24.4-haf2f30d_0.conda#926c2c7ee7a0b48d6d70783a33f7bc80 +https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-8.5.0-hfac3d4d_0.conda#f5126317dd0ce0ba26945e411ecc6960 +https://conda.anaconda.org/conda-forge/noarch/importlib-resources-6.4.0-pyhd8ed1ab_0.conda#dcbadab7a68738a028e195ab68ab2d2e +https://conda.anaconda.org/conda-forge/noarch/lazy_loader-0.4-pyhd8ed1ab_0.conda#a284ff318fbdb0dd83928275b4b6087c +https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.9.0-22_linux64_openblas.conda#1fd156abd41a4992835952f6f4d951d0 +https://conda.anaconda.org/conda-forge/linux-64/libsystemd0-255-h3516f8a_1.conda#3366af27f0b593544a6cd453c7932ac5 +https://conda.anaconda.org/conda-forge/noarch/meson-python-0.16.0-pyh0c530f3_0.conda#e16f0dbf502da873be9f9adb0dc52547 +https://conda.anaconda.org/conda-forge/linux-64/numpy-1.26.4-py39h474f0d3_0.conda#aa265f5697237aa13cc10f53fa8acc4f +https://conda.anaconda.org/conda-forge/linux-64/pyqt5-sip-12.12.2-py39h3d6467e_5.conda#93aff412f3e49fdb43361c0215cbd72d +https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.5.0-pyhd8ed1ab_0.conda#d5f595da2daead898ca958ac62f0307b +https://conda.anaconda.org/conda-forge/noarch/requests-2.32.3-pyhd8ed1ab_0.conda#5ede4753180c7a550a443c430dc8ab52 +https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-22_linux64_openblas.conda#63ddb593595c9cf5eb08d3de54d66df8 +https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.2.1-py39h7633fee_0.conda#bdc188e59857d6efab332714e0d01d93 +https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.24.4-h9ad1361_0.conda#147cce520ec59367549fd0d96d404213 +https://conda.anaconda.org/conda-forge/linux-64/imagecodecs-2024.6.1-py39hbbab4d9_0.conda#bc3c956def472cc1562a325198db91c0 +https://conda.anaconda.org/conda-forge/noarch/imageio-2.34.1-pyh4b66e23_0.conda#bcf6a6f4c6889ca083e8d33afbafb8d5 +https://conda.anaconda.org/conda-forge/linux-64/pandas-2.2.2-py39hfc16268_1.conda#8b23d2b425035a7468d17e6fe1d54124 +https://conda.anaconda.org/conda-forge/noarch/patsy-0.5.6-pyhd8ed1ab_0.conda#a5b55d1cb110cdcedc748b5c3e16e687 +https://conda.anaconda.org/conda-forge/linux-64/polars-0.20.31-py39ha963410_0.conda#ef7ffefe34eae8f69a2ed0cdf2a27678 +https://conda.anaconda.org/conda-forge/noarch/pooch-1.8.2-pyhd8ed1ab_0.conda#8dab97d8a9616e07d779782995710aed +https://conda.anaconda.org/conda-forge/linux-64/pulseaudio-client-17.0-hb77b528_0.conda#07f45f1be1c25345faddb8db0de8039b +https://conda.anaconda.org/conda-forge/linux-64/pywavelets-1.4.1-py39h44dd56e_1.conda#d037c20e3da2e85f03ebd20ad480c359 +https://conda.anaconda.org/conda-forge/linux-64/scipy-1.13.1-py39haf93ffa_0.conda#492a2cd65862d16a4aaf535ae9ccb761 +https://conda.anaconda.org/conda-forge/linux-64/blas-2.122-openblas.conda#5065468105542a8b23ea47bd8b6fa55f 
+https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.8.4-py39h10d1fc8_2.conda#c9fb6571b93b1dd490ea627af7344f36 +https://conda.anaconda.org/conda-forge/linux-64/pyamg-5.1.0-py39h85c637f_1.conda#b2b15112d019e27e62f9433e31607d08 +https://conda.anaconda.org/conda-forge/linux-64/qt-main-5.15.8-hc9dc06e_21.conda#b325046180590c868ce0dbf267b82eb8 +https://conda.anaconda.org/conda-forge/linux-64/statsmodels-0.14.2-py39hd92a3bb_0.conda#2f6c03d60e71f13d92d511b06193f007 +https://conda.anaconda.org/conda-forge/noarch/tifffile-2024.5.22-pyhd8ed1ab_0.conda#3930cabe8ca8c8594026fa8768cae75c +https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.15.9-py39h52134e7_5.conda#e1f148e57d071b09187719df86f513c1 +https://conda.anaconda.org/conda-forge/linux-64/scikit-image-0.22.0-py39hddac248_2.conda#8d502a4d2cbe5a45ff35ca8af8cbec0a +https://conda.anaconda.org/conda-forge/noarch/seaborn-base-0.13.2-pyhd8ed1ab_2.conda#b713b116feaf98acdba93ad4d7f90ca1 +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.8.4-py39hf3d152e_2.conda#bd956c7563b6a6b27521b83623c74e22 +https://conda.anaconda.org/conda-forge/noarch/seaborn-0.13.2-hd8ed1ab_2.conda#a79d8797f62715255308d92d3a91ef2e +https://conda.anaconda.org/conda-forge/noarch/numpydoc-1.7.0-pyhd8ed1ab_1.conda#66798cbfdcb003d9fbccd92cd08eb3ac +https://conda.anaconda.org/conda-forge/noarch/pydata-sphinx-theme-0.15.3-pyhd8ed1ab_0.conda#55e445f4fcb07f2471fb0e1102d36488 +https://conda.anaconda.org/conda-forge/noarch/sphinx-copybutton-0.5.2-pyhd8ed1ab_0.conda#ac832cc43adc79118cf6e23f1f9b8995 +https://conda.anaconda.org/conda-forge/noarch/sphinx-design-0.5.0-pyhd8ed1ab_0.conda#264b3c697fa9cdade87eb0abe4440d54 +https://conda.anaconda.org/conda-forge/noarch/sphinx-gallery-0.16.0-pyhd8ed1ab_0.conda#add28691ee89e875b190eda07929d5d4 https://conda.anaconda.org/conda-forge/noarch/sphinx-prompt-1.4.0-pyhd8ed1ab_0.tar.bz2#88ee91e8679603f2a5bd036d52919cc2 -https://conda.anaconda.org/conda-forge/noarch/sphinxext-opengraph-0.8.2-pyhd8ed1ab_0.conda#7f330c6004309c83cc63aed39b70d711 -https://conda.anaconda.org/conda-forge/noarch/patsy-0.5.3-pyhd8ed1ab_0.tar.bz2#50ef6b29b1fb0768ca82c5aeb4fb2d96 -https://conda.anaconda.org/conda-forge/linux-64/pyamg-5.0.0-py39h9ff65d1_0.conda#b68d27031efaec0ebab9d20d52135abd -https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.15.7-py39h5c7b992_3.conda#19e30314fe824605750da905febb8ee6 -https://conda.anaconda.org/conda-forge/linux-64/scikit-image-0.19.3-py39h4661b88_2.tar.bz2#a8d53b12aedcd84107ba8c85c81be56f -https://conda.anaconda.org/conda-forge/noarch/seaborn-base-0.12.2-pyhd8ed1ab_0.conda#cf88f3a1c11536bc3c10c14ad00ccc42 -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.7.1-py39hf3d152e_0.conda#682772fa385911fb5efffbce21b269c5 -https://conda.anaconda.org/conda-forge/linux-64/statsmodels-0.14.0-py39h0f8d45d_1.conda#b4f7f4de7614a8406935f56b1eef6a75 -https://conda.anaconda.org/conda-forge/noarch/seaborn-0.12.2-hd8ed1ab_0.conda#50847a47c07812f88581081c620f5160 -# pip attrs @ https://files.pythonhosted.org/packages/f0/eb/fcb708c7bf5056045e9e98f62b93bd7467eb718b0202e7698eb11d66416c/attrs-23.1.0-py3-none-any.whl#sha256=1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04 +https://conda.anaconda.org/conda-forge/noarch/sphinx-remove-toctrees-1.0.0.post1-pyhd8ed1ab_0.conda#6dee8412218288a17f99f2cfffab334d +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-applehelp-1.0.8-pyhd8ed1ab_0.conda#611a35a27914fac3aa37611a6fe40bb5 
+https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-devhelp-1.0.6-pyhd8ed1ab_0.conda#d7e4954df0d3aea2eacc7835ad12671d +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-htmlhelp-2.0.5-pyhd8ed1ab_0.conda#7e1e7437273682ada2ed5e9e9714b140 +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-qthelp-1.0.7-pyhd8ed1ab_0.conda#26acae54b06f178681bfb551760f5dd1 +https://conda.anaconda.org/conda-forge/noarch/sphinx-7.3.7-pyhd8ed1ab_0.conda#7b1465205e28d75d2c0e1a868ee00a67 +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-serializinghtml-1.1.10-pyhd8ed1ab_0.conda#e507335cb4ca9cff4c3d0fa9cdab255e +https://conda.anaconda.org/conda-forge/noarch/sphinxext-opengraph-0.9.1-pyhd8ed1ab_0.conda#286283e05a1eff606f55e7cd70f6d7f7 +# pip attrs @ https://files.pythonhosted.org/packages/e0/44/827b2a91a5816512fcaf3cc4ebc465ccd5d598c45cefa6703fcf4a79018f/attrs-23.2.0-py3-none-any.whl#sha256=99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1 +# pip cloudpickle @ https://files.pythonhosted.org/packages/96/43/dae06432d0c4b1dc9e9149ad37b4ca8384cf6eb7700cd9215b177b914f0a/cloudpickle-3.0.0-py3-none-any.whl#sha256=246ee7d0c295602a036e86369c77fecda4ab17b506496730f2f576d9016fd9c7 # pip defusedxml @ https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl#sha256=a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61 -# pip fastjsonschema @ https://files.pythonhosted.org/packages/d4/a1/b3816c7945742ee95e2fb68dd59aaa205c9ce53ffd90704f70c2207a7b4d/fastjsonschema-2.17.1-py3-none-any.whl#sha256=4b90b252628ca695280924d863fe37234eebadc29c5360d322571233dc9746e0 +# pip fastjsonschema @ https://files.pythonhosted.org/packages/9c/b9/79691036d4a8f9857e74d1728b23f34f583b81350a27492edda58d5604e1/fastjsonschema-2.19.1-py3-none-any.whl#sha256=3672b47bc94178c9f23dbb654bf47440155d4db9df5f7bc47643315f9c405cd0 # pip fqdn @ https://files.pythonhosted.org/packages/cf/58/8acf1b3e91c58313ce5cb67df61001fc9dcd21be4fadb76c1a2d540e09ed/fqdn-1.5.1-py3-none-any.whl#sha256=3a179af3761e4df6eb2e026ff9e1a3033d3587bf980a0b1b2e1e5d08d7358014 -# pip json5 @ https://files.pythonhosted.org/packages/70/ba/fa37123a86ae8287d6678535a944f9c3377d8165e536310ed6f6cb0f0c0e/json5-0.9.14-py2.py3-none-any.whl#sha256=740c7f1b9e584a468dbb2939d8d458db3427f2c93ae2139d05f47e453eae964f -# pip jsonpointer @ https://files.pythonhosted.org/packages/a3/be/8dc9d31b50e38172c8020c40f497ce8debdb721545ddb9fcb7cca89ea9e6/jsonpointer-2.3-py2.py3-none-any.whl#sha256=51801e558539b4e9cd268638c078c6c5746c9ac96bc38152d443400e4f3793e9 -# pip jupyterlab-pygments @ https://files.pythonhosted.org/packages/c0/7e/c3d1df3ae9b41686e664051daedbd70eea2e1d2bd9d9c33e7e1455bc9f96/jupyterlab_pygments-0.2.2-py2.py3-none-any.whl#sha256=2405800db07c9f770863bcf8049a529c3dd4d3e28536638bd7c1c01d2748309f -# pip mistune @ https://files.pythonhosted.org/packages/9f/e5/780d22d19543f339aad583304f58002975b586757aa590cbe7bea5cc6f13/mistune-2.0.5-py2.py3-none-any.whl#sha256=bad7f5d431886fcbaf5f758118ecff70d31f75231b34024a1341120340a65ce8 -# pip overrides @ https://files.pythonhosted.org/packages/7f/36/3fef66c2bf1f66f35538a6703aca0447114b1873913c403f0ea589721aae/overrides-7.3.1-py3-none-any.whl#sha256=6187d8710a935d09b0bcef8238301d6ee2569d2ac1ae0ec39a8c7924e27f58ca -# pip pandocfilters @ 
https://files.pythonhosted.org/packages/5e/a8/878258cffd53202a6cc1903c226cf09e58ae3df6b09f8ddfa98033286637/pandocfilters-1.5.0-py2.py3-none-any.whl#sha256=33aae3f25fd1a026079f5d27bdd52496f0e0803b3469282162bafdcbdf6ef14f -# pip pkginfo @ https://files.pythonhosted.org/packages/b3/f2/6e95c86a23a30fa205ea6303a524b20cbae27fbee69216377e3d95266406/pkginfo-1.9.6-py3-none-any.whl#sha256=4b7a555a6d5a22169fcc9cf7bfd78d296b0361adad412a346c1226849af5e546 -# pip prometheus-client @ https://files.pythonhosted.org/packages/5b/62/75fc6f255e214ff0a8bd3267a0bd337521dd24f76cd593c10795e488f41b/prometheus_client-0.17.0-py3-none-any.whl#sha256=a77b708cf083f4d1a3fb3ce5c95b4afa32b9c521ae363354a4a910204ea095ce +# pip json5 @ https://files.pythonhosted.org/packages/8a/3c/4f8791ee53ab9eeb0b022205aa79387119a74cc9429582ce04098e6fc540/json5-0.9.25-py3-none-any.whl#sha256=34ed7d834b1341a86987ed52f3f76cd8ee184394906b6e22a1e0deb9ab294e8f +# pip jsonpointer @ https://files.pythonhosted.org/packages/12/f6/0232cc0c617e195f06f810534d00b74d2f348fe71b2118009ad8ad31f878/jsonpointer-2.4-py2.py3-none-any.whl#sha256=15d51bba20eea3165644553647711d150376234112651b4f1811022aecad7d7a +# pip jupyterlab-pygments @ https://files.pythonhosted.org/packages/b1/dd/ead9d8ea85bf202d90cc513b533f9c363121c7792674f78e0d8a854b63b4/jupyterlab_pygments-0.3.0-py3-none-any.whl#sha256=841a89020971da1d8693f1a99997aefc5dc424bb1b251fd6322462a1b8842780 +# pip libsass @ https://files.pythonhosted.org/packages/fd/5a/eb5b62641df0459a3291fc206cf5bd669c0feed7814dded8edef4ade8512/libsass-0.23.0-cp38-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.whl#sha256=4a218406d605f325d234e4678bd57126a66a88841cb95bee2caeafdc6f138306 +# pip mistune @ https://files.pythonhosted.org/packages/f0/74/c95adcdf032956d9ef6c89a9b8a5152bf73915f8c633f3e3d88d06bd699c/mistune-3.0.2-py3-none-any.whl#sha256=71481854c30fdbc938963d3605b72501f5c10a9320ecd412c121c163a1c7d205 +# pip overrides @ https://files.pythonhosted.org/packages/2c/ab/fc8290c6a4c722e5514d80f62b2dc4c4df1a68a41d1364e625c35990fcf3/overrides-7.7.0-py3-none-any.whl#sha256=c7ed9d062f78b8e4c1a7b70bd8796b35ead4d9f510227ef9c5dc7626c60d7e49 +# pip pandocfilters @ https://files.pythonhosted.org/packages/ef/af/4fbc8cab944db5d21b7e2a5b8e9211a03a79852b1157e2c102fcc61ac440/pandocfilters-1.5.1-py2.py3-none-any.whl#sha256=93be382804a9cdb0a7267585f157e5d1731bbe5545a85b268d6f5fe6232de2bc +# pip pkginfo @ https://files.pythonhosted.org/packages/c0/38/d617739840a2f576e400f03fea0a75703f93cc274002635b4b998bbb9de4/pkginfo-1.11.1-py3-none-any.whl#sha256=bfa76a714fdfc18a045fcd684dbfc3816b603d9d075febef17cb6582bea29573 +# pip prometheus-client @ https://files.pythonhosted.org/packages/c7/98/745b810d822103adca2df8decd4c0bbe839ba7ad3511af3f0d09692fc0f0/prometheus_client-0.20.0-py3-none-any.whl#sha256=cde524a85bce83ca359cc837f28b8c0db5cac7aa653a588fd7e84ba061c329e7 # pip ptyprocess @ https://files.pythonhosted.org/packages/22/a6/858897256d0deac81a172289110f31629fc4cee19b6f01283303e18c8db3/ptyprocess-0.7.0-py2.py3-none-any.whl#sha256=4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35 -# pip pycparser @ https://files.pythonhosted.org/packages/62/d5/5f610ebe421e85889f2e55e33b7f9a6795bd982198517d912eb1c76e1a53/pycparser-2.21-py2.py3-none-any.whl#sha256=8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9 -# pip pyrsistent @ https://files.pythonhosted.org/packages/64/de/375aa14daaee107f987da76ca32f7a907fea00fa8b8afb67dc09bec0de91/pyrsistent-0.19.3-py3-none-any.whl#sha256=ccf0d6bd208f8111179f0c26fdf84ed7c3891982f2edaeae7422575f47e66b64 +# 
pip pycparser @ https://files.pythonhosted.org/packages/13/a3/a812df4e2dd5696d1f351d58b8fe16a405b234ad2886a0dab9183fb78109/pycparser-2.22-py3-none-any.whl#sha256=c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc # pip python-json-logger @ https://files.pythonhosted.org/packages/35/a6/145655273568ee78a581e734cf35beb9e33a370b29c5d3c8fee3744de29f/python_json_logger-2.0.7-py3-none-any.whl#sha256=f380b826a991ebbe3de4d897aeec42760035ac760345e57b812938dc8b35e2bd +# pip pyyaml @ https://files.pythonhosted.org/packages/7d/39/472f2554a0f1e825bd7c5afc11c817cd7a2f3657460f7159f691fbb37c51/PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c # pip rfc3986-validator @ https://files.pythonhosted.org/packages/9e/51/17023c0f8f1869d8806b979a2bffa3f861f26a3f1a66b094288323fba52f/rfc3986_validator-0.1.1-py2.py3-none-any.whl#sha256=2f235c432ef459970b4306369336b9d5dbdda31b510ca1e327636e01f528bfa9 -# pip send2trash @ https://files.pythonhosted.org/packages/a9/78/e4df1e080ed790acf3a704edf521006dd96b9841bd2e2a462c0d255e0565/Send2Trash-1.8.2-py3-none-any.whl#sha256=a384719d99c07ce1eefd6905d2decb6f8b7ed054025bb0e618919f945de4f679 -# pip sniffio @ https://files.pythonhosted.org/packages/c3/a0/5dba8ed157b0136607c7f2151db695885606968d1fae123dc3391e0cfdbf/sniffio-1.3.0-py3-none-any.whl#sha256=eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384 -# pip soupsieve @ https://files.pythonhosted.org/packages/49/37/673d6490efc51ec46d198c75903d99de59baffdd47aea3d071b80a9e4e89/soupsieve-2.4.1-py3-none-any.whl#sha256=1c1bfee6819544a3447586c889157365a27e10d88cde3ad3da0cf0ddf646feb8 -# pip traitlets @ https://files.pythonhosted.org/packages/77/75/c28e9ef7abec2b7e9ff35aea3e0be6c1aceaf7873c26c95ae1f0d594de71/traitlets-5.9.0-py3-none-any.whl#sha256=9e6ec080259b9a5940c797d58b613b5e31441c2257b87c2e795c5228ae80d2d8 -# pip uri-template @ https://files.pythonhosted.org/packages/c0/db/d4f9c75b43541f7235daf4d13eb43f4491f9d5f5df45ce41daeed3a903f6/uri_template-1.2.0-py3-none-any.whl#sha256=f1699c77b73b925cf4937eae31ab282a86dc885c333f2e942513f08f691fc7db -# pip webcolors @ https://files.pythonhosted.org/packages/d5/e1/3e9013159b4cbb71df9bd7611cbf90dc2c621c8aeeb677fc41dad72f2261/webcolors-1.13-py3-none-any.whl#sha256=29bc7e8752c0a1bd4a1f03c14d6e6a72e93d82193738fa860cbff59d0fcc11bf +# pip rpds-py @ https://files.pythonhosted.org/packages/97/b1/12238bd8cdf3cef71e85188af133399bfde1bddf319007361cc869d6f6a7/rpds_py-0.18.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=e4c39ad2f512b4041343ea3c7894339e4ca7839ac38ca83d68a832fc8b3748ab +# pip send2trash @ https://files.pythonhosted.org/packages/40/b0/4562db6223154aa4e22f939003cb92514c79f3d4dccca3444253fd17f902/Send2Trash-1.8.3-py3-none-any.whl#sha256=0c31227e0bd08961c7665474a3d1ef7193929fedda4233843689baa056be46c9 +# pip sniffio @ https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl#sha256=2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2 +# pip traitlets @ https://files.pythonhosted.org/packages/00/c0/8f5d070730d7836adc9c9b6408dec68c6ced86b304a9b26a14df072a6e8c/traitlets-5.14.3-py3-none-any.whl#sha256=b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f +# pip types-python-dateutil @ 
https://files.pythonhosted.org/packages/c7/1b/af4f4c4f3f7339a4b7eb3c0ab13416db98f8ac09de3399129ee5fdfa282b/types_python_dateutil-2.9.0.20240316-py3-none-any.whl#sha256=6b8cb66d960771ce5ff974e9dd45e38facb81718cc1e208b10b1baccbfdbee3b +# pip uri-template @ https://files.pythonhosted.org/packages/e7/00/3fca040d7cf8a32776d3d81a00c8ee7457e00f80c649f1e4a863c8321ae9/uri_template-1.3.0-py3-none-any.whl#sha256=a44a133ea12d44a0c0f06d7d42a52d71282e77e2f937d8abd5655b8d56fc1363 +# pip webcolors @ https://files.pythonhosted.org/packages/3b/45/0c30e10a2ac52606476394e4ba11cf3b12ba5823e7fbb9167f80eee6000a/webcolors-24.6.0-py3-none-any.whl#sha256=8cf5bc7e28defd1d48b9e83d5fc30741328305a8195c29a8e668fa45586568a1 # pip webencodings @ https://files.pythonhosted.org/packages/f4/24/2a3e3df732393fed8b3ebf2ec078f05546de641fe1b667ee316ec1dcf3b7/webencodings-0.5.1-py2.py3-none-any.whl#sha256=a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78 -# pip websocket-client @ https://files.pythonhosted.org/packages/86/5c/2ebfbb7d4dbb7f35a1f70c40d003f7844d78945ac7c69757067ebaea9c78/websocket_client-1.5.2-py3-none-any.whl#sha256=f8c64e28cd700e7ba1f04350d66422b6833b82a796b525a51e740b8cc8dab4b1 -# pip anyio @ https://files.pythonhosted.org/packages/68/fe/7ce1926952c8a403b35029e194555558514b365ad77d75125f521a2bec62/anyio-3.7.0-py3-none-any.whl#sha256=eddca883c4175f14df8aedce21054bfca3adb70ffe76a9f607aef9d7fa2ea7f0 -# pip arrow @ https://files.pythonhosted.org/packages/67/67/4bca5a595e2f89bff271724ddb1098e6c9e16f7f3d018d120255e3c30313/arrow-1.2.3-py3-none-any.whl#sha256=5a49ab92e3b7b71d96cd6bfcc4df14efefc9dfa96ea19045815914a6ab6b1fe2 -# pip beautifulsoup4 @ https://files.pythonhosted.org/packages/57/f4/a69c20ee4f660081a7dedb1ac57f29be9378e04edfcb90c526b923d4bebc/beautifulsoup4-4.12.2-py3-none-any.whl#sha256=bd2520ca0d9d7d12694a53d44ac482d181b4ec1888909b035a3dbf40d0f57d4a -# pip bleach @ https://files.pythonhosted.org/packages/ac/e2/dfcab68c9b2e7800c8f06b85c76e5f978d05b195a958daa9b1dda54a1db6/bleach-6.0.0-py3-none-any.whl#sha256=33c16e3353dbd13028ab4799a0f89a83f113405c766e9c122df8a06f5b85b3f4 -# pip cffi @ https://files.pythonhosted.org/packages/2d/86/3ca57cddfa0419f6a95d1c8478f8f622ba597e3581fd501bbb915b20eb75/cffi-1.15.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=5d598b938678ebf3c67377cdd45e09d431369c3b1a5b331058c338e201f12b27 +# pip websocket-client @ https://files.pythonhosted.org/packages/5a/84/44687a29792a70e111c5c477230a72c4b957d88d16141199bf9acb7537a3/websocket_client-1.8.0-py3-none-any.whl#sha256=17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526 +# pip anyio @ https://files.pythonhosted.org/packages/7b/a2/10639a79341f6c019dedc95bd48a4928eed9f1d1197f4c04f546fc7ae0ff/anyio-4.4.0-py3-none-any.whl#sha256=c1b2d8f46a8a812513012e1107cb0e68c17159a7a594208005a57dc776e1bdc7 +# pip arrow @ https://files.pythonhosted.org/packages/f8/ed/e97229a566617f2ae958a6b13e7cc0f585470eac730a73e9e82c32a3cdd2/arrow-1.3.0-py3-none-any.whl#sha256=c728b120ebc00eb84e01882a6f5e7927a53960aa990ce7dd2b10f39005a67f80 +# pip bleach @ https://files.pythonhosted.org/packages/ea/63/da7237f805089ecc28a3f36bca6a21c31fcbc2eb380f3b8f1be3312abd14/bleach-6.1.0-py3-none-any.whl#sha256=3225f354cfc436b9789c66c4ee030194bee0568fbf9cbdad3bc8b5c26c5f12b6 +# pip cffi @ https://files.pythonhosted.org/packages/ea/ac/e9e77bc385729035143e54cc8c4785bd480eaca9df17565963556b0b7a93/cffi-1.16.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=8f8e709127c6c77446a8c0a8c8bf3c8ee706a06cd44b1e827c3e6a2ee6b8c098 # pip doit @ 
https://files.pythonhosted.org/packages/44/83/a2960d2c975836daa629a73995134fd86520c101412578c57da3d2aa71ee/doit-0.36.0-py3-none-any.whl#sha256=ebc285f6666871b5300091c26eafdff3de968a6bd60ea35dd1e3fc6f2e32479a -# pip jupyter-core @ https://files.pythonhosted.org/packages/41/1e/92a67f333b9335f04ce409799c030dcfb291712658b9d9d13997f7c91e5a/jupyter_core-5.3.0-py3-none-any.whl#sha256=d4201af84559bc8c70cead287e1ab94aeef3c512848dde077b7684b54d67730d +# pip jupyter-core @ https://files.pythonhosted.org/packages/c9/fb/108ecd1fe961941959ad0ee4e12ee7b8b1477247f30b1fdfd83ceaf017f0/jupyter_core-5.7.2-py3-none-any.whl#sha256=4f7315d2f6b4bcf2e3e7cb6e46772eba760ae459cd1f59d29eb57b0a01bd7409 +# pip referencing @ https://files.pythonhosted.org/packages/b7/59/2056f61236782a2c86b33906c025d4f4a0b17be0161b63b70fd9e8775d36/referencing-0.35.1-py3-none-any.whl#sha256=eda6d3234d62814d1c64e305c1331c9a3a6132da475ab6382eaa997b21ee75de # pip rfc3339-validator @ https://files.pythonhosted.org/packages/7b/44/4e421b96b67b2daff264473f7465db72fbdf36a07e05494f50300cc7b0c6/rfc3339_validator-0.1.4-py2.py3-none-any.whl#sha256=24f6ec1eda14ef823da9e36ec7113124b39c04d50a4d3d3a3c2859577e7791fa -# pip terminado @ https://files.pythonhosted.org/packages/84/a7/c7628d79651b8c8c775d27b374315a825141b5783512e82026fb210dd639/terminado-0.17.1-py3-none-any.whl#sha256=8650d44334eba354dd591129ca3124a6ba42c3d5b70df5051b6921d506fdaeae -# pip tinycss2 @ https://files.pythonhosted.org/packages/da/99/fd23634d6962c2791fb8cb6ccae1f05dcbfc39bce36bba8b1c9a8d92eae8/tinycss2-1.2.1-py3-none-any.whl#sha256=2b80a96d41e7c3914b8cda8bc7f705a4d9c49275616e886103dd839dfc847847 +# pip sphinxcontrib-sass @ https://files.pythonhosted.org/packages/2e/87/7c2eb08e3ca1d6baae32c0a5e005330fe1cec93a36aa085e714c3b3a3c7d/sphinxcontrib_sass-0.3.4-py2.py3-none-any.whl#sha256=a0c79a44ae8b8935c02dc340ebe40c9e002c839331201c899dc93708970c355a +# pip terminado @ https://files.pythonhosted.org/packages/6a/9e/2064975477fdc887e47ad42157e214526dcad8f317a948dee17e1659a62f/terminado-0.18.1-py3-none-any.whl#sha256=a4468e1b37bb318f8a86514f65814e1afc977cf29b3992a4500d9dd305dcceb0 +# pip tinycss2 @ https://files.pythonhosted.org/packages/2c/4d/0db5b8a613d2a59bbc29bc5bb44a2f8070eb9ceab11c50d477502a8a0092/tinycss2-1.3.0-py3-none-any.whl#sha256=54a8dbdffb334d536851be0226030e9505965bb2f30f21a4a82c55fb2a80fae7 # pip argon2-cffi-bindings @ https://files.pythonhosted.org/packages/ec/f7/378254e6dd7ae6f31fe40c8649eea7d4832a42243acaf0f1fff9083b2bed/argon2_cffi_bindings-21.2.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=b746dba803a79238e925d9046a63aa26bf86ab2a2fe74ce6b009a1c3f5c8f2ae # pip isoduration @ https://files.pythonhosted.org/packages/7b/55/e5326141505c5d5e34c5e0935d2908a74e4561eca44108fbfb9c13d2911a/isoduration-20.11.0-py3-none-any.whl#sha256=b2904c2a4228c3d44f409c8ae8e2370eb21a26f7ac2ec5446df141dde3452042 -# pip jupyter-server-terminals @ https://files.pythonhosted.org/packages/ea/7f/36db12bdb90f5237766dcbf59892198daab7260acbcf03fc75e2a2a82672/jupyter_server_terminals-0.4.4-py3-none-any.whl#sha256=75779164661cec02a8758a5311e18bb8eb70c4e86c6b699403100f1585a12a36 -# pip jupyterlite-core @ https://files.pythonhosted.org/packages/5e/25/dd347708151764152b75f6606c02e2571e1228ba52c28502495b48ac17d8/jupyterlite_core-0.1.0-py3-none-any.whl#sha256=2f17afa282447594cb38f7a1d2619ceface28d8f4747e038790cac22c394e804 -# pip pyzmq @ 
https://files.pythonhosted.org/packages/94/4b/1093172b73984b568d9f1a72bcd61793822fab40aa571f5d6ed9db6234cb/pyzmq-25.1.0-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl#sha256=4c2fc7aad520a97d64ffc98190fce6b64152bde57a10c704b337082679e74f67 -# pip argon2-cffi @ https://files.pythonhosted.org/packages/a8/07/946d5a9431bae05a776a59746ec385fbb79b526738d25e4202d3e0bbf7f4/argon2_cffi-21.3.0-py3-none-any.whl#sha256=8c976986f2c5c0e5000919e6de187906cfd81fb1c72bf9d88c01177e77da7f80 -# pip jsonschema @ https://files.pythonhosted.org/packages/c1/97/c698bd9350f307daad79dd740806e1a59becd693bd11443a0f531e3229b3/jsonschema-4.17.3-py3-none-any.whl#sha256=a870ad254da1a8ca84b6a2905cac29d265f805acc57af304784962a2aa6508f6 -# pip jupyter-client @ https://files.pythonhosted.org/packages/07/37/4019d2c41ca333c08dfdfeb84c0fc0368c8defbbd3c8f0c9a530851e5813/jupyter_client-8.2.0-py3-none-any.whl#sha256=b18219aa695d39e2ad570533e0d71fb7881d35a873051054a84ee2a17c4b7389 -# pip jupyterlite-pyodide-kernel @ https://files.pythonhosted.org/packages/0a/26/fe1ffe15bcec2a78fe50480d463166484869605947636056425381d2542b/jupyterlite_pyodide_kernel-0.0.8-py3-none-any.whl#sha256=32c83f99c4b8aebd5d351c6a8a4abc153c3a11f2d1ed62efc872d56224437ef6 -# pip jupyter-events @ https://files.pythonhosted.org/packages/ee/14/e11a93c1b47a69432ee7898f1b55f1da27f2f93b009a34dbdafb9b903f81/jupyter_events-0.6.3-py3-none-any.whl#sha256=57a2749f87ba387cd1bfd9b22a0875b889237dbf2edc2121ebb22bde47036c17 -# pip nbformat @ https://files.pythonhosted.org/packages/e1/ce/7f0f454b4e7f1cb31345f9f977bdce7486033a1c08b5945b17ea95c4afbc/nbformat-5.9.0-py3-none-any.whl#sha256=8c8fa16d6d05062c26177754bfbfac22de644888e2ef69d27ad2a334cf2576e5 -# pip nbclient @ https://files.pythonhosted.org/packages/ac/5a/d670ca51e6c3d98574b9647599821590efcd811d71f58e9c89fc59a17685/nbclient-0.8.0-py3-none-any.whl#sha256=25e861299e5303a0477568557c4045eccc7a34c17fc08e7959558707b9ebe548 -# pip nbconvert @ https://files.pythonhosted.org/packages/2f/90/79bf16b584f5150550b0c175ca7a6e88334226e9275cf16db13785105d73/nbconvert-7.4.0-py3-none-any.whl#sha256=af5064a9db524f9f12f4e8be7f0799524bd5b14c1adea37e34e83c95127cc818 -# pip jupyter-server @ https://files.pythonhosted.org/packages/6f/04/b2e87b4ee96a2219df7666706b28c9ebffd9895fc98fe4b5c56b8b6931ce/jupyter_server-2.6.0-py3-none-any.whl#sha256=19525a1515b5999618a91b3e99ec9f6869aa8c5ba73e0b6279fcda918b54ba36 -# pip jupyterlab-server @ https://files.pythonhosted.org/packages/ad/31/cfb84feb3803c1e0e69dbe6928ab9251b9a1548b9092a5013413c0dd49f8/jupyterlab_server-2.22.1-py3-none-any.whl#sha256=1c8eb55c7cd70a50a51fef42a7b4e26ef2f7fc48728f0290604bd89b1dd156e6 -# pip jupyterlite-sphinx @ https://files.pythonhosted.org/packages/34/a9/a050b891d5d06a3fe73f1e16992a846a6f6ba21660ac053f5064cbf98bae/jupyterlite_sphinx-0.8.0-py3-none-any.whl#sha256=4a20fcb585ef036d3ed1c62cd6270351f810bc9586d3638f55e6a98665b3373d +# pip jsonschema-specifications @ https://files.pythonhosted.org/packages/ee/07/44bd408781594c4d0a027666ef27fab1e441b109dc3b76b4f836f8fd04fe/jsonschema_specifications-2023.12.1-py3-none-any.whl#sha256=87e4fdf3a94858b8a2ba2778d9ba57d8a9cafca7c7489c46ba0d30a8bc6a9c3c +# pip jupyter-server-terminals @ https://files.pythonhosted.org/packages/07/2d/2b32cdbe8d2a602f697a649798554e4f072115438e92249624e532e8aca6/jupyter_server_terminals-0.5.3-py3-none-any.whl#sha256=41ee0d7dc0ebf2809c668e0fc726dfaf258fcd3e769568996ca731b6194ae9aa +# pip jupyterlite-core @ 
https://files.pythonhosted.org/packages/05/d2/1d59d9a70d684b1eb3eb3a0b80a36b4e1d691e94af5d53aee56b1ad5240b/jupyterlite_core-0.3.0-py3-none-any.whl#sha256=247cc34ae6fedda41b15ce4778997164508b2039bc92480665cadfe955193467 +# pip pyzmq @ https://files.pythonhosted.org/packages/64/b8/1c181c13e118cabccfd25bd3e169e44958c649180b0d78b798a66899e08b/pyzmq-26.0.3-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl#sha256=b3cd31f859b662ac5d7f4226ec7d8bd60384fa037fc02aee6ff0b53ba29a3ba8 +# pip argon2-cffi @ https://files.pythonhosted.org/packages/a4/6a/e8a041599e78b6b3752da48000b14c8d1e8a04ded09c88c714ba047f34f5/argon2_cffi-23.1.0-py3-none-any.whl#sha256=c670642b78ba29641818ab2e68bd4e6a78ba53b7eff7b4c3815ae16abf91c7ea +# pip jsonschema @ https://files.pythonhosted.org/packages/c8/2f/324fab4be6fe37fb7b521546e8a557e6cf08c1c1b3d0b4839a00f589d9ef/jsonschema-4.22.0-py3-none-any.whl#sha256=ff4cfd6b1367a40e7bc6411caec72effadd3db0bbe5017de188f2d6108335802 +# pip jupyter-client @ https://files.pythonhosted.org/packages/cf/d3/c4bb02580bc0db807edb9a29b2d0c56031be1ef0d804336deb2699a470f6/jupyter_client-8.6.2-py3-none-any.whl#sha256=50cbc5c66fd1b8f65ecb66bc490ab73217993632809b6e505687de18e9dea39f +# pip jupyterlite-pyodide-kernel @ https://files.pythonhosted.org/packages/42/ce/87fadd7eaa01caaa564d3345025b983f72b4200abc82245068bd2664fb56/jupyterlite_pyodide_kernel-0.3.2-py3-none-any.whl#sha256=ae600571fa755b6fd7a2633a171de3fe490f2b1264bef32cdd7e8c34c95cd5ff +# pip jupyter-events @ https://files.pythonhosted.org/packages/a5/94/059180ea70a9a326e1815176b2370da56376da347a796f8c4f0b830208ef/jupyter_events-0.10.0-py3-none-any.whl#sha256=4b72130875e59d57716d327ea70d3ebc3af1944d3717e5a498b8a06c6c159960 +# pip nbformat @ https://files.pythonhosted.org/packages/a9/82/0340caa499416c78e5d8f5f05947ae4bc3cba53c9f038ab6e9ed964e22f1/nbformat-5.10.4-py3-none-any.whl#sha256=3b48d6c8fbca4b299bf3982ea7db1af21580e4fec269ad087b9e81588891200b +# pip nbclient @ https://files.pythonhosted.org/packages/66/e8/00517a23d3eeaed0513e718fbc94aab26eaa1758f5690fc8578839791c79/nbclient-0.10.0-py3-none-any.whl#sha256=f13e3529332a1f1f81d82a53210322476a168bb7090a0289c795fe9cc11c9d3f +# pip nbconvert @ https://files.pythonhosted.org/packages/b8/bb/bb5b6a515d1584aa2fd89965b11db6632e4bdc69495a52374bcc36e56cfa/nbconvert-7.16.4-py3-none-any.whl#sha256=05873c620fe520b6322bf8a5ad562692343fe3452abda5765c7a34b7d1aa3eb3 +# pip jupyter-server @ https://files.pythonhosted.org/packages/26/f5/be75c159deda5b54e15cf54029915ad28337fcfef402d671566c45f9e61f/jupyter_server-2.14.1-py3-none-any.whl#sha256=16f7177c3a4ea8fe37784e2d31271981a812f0b2874af17339031dc3510cc2a5 +# pip jupyterlab-server @ https://files.pythonhosted.org/packages/cb/46/d5ffd7c0f63db4e9f0982c3d58efeea10fc5f47e79fb328431df78843772/jupyterlab_server-2.27.2-py3-none-any.whl#sha256=54aa2d64fd86383b5438d9f0c032f043c4d8c0264b8af9f60bd061157466ea43 +# pip jupyterlite-sphinx @ https://files.pythonhosted.org/packages/71/2c/bd797dc46a7281d43444c79ff312d4f8d27d41a0de05f48cad81c7939966/jupyterlite_sphinx-0.15.0-py3-none-any.whl#sha256=344d1f9ee5a20b141a4a4139874eae30a68216f0c995d03ea2e3b3e9d29c4cd5 diff --git a/build_tools/circle/doc_min_dependencies_environment.yml b/build_tools/circle/doc_min_dependencies_environment.yml index fb9c1f34ef618..e27c3a700fdad 100644 --- a/build_tools/circle/doc_min_dependencies_environment.yml +++ b/build_tools/circle/doc_min_dependencies_environment.yml @@ -4,30 +4,38 @@ channels: - conda-forge dependencies: - - python=3.8 - - numpy=1.17.3 # min + - python=3.9 + - numpy=1.19.5 # 
min - blas - - scipy=1.5.0 # min - - cython=0.29.33 # min + - scipy=1.6.0 # min + - cython=3.0.10 # min - joblib - threadpoolctl - - matplotlib=3.1.3 # min - - pandas=1.0.5 # min + - matplotlib=3.3.4 # min + - pandas=1.1.5 # min - pyamg - pytest - - pytest-xdist=2.5.0 + - pytest-xdist - pillow - - setuptools - - scikit-image=0.16.2 # min + - pip + - ninja + - meson-python + - scikit-image=0.17.2 # min - seaborn - memory_profiler - compilers - - sphinx=4.0.1 # min - - sphinx-gallery=0.7.0 # min + - sphinx=7.3.7 # min + - sphinx-gallery=0.16.0 # min + - sphinx-copybutton=0.5.2 # min - numpydoc=1.2.0 # min - - sphinx-prompt=1.3.0 # min + - sphinx-prompt=1.4.0 # min - plotly=5.14.0 # min - - pooch + - polars=0.20.23 # min + - pooch=1.6.0 # min + - sphinx-remove-toctrees=1.0.0.post1 # min + - sphinx-design=0.5.0 # min + - pydata-sphinx-theme=0.15.3 # min - pip - pip: - - sphinxext-opengraph==0.4.2 # min + - sphinxext-opengraph==0.9.1 # min + - sphinxcontrib-sass==0.3.4 # min diff --git a/build_tools/circle/doc_min_dependencies_linux-64_conda.lock b/build_tools/circle/doc_min_dependencies_linux-64_conda.lock index a2d5cbb8554ff..f445fe196a6da 100644 --- a/build_tools/circle/doc_min_dependencies_linux-64_conda.lock +++ b/build_tools/circle/doc_min_dependencies_linux-64_conda.lock @@ -1,174 +1,256 @@ # Generated by conda-lock. # platform: linux-64 -# input_hash: b6da36fc22a70d5ecc78b7b7beca6ea69727004c0a3021ad5474f9bcbe59b2ac +# input_hash: aa64e81a701c97b7c4cf149f108c3ca59fc65572bfda79dbaeb2d093afc8a665 @EXPLICIT https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 -https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2023.5.7-hbcca054_0.conda#f5c65075fc34438d5b456c7f3f5ab695 -https://conda.anaconda.org/conda-forge/noarch/kernel-headers_linux-64-2.6.32-he073ed8_15.tar.bz2#5dd5127afd710f91f6a75821bac0a4f0 -https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.36.1-hea4e1c9_2.tar.bz2#bd4f2e711b39af170e7ff15163fe87ee -https://conda.anaconda.org/conda-forge/linux-64/libgcc-devel_linux-64-7.5.0-hda03d7c_20.tar.bz2#2146b25eb2a762a44fab709338a7b6d9 -https://conda.anaconda.org/conda-forge/linux-64/libgfortran4-7.5.0-h14aa051_20.tar.bz2#a072eab836c3a9578ce72b5640ce592d -https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-devel_linux-64-7.5.0-hb016644_20.tar.bz2#31d5500f621954679ee41d7f5d1089fb -https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.1.0-hfd8a6a1_0.conda#067bcc23164642f4c226da631f2a2e1d -https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.8-3_cp38.conda#2f3f7af062b42d664117662612022204 -https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-7.5.0-h14aa051_20.tar.bz2#c3b2ad091c043c08689e64b10741484b -https://conda.anaconda.org/conda-forge/linux-64/libgomp-13.1.0-he5830b7_0.conda#56ca14d57ac29a75d23a39eb3ee0ddeb -https://conda.anaconda.org/conda-forge/noarch/sysroot_linux-64-2.12-he073ed8_15.tar.bz2#66c192522eacf5bb763568b4e415d133 -https://conda.anaconda.org/conda-forge/linux-64/binutils_impl_linux-64-2.36.1-h193b22a_2.tar.bz2#32aae4265554a47ea77f7c09f86aeb3b -https://conda.anaconda.org/conda-forge/linux-64/binutils-2.36.1-hdd6e379_2.tar.bz2#3111f86041b5b6863545ca49130cca95 -https://conda.anaconda.org/conda-forge/linux-64/binutils_linux-64-2.36-hf3e587d_33.tar.bz2#72b245322c589284f1b92a5c971e5cb6 +https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2024.6.2-hbcca054_0.conda#847c3c2905cc467cea52c24f9cfa8080 
+https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2#0c96522c6bdaed4b1566d11387caaf45 +https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2#34893075a5c9e55cdafac56607368fc6 +https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2#4d59c254e01d9cde7957100457e2d5fb +https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_2.conda#cbbe59391138ea5ad3658c76912e147f +https://conda.anaconda.org/conda-forge/noarch/kernel-headers_linux-64-2.6.32-he073ed8_17.conda#d731b543793afc0433c4fd593e693fce +https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.40-hf3520f5_3.conda#7c1062eaa78dec4ea8a9a988dbda6045 +https://conda.anaconda.org/conda-forge/noarch/libgcc-devel_linux-64-12.3.0-h0223996_107.conda#851e9651c9e4cd5dc19f80398eba9a1c +https://conda.anaconda.org/conda-forge/noarch/libstdcxx-devel_linux-64-12.3.0-h0223996_107.conda#167a1f5d77d8f3c2a638f7eb418429f1 +https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.2.0-hc0a3c3a_7.conda#53ebd4c833fa01cb2c6353e99f905406 +https://conda.anaconda.org/conda-forge/linux-64/mkl-include-2024.1.0-ha957f24_693.conda#249c91c2186d236c6d180342241db2ec +https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.9-4_cp39.conda#bfe4b3259a8ac6cdf0037752904da6a7 +https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda#161081fc7cec0bfda0d86d7cb595f8d8 +https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-0.tar.bz2#f766549260d6815b0c52253f1fb1bb29 +https://conda.anaconda.org/conda-forge/linux-64/libgomp-13.2.0-h77fa898_7.conda#abf3fec87c2563697defa759dec3d639 +https://conda.anaconda.org/conda-forge/noarch/sysroot_linux-64-2.12-he073ed8_17.conda#595db67e32b276298ff3d94d07d47fbf +https://conda.anaconda.org/conda-forge/linux-64/binutils_impl_linux-64-2.40-ha1999f0_3.conda#2b0c0d451353cde73295d799ea8886b1 +https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2#fee5683a3f04bd15cbd8318b096a27ab +https://conda.anaconda.org/conda-forge/linux-64/binutils-2.40-h4852527_3.conda#abcdaa12050c40a74330701f22418e5a +https://conda.anaconda.org/conda-forge/linux-64/binutils_linux-64-2.40-hb3c18ed_4.conda#19286994c03c5207a70c7cfabe294570 https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_kmp_llvm.tar.bz2#562b26ba2e19059551a811e72ab7f793 -https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.1.0-he5830b7_0.conda#cd93f779ff018dd85c7544c015c9db3c -https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-7.5.0-habd7529_20.tar.bz2#42140612518a7ce78f571d64b6a50ba3 -https://conda.anaconda.org/conda-forge/linux-64/gettext-0.21.1-h27087fc_0.tar.bz2#14947d8770185e5153fdd04d4673ed37 -https://conda.anaconda.org/conda-forge/linux-64/icu-64.2-he1b5a44_1.tar.bz2#8e881214a23508f1541eb7a3135d6fcb -https://conda.anaconda.org/conda-forge/linux-64/jpeg-9e-h0b41bf4_3.conda#c7a069243e1fbe9a556ed2ec030e6407 +https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.2.0-h77fa898_7.conda#72ec1b1b04c4d15d4204ece1ecea5978 +https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.11-hd590300_1.conda#0bb492cca54017ea314b809b1ee3a176 +https://conda.anaconda.org/conda-forge/linux-64/attr-2.5.1-h166bdaf_1.tar.bz2#d9c69a24ad678ffce24c6543a0176b00 +https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hd590300_5.conda#69b8b6202a07720f448be700e300ccf4 
+https://conda.anaconda.org/conda-forge/linux-64/gettext-tools-0.22.5-h59595ed_2.conda#985f2f453fb72408d6b6f1be0f324033 +https://conda.anaconda.org/conda-forge/linux-64/graphite2-1.3.13-h59595ed_1003.conda#f87c7b7c2cb45f323ffbce941c78ab7c +https://conda.anaconda.org/conda-forge/linux-64/icu-73.2-h59595ed_0.conda#cc47e1facc155f91abd89b11e48e72ff +https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2#30186d27e2c9fa62b45fb1476b7200e3 +https://conda.anaconda.org/conda-forge/linux-64/lame-3.100-h166bdaf_1003.tar.bz2#a8832b479f93521a9e7b5b743803be51 https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h27087fc_0.tar.bz2#76bbff344f0134279f225174e9064c8f -https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.0.9-h166bdaf_8.tar.bz2#9194c9bf9428035a05352d031462eae4 -https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.17-h0b41bf4_0.conda#5cc781fd91968b11a8a7fdbee0982676 -https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.5.0-hcb278e6_1.conda#6305a3dd2752c76335295da4e581f2fd -https://conda.anaconda.org/conda-forge/linux-64/libffi-3.2.1-he1b5a44_1007.tar.bz2#11389072d7d6036fd811c3d9460475cd -https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.17-h166bdaf_0.tar.bz2#b62b52da46c39ee2bc3c162ac7f1804d +https://conda.anaconda.org/conda-forge/linux-64/libasprintf-0.22.5-h661eb56_2.conda#dd197c968bf9760bba0031888d431ede +https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.20-hd590300_0.conda#8e88f9389f1165d7c0936fe40d9a9a79 +https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.6.2-h59595ed_0.conda#e7ba12deb7020dd080c6c70e7b6f6a3d +https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3 +https://conda.anaconda.org/conda-forge/linux-64/libgettextpo-0.22.5-h59595ed_2.conda#172bcc51059416e7ce99e7b528cede83 +https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-13.2.0-hca663fb_7.conda#c0bd771f09a326fdcd95a60b617795bf +https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.17-hd590300_2.conda#d66573916ffcf376178462f1b61c941e +https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.0.0-hd590300_1.conda#ea25936bb4080d843790b586850f82b8 +https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hd590300_0.conda#30fd6e37fe21f86f4bd26d6ee73eeec7 +https://conda.anaconda.org/conda-forge/linux-64/libogg-1.3.4-h7f98852_1.tar.bz2#6e8cc2173440d77708196c5b93771680 +https://conda.anaconda.org/conda-forge/linux-64/libopus-1.3.1-h7f98852_1.tar.bz2#15345e56d527b330e1cacbdf58676e8f +https://conda.anaconda.org/conda-forge/linux-64/libsanitizer-12.3.0-hb8811af_7.conda#ee573415c47ce17f65101d0b3fba396d https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda#40b61aab5c7ba9ff276c41cfffe6b80b -https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.3.0-h0b41bf4_0.conda#0d4a7508d8c6c65314f2b9c1f56ad408 -https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-0.10.0-he1b5a44_0.tar.bz2#78ccac2098edcd3673af2ceb3e95f932 -https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-h166bdaf_4.tar.bz2#f3f9de449d32ca9b9c66a22863c96f41 -https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.4-hcb278e6_0.conda#681105bccc2a3f7f1a837d47d39c9179 +https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.4.0-hd590300_0.conda#b26e8aa824079e1be0294e7152ca4559 +https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda#5aa797f8787fe7a17d1b0821485b5adc 
+https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-h4ab18f5_1.conda#57d7dc60e9325e3de37ff8dffd18e814 +https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.9.4-hcb278e6_0.conda#318b08df404f9c9be5712aaa5a6f0bb0 +https://conda.anaconda.org/conda-forge/linux-64/mpg123-1.32.6-h59595ed_0.conda#9160cdeb523a1b20cf8d2a0bf821f45d +https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h59595ed_0.conda#fcea371545eda051b6deafb24889fc69 +https://conda.anaconda.org/conda-forge/linux-64/ninja-1.12.1-h297d8ca_0.conda#3aa1c7e292afeff25a0091ddd7c69b72 https://conda.anaconda.org/conda-forge/linux-64/nspr-4.35-h27087fc_0.conda#da0ec11a6454ae19bff5b02ed881a2b1 -https://conda.anaconda.org/conda-forge/linux-64/openssl-1.1.1u-hd590300_0.conda#cc1c2db83ae28a28871d52b035739488 -https://conda.anaconda.org/conda-forge/linux-64/pcre-8.45-h9c3ff4c_0.tar.bz2#c05d1820a6d34ff07aaaab7a9b7eddaa +https://conda.anaconda.org/conda-forge/linux-64/openssl-3.3.1-h4ab18f5_0.conda#a41fa0e391cc9e0d6b78ac69ca047a6c +https://conda.anaconda.org/conda-forge/linux-64/pixman-0.43.2-h59595ed_0.conda#71004cbf7924e19c02746ccde9fd7123 https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-h36c2ea0_1001.tar.bz2#22dad4df6e8630e8dff2428f6f6a7036 +https://conda.anaconda.org/conda-forge/linux-64/xorg-kbproto-1.0.7-h7f98852_1002.tar.bz2#4b230e8381279d76131116660f5a241a +https://conda.anaconda.org/conda-forge/linux-64/xorg-libice-1.1.1-hd590300_0.conda#b462a33c0be1421532f28bfe8f4a7514 https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.11-hd590300_0.conda#2c80dc38fface310c9bd81b17037fee5 https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdmcp-1.1.3-h7f98852_0.tar.bz2#be93aabceefa2fac576e971aef407908 +https://conda.anaconda.org/conda-forge/linux-64/xorg-renderproto-0.11.1-h7f98852_1002.tar.bz2#06feff3d2634e3097ce2fe681474b534 +https://conda.anaconda.org/conda-forge/linux-64/xorg-xextproto-7.3.0-h0b41bf4_1003.conda#bce9f945da8ad2ae9b1d7165a64d0f87 +https://conda.anaconda.org/conda-forge/linux-64/xorg-xf86vidmodeproto-2.3.1-h7f98852_1002.tar.bz2#3ceea9668625c18f19530de98b15d5b0 +https://conda.anaconda.org/conda-forge/linux-64/xorg-xproto-7.0.31-h7f98852_1007.tar.bz2#b4a4381d54784606820704f7b5f05a15 https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0 https://conda.anaconda.org/conda-forge/linux-64/yaml-0.2.5-h7f98852_2.tar.bz2#4cb3ad778ec2d5a7acbdf254eb1c42ae -https://conda.anaconda.org/conda-forge/linux-64/expat-2.5.0-hcb278e6_1.conda#8b9b5aca60558d02ddaa09d599e55920 -https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-7.5.0-h47867f9_33.tar.bz2#3a31c3f430a31184a5d07e67d3b24e2c -https://conda.anaconda.org/conda-forge/linux-64/gfortran_impl_linux-64-7.5.0-h56cb351_20.tar.bz2#8f897b30195bd3a2251b4c51c3cc91cf -https://conda.anaconda.org/conda-forge/linux-64/gxx_impl_linux-64-7.5.0-hd0bb8aa_20.tar.bz2#dbe78fc5fb9c339f8e55426559e12f7b -https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.0.9-h166bdaf_8.tar.bz2#4ae4d7795d33e02bd20f6b23d91caf82 -https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.0.9-h166bdaf_8.tar.bz2#04bac51ba35ea023dc48af73c1c88c25 -https://conda.anaconda.org/conda-forge/linux-64/libllvm9-9.0.1-default_hc23dcda_7.tar.bz2#9f4686a2c319355fe8636ca13783c3b4 -https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.39-h753d276_0.conda#e1c890aebdebbfbf87e2c917187b4416 -https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.42.0-h2797004_0.conda#fdaae20a1cf7cd62130a0973190a31b7 
-https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.13-h7f98852_1004.tar.bz2#b3653fdc58d03face9724f602218a904 +https://conda.anaconda.org/conda-forge/linux-64/expat-2.6.2-h59595ed_0.conda#53fb86322bdb89496d7579fe3f02fd61 +https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-12.3.0-h58ffeeb_7.conda#95f78565a09852783d3e90e0389cfa5f +https://conda.anaconda.org/conda-forge/linux-64/libasprintf-devel-0.22.5-h661eb56_2.conda#02e41ab5834dcdcc8590cf29d9526f50 +https://conda.anaconda.org/conda-forge/linux-64/libcap-2.69-h0f662aa_0.conda#25cb5999faa414e5ccb2c1388f62d3d5 +https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2.tar.bz2#4d331e44109e3f0e19b4cb8f9b82f3e1 +https://conda.anaconda.org/conda-forge/linux-64/libevent-2.1.12-hf998b51_1.conda#a1cfcc585f0c42bf8d5546bb1dfb668d +https://conda.anaconda.org/conda-forge/linux-64/libgettextpo-devel-0.22.5-h59595ed_2.conda#b63d9b6da3653179a278077f0de20014 +https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-13.2.0-h69a702a_7.conda#1b84f26d9f4f6026e179e7805d5a15cd +https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.43-h2797004_0.conda#009981dd9cfcaa4dbfa25ffaed86bcae +https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.45.3-h2797004_0.conda#b3316cbe90249da4f8e84cd66e1cc55b +https://conda.anaconda.org/conda-forge/linux-64/libvorbis-1.3.7-h9c3ff4c_0.tar.bz2#309dec04b70a3cc0f1e84a4013683bc0 +https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.15-h0b41bf4_0.conda#33277193f5b92bad9fdd230eb700929c +https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.12.7-hc051c1a_1.conda#340278ded8b0dc3a73f3660bbb0adbc6 +https://conda.anaconda.org/conda-forge/linux-64/mysql-common-8.3.0-hf1915f5_4.conda#784a4df6676c581ca624fbe460703a6d +https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.43-hcad00b1_0.conda#8292dea9e022d9610a11fce5e0896ed8 https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4 -https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.12-h27826a3_0.tar.bz2#5b8c42eb62e9fc961af70bdd6a26e168 -https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.13-h166bdaf_4.tar.bz2#4b11e365c0275b808be78b30f904e295 -https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.2-h3eb15da_6.conda#6b63daed8feeca47be78f323e793d555 -https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.0.9-h166bdaf_8.tar.bz2#e5613f2bc717e9945840ff474419b8e4 -https://conda.anaconda.org/conda-forge/linux-64/c-compiler-1.1.1-h516909a_0.tar.bz2#d98aa4948ec35f52907e2d6152e2b255 -https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-hca18f0e_1.conda#e1232042de76d24539a436d37597eb06 -https://conda.anaconda.org/conda-forge/linux-64/gfortran_linux-64-7.5.0-h78c8a43_33.tar.bz2#b2879010fb369f4012040f7a27657cd8 -https://conda.anaconda.org/conda-forge/linux-64/gxx_linux-64-7.5.0-h555fc39_33.tar.bz2#5cf979793d2c5130a012cb6480867adc -https://conda.anaconda.org/conda-forge/linux-64/libclang-9.0.1-default_hb4e5071_5.tar.bz2#9dde69aa2a8ecd575a16e44987bdc9f7 -https://conda.anaconda.org/conda-forge/linux-64/libglib-2.66.3-hbe7bbb4_0.tar.bz2#d5a09a9e981849b751cb75656b7302a0 -https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.5.0-h6adf6a1_2.conda#2e648a34072eb39d7c4fc2a9981c5f0c -https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.9.10-hee79883_0.tar.bz2#0217b0926808b1adf93247bba489d733 -https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-16.0.5-h4dfa4b3_0.conda#9441a97b74c692d969ff465ac6c0ccea 
-https://conda.anaconda.org/conda-forge/linux-64/nss-3.89-he45b914_0.conda#2745719a58eeaab6657256a3f142f099 -https://conda.anaconda.org/conda-forge/linux-64/sqlite-3.42.0-h2c6b66d_0.conda#1192f6ec654a5bc4ee1d64bdc4a3e5cc -https://conda.anaconda.org/conda-forge/linux-64/brotli-1.0.9-h166bdaf_8.tar.bz2#2ff08978892a3e8b954397c461f18418 -https://conda.anaconda.org/conda-forge/linux-64/cxx-compiler-1.1.1-hc9558a2_0.tar.bz2#1eb7c67eb11eab0c98a87f84174fdde1 -https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.14.2-h14ed4e7_0.conda#0f69b688f52ff6da70bccb7ff7001d1d -https://conda.anaconda.org/conda-forge/linux-64/fortran-compiler-1.1.1-he991be0_0.tar.bz2#e38ac82cc517b9e245c1ae99f9f140da -https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.15-hfd0df8a_0.conda#aa8840cdf17ef0c6084d1e24abc7a28b -https://conda.anaconda.org/conda-forge/linux-64/mkl-2020.4-h726a3e6_304.tar.bz2#b9b35a50e5377b19da6ec0709ae77fc3 -https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.0-hfec8fc6_2.conda#5ce6a42505c6e9e6151c54c3ec8d68ea -https://conda.anaconda.org/conda-forge/linux-64/python-3.8.6-h852b56e_0_cpython.tar.bz2#dd65401dfb61ac030edc0dc4d15c2c51 -https://conda.anaconda.org/conda-forge/noarch/alabaster-0.7.13-pyhd8ed1ab_0.conda#06006184e203b61d3525f90de394471e -https://conda.anaconda.org/conda-forge/noarch/certifi-2023.5.7-pyhd8ed1ab_0.conda#5d1b71c942b8421285934dad1d891ebc -https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.1.0-pyhd8ed1ab_0.conda#7fcff9f6f123696e940bda77bd4d6551 -https://conda.anaconda.org/conda-forge/noarch/click-8.1.3-unix_pyhd8ed1ab_2.tar.bz2#20e4087407c7cb04a40817114b333dbf -https://conda.anaconda.org/conda-forge/noarch/cloudpickle-2.2.1-pyhd8ed1ab_0.conda#b325bfc4cff7d7f8a868f1f7ecc4ed16 +https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda#d453b98d9c83e71da0741bb0ff4d76bc +https://conda.anaconda.org/conda-forge/linux-64/xorg-libsm-1.2.4-h7391055_0.conda#93ee23f12bc2e684548181256edd2cf6 +https://conda.anaconda.org/conda-forge/linux-64/zlib-1.3.1-h4ab18f5_1.conda#9653f1bf3766164d0e65fa723cabbc54 +https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.6-ha6fb4c9_0.conda#4d056880988120e29d75bfff282e0f45 +https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-h267a509_2.conda#9ae35c3d96db2c94ce0cef86efdfa2cb +https://conda.anaconda.org/conda-forge/linux-64/gcc-12.3.0-h915e2ae_7.conda#84b1c5cebd0a0443f3d7f90a4be93fc6 +https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-12.3.0-h6b3dd4b_4.conda#4b76ee727fca36fd83ef58586516c46a +https://conda.anaconda.org/conda-forge/linux-64/gettext-0.22.5-h59595ed_2.conda#219ba82e95d7614cf7140d2a4afc0926 +https://conda.anaconda.org/conda-forge/linux-64/gfortran_impl_linux-64-12.3.0-h1645026_7.conda#2d9d4058c433c9ce2a811c76658c4efd +https://conda.anaconda.org/conda-forge/linux-64/gxx_impl_linux-64-12.3.0-h2a574ab_7.conda#265caa78b979f112fc241cecd0015c91 +https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.2-h659d440_0.conda#cd95826dbd331ed1be26bdf401432844 +https://conda.anaconda.org/conda-forge/linux-64/libglib-2.80.2-hf974151_0.conda#72724f6a78ecb15559396966226d5838 +https://conda.anaconda.org/conda-forge/linux-64/libhwloc-2.10.0-default_h5622ce7_1001.conda#fc2d5b79c2d3f8568fbab31db7ae02f3 +https://conda.anaconda.org/conda-forge/linux-64/libllvm15-15.0.7-hb3ce162_4.conda#8a35df3cbc0c8b12cc8af9473ae75eef +https://conda.anaconda.org/conda-forge/linux-64/libllvm18-18.1.7-hb77312f_0.conda#bc0ea7e1f75a9b1c8467597fbbd9f86b 
+https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.6.0-h1dd3fc0_3.conda#66f03896ffbe1a110ffda05c7a856504 +https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-18.1.7-ha31de31_0.conda#7234f31acd176e402e91e03feba90f7d +https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-8.3.0-hca2cd23_4.conda#1b50eebe2a738a3146c154d2eceaa8b6 +https://conda.anaconda.org/conda-forge/linux-64/nss-3.100-hca3bf56_0.conda#949c4a82290ee58b3c970cef4bcfd4ad +https://conda.anaconda.org/conda-forge/linux-64/python-3.9.19-h0755675_0_cpython.conda#d9ee3647fbd9e8595b8df759b2bbefb8 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-0.4.0-hd590300_1.conda#9bfac7ccd94d54fd21a0501296d60424 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-keysyms-0.4.0-h8ee46fc_1.conda#632413adcd8bc16b515cab87a2932913 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-renderutil-0.3.9-hd590300_1.conda#e995b155d938b6779da6ace6c6b13816 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-wm-0.4.1-h8ee46fc_1.conda#90108a432fb5c6150ccfee3f03388656 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.9-h8ee46fc_0.conda#077b6e8ad6a3ddb741fce2496dd01bec +https://conda.anaconda.org/conda-forge/noarch/alabaster-0.7.16-pyhd8ed1ab_0.conda#def531a3ac77b7fb8c21d17bb5d0badb +https://conda.anaconda.org/conda-forge/noarch/appdirs-1.4.4-pyh9f0ad1d_0.tar.bz2#5f095bc6454094e96f146491fd03633b +https://conda.anaconda.org/conda-forge/linux-64/brotli-python-1.1.0-py39h3d6467e_1.conda#c48418c8b35f1d59ae9ae1174812b40a +https://conda.anaconda.org/conda-forge/linux-64/c-compiler-1.7.0-hd590300_1.conda#e9dffe1056994133616378309f932d77 +https://conda.anaconda.org/conda-forge/noarch/certifi-2024.2.2-pyhd8ed1ab_0.conda#0876280e409658fc6f9e75d035960333 +https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.3.2-pyhd8ed1ab_0.conda#7f4a9e3fcff3f6356ae99244a014da6a +https://conda.anaconda.org/conda-forge/noarch/click-8.1.7-unix_pyh707e725_0.conda#f3ad426304898027fc619827ff428eca +https://conda.anaconda.org/conda-forge/noarch/cloudpickle-3.0.0-pyhd8ed1ab_0.conda#753d29fe41bb881e4b9c004f0abf973f https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 -https://conda.anaconda.org/conda-forge/linux-64/compilers-1.1.1-0.tar.bz2#1ba267e19dbaf3db9dd0404e6fb9cdb9 -https://conda.anaconda.org/conda-forge/noarch/cycler-0.11.0-pyhd8ed1ab_0.tar.bz2#a50559fad0affdbb33729a68669ca1cb -https://conda.anaconda.org/conda-forge/linux-64/cython-0.29.33-py38h8dc9893_0.conda#5d50cd654981f0ccc7c878ac297afaa7 -https://conda.anaconda.org/conda-forge/linux-64/docutils-0.17.1-py38h578d9bd_3.tar.bz2#34e1f12e3ed15aff218644e9d865b722 -https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.1.1-pyhd8ed1ab_0.conda#7312299d7a0ea4993159229b7d2dceb2 -https://conda.anaconda.org/conda-forge/noarch/execnet-1.9.0-pyhd8ed1ab_0.tar.bz2#0e521f7a5e60d508b121d38b04874fb2 -https://conda.anaconda.org/conda-forge/noarch/fsspec-2023.5.0-pyh1a96a4e_0.conda#20edd290b319aa0eff3e9055375756dc -https://conda.anaconda.org/conda-forge/linux-64/glib-2.66.3-h58526e2_0.tar.bz2#62c2e5c84f6cdc7ded2307ef9c30dc8c -https://conda.anaconda.org/conda-forge/noarch/idna-3.4-pyhd8ed1ab_0.tar.bz2#34272b248891bddccc64479f9a7fffed +https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhd8ed1ab_0.conda#5cd86562580f274031ede6aa6aa24441 +https://conda.anaconda.org/conda-forge/linux-64/cython-3.0.10-py39h3d6467e_0.conda#76b5d215fb735a6dc43010ffbe78040e 
+https://conda.anaconda.org/conda-forge/linux-64/dbus-1.13.6-h5008d03_3.tar.bz2#ecfff944ba3960ecb334b9a2663d708d +https://conda.anaconda.org/conda-forge/noarch/docutils-0.21.2-pyhd8ed1ab_0.conda#e8cd5d629f65bdf0f3bb312cde14659e +https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.0-pyhd8ed1ab_2.conda#8d652ea2ee8eaee02ed8dc820bc794aa +https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_0.conda#15dda3cdbf330abfe9f555d22f66db46 +https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.14.2-h14ed4e7_0.conda#0f69b688f52ff6da70bccb7ff7001d1d +https://conda.anaconda.org/conda-forge/noarch/fsspec-2024.6.0-pyhff2d567_0.conda#ad6af3f92e71b1579ac2362b6cf29105 +https://conda.anaconda.org/conda-forge/linux-64/gfortran-12.3.0-h915e2ae_7.conda#8efa768f7f74085629f3e1090e7f0569 +https://conda.anaconda.org/conda-forge/linux-64/gfortran_linux-64-12.3.0-h5877db1_4.conda#aa5735fc506449d6bc2ec27bc066364e +https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.80.2-hb6ce0ca_0.conda#a965aeaf060289528a3fbe09326edae2 +https://conda.anaconda.org/conda-forge/linux-64/gxx-12.3.0-h915e2ae_7.conda#721c5433122a02bf3a081db10a2e68e2 +https://conda.anaconda.org/conda-forge/linux-64/gxx_linux-64-12.3.0-ha28b414_4.conda#a2046880d7a1a377824bac6538d85aff +https://conda.anaconda.org/conda-forge/noarch/idna-3.7-pyhd8ed1ab_0.conda#c0cc1420498b17414d8617d0b9f506ca https://conda.anaconda.org/conda-forge/noarch/imagesize-1.4.1-pyhd8ed1ab_0.tar.bz2#7de5386c8fea29e76b303f37dde4c352 https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5 -https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.4-py38h43d8883_1.tar.bz2#41ca56d5cac7bfc7eb4fcdbee878eb84 -https://conda.anaconda.org/conda-forge/linux-64/libblas-3.8.0-20_mkl.tar.bz2#8fbce60932c01d0e193a1a814f2002be +https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.5-py39h7633fee_1.conda#c9f74d717e5a2847a9f8b779c54130f2 +https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.16-hb7c19ff_0.conda#51bb7010fc86f70eee639b4bb7a894f5 +https://conda.anaconda.org/conda-forge/linux-64/libclang-cpp15-15.0.7-default_h127d8a8_5.conda#d0a9633b53cdc319b8a1a532ae7822b8 +https://conda.anaconda.org/conda-forge/linux-64/libclang13-18.1.7-default_h087397f_0.conda#536526073c2e7f9056fdce8584da779e +https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h4637d8d_4.conda#d4529f4dff3057982a7617c7ac58fde3 +https://conda.anaconda.org/conda-forge/linux-64/libflac-1.4.3-h59595ed_0.conda#ee48bf17cc83a00f59ca1494d5646869 +https://conda.anaconda.org/conda-forge/linux-64/libgpg-error-1.49-h4f305b6_0.conda#dfcfd72c7a430d3616763ecfbefe4ca9 +https://conda.anaconda.org/conda-forge/linux-64/libpq-16.3-ha72fbe1_0.conda#bac737ae28b79cfbafd515258d97d29e https://conda.anaconda.org/conda-forge/noarch/locket-1.0.0-pyhd8ed1ab_0.tar.bz2#91e27ef3d05cc772ce627e51cff111c4 -https://conda.anaconda.org/conda-forge/linux-64/markupsafe-1.1.1-py38h0a891b7_4.tar.bz2#d182e0c60439427453ed4a7abd28ef0d -https://conda.anaconda.org/conda-forge/noarch/networkx-3.1-pyhd8ed1ab_0.conda#254f787d5068bc89f578bf63893ce8b4 -https://conda.anaconda.org/conda-forge/noarch/packaging-23.1-pyhd8ed1ab_0.conda#91cda59e66e1e4afe9476f8ef98f5c30 -https://conda.anaconda.org/conda-forge/linux-64/pillow-9.4.0-py38hde6dc18_1.conda#3de5619d3f556f966189e5251a266125 -https://conda.anaconda.org/conda-forge/noarch/pluggy-1.0.0-pyhd8ed1ab_5.tar.bz2#7d301a0d25f424d96175f810935f0da9 
-https://conda.anaconda.org/conda-forge/linux-64/psutil-5.9.5-py38h1de0b5d_0.conda#92e899e7b0ed27c793014d1fa54f9b7b -https://conda.anaconda.org/conda-forge/noarch/py-1.11.0-pyh6c4a22f_0.tar.bz2#b4613d7e7a493916d867842a6a148054 -https://conda.anaconda.org/conda-forge/noarch/pygments-2.15.1-pyhd8ed1ab_0.conda#d316679235612869eba305aa7d41d9bf -https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.0.9-pyhd8ed1ab_0.tar.bz2#e8fbc1b54b25f4b08281467bc13b70cc +https://conda.anaconda.org/conda-forge/linux-64/markupsafe-2.1.5-py39hd1e30aa_0.conda#9a9a22eb1f83c44953319ee3b027769f +https://conda.anaconda.org/conda-forge/noarch/networkx-3.2-pyhd8ed1ab_0.conda#cec8cc498664cc00a070676aa89e69a7 +https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.2-h488ebb8_0.conda#7f2e286780f072ed750df46dc2631138 +https://conda.anaconda.org/conda-forge/noarch/packaging-24.0-pyhd8ed1ab_0.conda#248f521b64ce055e7feae3105e7abeb8 +https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_0.conda#d3483c8fc2dc2cc3f5cf43e26d60cabf +https://conda.anaconda.org/conda-forge/noarch/ply-3.11-pyhd8ed1ab_2.conda#18c6deb6f9602e32446398203c8f0e91 +https://conda.anaconda.org/conda-forge/linux-64/psutil-5.9.8-py39hd1e30aa_0.conda#ec86403fde8793ac1c36f8afa3d15902 +https://conda.anaconda.org/conda-forge/noarch/pygments-2.18.0-pyhd8ed1ab_0.conda#b7f5c092b8f9800150d998a71b76d5a1 +https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.2-pyhd8ed1ab_0.conda#b9a4dacf97241704529131a0dfc0494f https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha2e5f31_6.tar.bz2#2a7de29fb590ca14b5243c4c812c8025 -https://conda.anaconda.org/conda-forge/noarch/pytz-2023.3-pyhd8ed1ab_0.conda#d3076b483092a435832603243567bc31 -https://conda.anaconda.org/conda-forge/linux-64/pyyaml-6.0-py38h0a891b7_5.tar.bz2#0856c59f9ddb710c640dc0428d66b1b7 -https://conda.anaconda.org/conda-forge/linux-64/setuptools-59.8.0-py38h578d9bd_1.tar.bz2#da023e4a9c777abc28434d7a6473dcc2 +https://conda.anaconda.org/conda-forge/noarch/pytz-2024.1-pyhd8ed1ab_0.conda#3eeeeb9e4827ace8c0c1419c85d590ad +https://conda.anaconda.org/conda-forge/linux-64/pyyaml-6.0.1-py39hd1e30aa_1.conda#37218233bcdc310e4fde6453bc1b40d8 +https://conda.anaconda.org/conda-forge/linux-64/setuptools-59.8.0-py39hf3d152e_1.tar.bz2#4252d0c211566a9f65149ba7f6e87aa4 https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 https://conda.anaconda.org/conda-forge/noarch/snowballstemmer-2.2.0-pyhd8ed1ab_0.tar.bz2#4d22a9315e78c6827f806065957d566e -https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-applehelp-1.0.4-pyhd8ed1ab_0.conda#5a31a7d564f551d0e6dff52fd8cb5b16 -https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-devhelp-1.0.2-py_0.tar.bz2#68e01cac9d38d0e717cd5c87bc3d2cc9 -https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-htmlhelp-2.0.1-pyhd8ed1ab_0.conda#6c8c4d6eb2325e59290ac6dbbeacd5f0 -https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-jsmath-1.0.1-py_0.tar.bz2#67cd9d9c0382d37479b4d306c369a2d4 -https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-qthelp-1.0.3-py_0.tar.bz2#d01180388e6d1838c3e1ad029590aa7a -https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-serializinghtml-1.1.5-pyhd8ed1ab_2.tar.bz2#9ff55a0901cf952f05c654394de76bf7 -https://conda.anaconda.org/conda-forge/noarch/tenacity-8.2.2-pyhd8ed1ab_0.conda#7b39e842b52966a99e229739cd4dc36e -https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.1.0-pyh8a188c0_0.tar.bz2#a2995ee828f65687ac5b1e71a2ab1e0c 
+https://conda.anaconda.org/conda-forge/noarch/soupsieve-2.5-pyhd8ed1ab_1.conda#3f144b2c34f8cb5a9abd9ed23a39c561 +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-jsmath-1.0.1-pyhd8ed1ab_0.conda#da1d979339e2714c30a8e806a33ec087 +https://conda.anaconda.org/conda-forge/linux-64/tbb-2021.12.0-h297d8ca_1.conda#3ff978d8994f591818a506640c6a7071 +https://conda.anaconda.org/conda-forge/noarch/tenacity-8.3.0-pyhd8ed1ab_0.conda#216cfa8e32bcd1447646768351df6059 +https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.5.0-pyhc1e730c_0.conda#df68d78237980a159bd7149f33c0e8fd +https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095 https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 -https://conda.anaconda.org/conda-forge/noarch/toolz-0.12.0-pyhd8ed1ab_0.tar.bz2#92facfec94bc02d6ccf42e7173831a36 -https://conda.anaconda.org/conda-forge/linux-64/tornado-6.3.2-py38h01eb140_0.conda#3db869202b0e523d606d13e81ca79ab6 -https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.6.3-pyha770c72_0.conda#4a3014a4d107d15475d106b751c4e352 -https://conda.anaconda.org/conda-forge/noarch/wheel-0.40.0-pyhd8ed1ab_0.conda#49bb0d9e60ce1db25e151780331bb5f3 -https://conda.anaconda.org/conda-forge/noarch/zipp-3.15.0-pyhd8ed1ab_0.conda#13018819ca8f5b7cc675a8faf1f5fedf -https://conda.anaconda.org/conda-forge/noarch/babel-2.12.1-pyhd8ed1ab_1.conda#ac432e732804a81ddcf29c92ead57cde -https://conda.anaconda.org/conda-forge/linux-64/cytoolz-0.12.0-py38h0a891b7_1.tar.bz2#183f6160ab3498b882e903b06be7d430 -https://conda.anaconda.org/conda-forge/linux-64/dbus-1.13.6-hfdff14a_1.tar.bz2#4caaca6356992ee545080c7d7193b5a3 -https://conda.anaconda.org/conda-forge/linux-64/gstreamer-1.14.5-h36ae1b5_2.tar.bz2#00084ab2657be5bf0ba0757ccde797ef -https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-6.6.0-pyha770c72_0.conda#f91a5d5175fb7ff2a91952ec7da59cb9 -https://conda.anaconda.org/conda-forge/noarch/jinja2-2.11.3-pyhd8ed1ab_2.tar.bz2#bdedf6199eec03402a0c5db1f25e891e -https://conda.anaconda.org/conda-forge/noarch/joblib-1.2.0-pyhd8ed1ab_0.tar.bz2#7583652522d71ad78ba536bba06940eb -https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.8.0-20_mkl.tar.bz2#14b25490fdcc44e879ac6c10fe764f68 -https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.8.0-20_mkl.tar.bz2#52c0ae3606eeae7e1d493f37f336f4f5 +https://conda.anaconda.org/conda-forge/noarch/toolz-0.12.1-pyhd8ed1ab_0.conda#2fcb582444635e2c402e8569bb94e039 +https://conda.anaconda.org/conda-forge/linux-64/tornado-6.4.1-py39hd3abc70_0.conda#c183e99f9320e5e2d0f9c43efcb3fb22 +https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.12.2-pyha770c72_0.conda#ebe6952715e1d5eb567eeebf25250fa7 +https://conda.anaconda.org/conda-forge/noarch/wheel-0.43.0-pyhd8ed1ab_1.conda#0b5293a157c2b5cd513dd1b03d8d3aae +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-image-0.4.0-h8ee46fc_1.conda#9d7bcddf49cbf727730af10e71022c73 +https://conda.anaconda.org/conda-forge/linux-64/xkeyboard-config-2.42-h4ab18f5_0.conda#b193af204da1bfb8c13882d131a14bd2 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxext-1.3.4-h0b41bf4_2.conda#82b6df12252e6f32402b96dacc656fec +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.11-hd590300_0.conda#ed67c36f215b310412b2af935bf3e530 +https://conda.anaconda.org/conda-forge/noarch/zipp-3.18.1-pyhd8ed1ab_0.conda#c77c4aabc01b156a8cb4395f0233d335 
+https://conda.anaconda.org/conda-forge/noarch/accessible-pygments-0.0.4-pyhd8ed1ab_0.conda#46a2e6e3dfa718ce3492018d5a110dd6 +https://conda.anaconda.org/conda-forge/noarch/babel-2.14.0-pyhd8ed1ab_0.conda#9669586875baeced8fc30c0826c3270e +https://conda.anaconda.org/conda-forge/noarch/beautifulsoup4-4.12.3-pyha770c72_0.conda#332493000404d8411859539a5a630865 +https://conda.anaconda.org/conda-forge/linux-64/cairo-1.18.0-h3faef2a_0.conda#f907bb958910dc404647326ca80c263e +https://conda.anaconda.org/conda-forge/linux-64/cxx-compiler-1.7.0-h00ab1b0_1.conda#28de2e073db9ca9b72858bee9fb6f571 +https://conda.anaconda.org/conda-forge/linux-64/cytoolz-0.12.3-py39hd1e30aa_0.conda#dc0fb8e157c7caba4c98f1e1f9d2e5f4 +https://conda.anaconda.org/conda-forge/linux-64/fortran-compiler-1.7.0-heb67821_1.conda#cf4b0e7c4c78bb0662aed9b27c414a3c +https://conda.anaconda.org/conda-forge/linux-64/glib-2.80.2-hf974151_0.conda#d427988dc3dbd0a4c136f52db356cc6a +https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-7.1.0-pyha770c72_0.conda#0896606848b2dc5cebdf111b6543aa04 +https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.4-pyhd8ed1ab_0.conda#7b86ecb7d3557821c649b3c31e3eb9f2 +https://conda.anaconda.org/conda-forge/noarch/joblib-1.4.2-pyhd8ed1ab_0.conda#25df261d4523d9f9783bcdb7208d872f +https://conda.anaconda.org/conda-forge/linux-64/libgcrypt-1.10.3-hd590300_0.conda#32d16ad533c59bb0a3c5ffaf16110829 +https://conda.anaconda.org/conda-forge/linux-64/libsndfile-1.2.2-hc60ed4a_1.conda#ef1910918dd895516a769ed36b5b3a4e +https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.7.0-h662e7e4_0.conda#b32c0da42b1f24a98577bb3d7fc0b995 https://conda.anaconda.org/conda-forge/noarch/memory_profiler-0.61.0-pyhd8ed1ab_0.tar.bz2#8b45f9f2b2f7a98b0ec179c8991a4a9b -https://conda.anaconda.org/conda-forge/noarch/partd-1.4.0-pyhd8ed1ab_0.conda#721dab5803ea92ce02ddc4ee50aa0c48 -https://conda.anaconda.org/conda-forge/noarch/pip-23.1.2-pyhd8ed1ab_0.conda#7288da0d36821349cf1126e8670292df +https://conda.anaconda.org/conda-forge/noarch/meson-1.4.0-pyhd8ed1ab_0.conda#52a0660cfa40b45bf254ecc3374cb2e0 +https://conda.anaconda.org/conda-forge/linux-64/mkl-2024.1.0-ha957f24_693.conda#ff0f4abf6f94e36a918f1ef4dbeb9769 +https://conda.anaconda.org/conda-forge/noarch/partd-1.4.2-pyhd8ed1ab_0.conda#0badf9c54e24cecfb0ad2f99d680c163 +https://conda.anaconda.org/conda-forge/linux-64/pillow-10.3.0-py39h90c7501_0.conda#1e3b6af9592be71ce19f0a6aae05d97b +https://conda.anaconda.org/conda-forge/noarch/pip-24.0-pyhd8ed1ab_0.conda#f586ac1e56c8638b64f9c8122a7b8a67 https://conda.anaconda.org/conda-forge/noarch/plotly-5.14.0-pyhd8ed1ab_0.conda#6a7bcc42ef58dd6cf3da9333ea102433 -https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.2-pyhd8ed1ab_0.tar.bz2#dd999d1cc9f79e67dbb855c8924c7984 -https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.6.3-hd8ed1ab_0.conda#3876f650ed7d0f95d70fa4b647621909 -https://conda.anaconda.org/conda-forge/noarch/urllib3-2.0.2-pyhd8ed1ab_0.conda#81a763f3c64fe6d5f32e033b0325265d -https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.14.5-h0935bb2_2.tar.bz2#eb125ee86480e00a4a1ed45a577c3311 -https://conda.anaconda.org/conda-forge/noarch/importlib_metadata-6.6.0-hd8ed1ab_0.conda#3cbc9615f10a3d471532b83e4250b971 -https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.8.0-20_mkl.tar.bz2#8274dc30518af9df1de47f5d9e73165c -https://conda.anaconda.org/conda-forge/linux-64/numpy-1.17.3-py38h95a1406_0.tar.bz2#bc0cbf611fe2f86eab29b98e51404f5e 
-https://conda.anaconda.org/conda-forge/noarch/platformdirs-3.5.1-pyhd8ed1ab_0.conda#e2be672aece1f060adf7154f76531a35 -https://conda.anaconda.org/conda-forge/noarch/pytest-7.3.1-pyhd8ed1ab_0.conda#547c7de697ec99b494a28ddde185b5a4 -https://conda.anaconda.org/conda-forge/noarch/requests-2.31.0-pyhd8ed1ab_0.conda#a30144e4156cdbb236f99ebb49828f8b -https://conda.anaconda.org/conda-forge/linux-64/blas-2.20-mkl.tar.bz2#e7d09a07f5413e53dca5282b8fa50bed -https://conda.anaconda.org/conda-forge/noarch/dask-core-2023.5.0-pyhd8ed1ab_0.conda#03ed2d040648a5ba1063bf1cb0d87b78 -https://conda.anaconda.org/conda-forge/noarch/imageio-2.28.1-pyh24c5eb1_0.conda#ef3541a8cd9a55879932486a097b7fed -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.1.3-py38h250f245_0.tar.bz2#eb182969d8ed019d4de6939f393270d2 -https://conda.anaconda.org/conda-forge/linux-64/pandas-1.0.5-py38hcb8c335_0.tar.bz2#1e1b4382170fd26cf722ef008ffb651e -https://conda.anaconda.org/conda-forge/noarch/pooch-1.7.0-pyha770c72_3.conda#5936894aade8240c867d292aa0d980c6 -https://conda.anaconda.org/conda-forge/noarch/pytest-forked-1.6.0-pyhd8ed1ab_0.conda#a46947638b6e005b63d2d6271da529b0 -https://conda.anaconda.org/conda-forge/linux-64/pywavelets-1.1.1-py38h5c078b8_3.tar.bz2#dafeef887e68bd18ec84681747ca0fd5 -https://conda.anaconda.org/conda-forge/linux-64/qt-5.12.5-hd8c4c69_1.tar.bz2#0e105d4afe0c3c81c4fbd9937ec4f359 -https://conda.anaconda.org/conda-forge/linux-64/scipy-1.5.0-py38h18bccfc_0.tar.bz2#b6fda3b4ee494afef756621daa115d4d -https://conda.anaconda.org/conda-forge/noarch/sphinx-4.0.1-pyh6c4a22f_2.tar.bz2#c203dcc46f262853ecbb9552c50d664e -https://conda.anaconda.org/conda-forge/noarch/numpydoc-1.2-pyhd8ed1ab_0.tar.bz2#025ad7ca2c7f65007ab6b6f5d93a56eb -https://conda.anaconda.org/conda-forge/noarch/patsy-0.5.3-pyhd8ed1ab_0.tar.bz2#50ef6b29b1fb0768ca82c5aeb4fb2d96 -https://conda.anaconda.org/conda-forge/linux-64/pyamg-4.0.0-py38hf6732f7_1003.tar.bz2#44e00bf7a4b6a564e9313181aaea2615 -https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.12.3-py38ha8c2ead_3.tar.bz2#242c206b0c30fdc4c18aea16f04c4262 -https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-2.5.0-pyhd8ed1ab_0.tar.bz2#1fdd1f3baccf0deb647385c677a1a48e -https://conda.anaconda.org/conda-forge/linux-64/scikit-image-0.16.2-py38hb3f55d8_0.tar.bz2#468b398fefac8884cd6e6513af66549b +https://conda.anaconda.org/conda-forge/noarch/pyproject-metadata-0.8.0-pyhd8ed1ab_0.conda#573fe09d7bd0cd4bcc210d8369b5ca47 +https://conda.anaconda.org/conda-forge/noarch/pytest-8.2.2-pyhd8ed1ab_0.conda#0f3f49c22c7ef3a1195fa61dad3c43be +https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0-pyhd8ed1ab_0.conda#2cf4264fffb9e6eff6031c5b6884d61c +https://conda.anaconda.org/conda-forge/linux-64/sip-6.7.12-py39h3d6467e_0.conda#e667a3ab0df62c54e60e1843d2e6defb +https://conda.anaconda.org/conda-forge/noarch/urllib3-2.2.1-pyhd8ed1ab_0.conda#08807a87fa7af10754d46f63b368e016 +https://conda.anaconda.org/conda-forge/linux-64/compilers-1.7.0-ha770c72_1.conda#d8d07866ac3b5b6937213c89a1874f08 +https://conda.anaconda.org/conda-forge/linux-64/gstreamer-1.24.4-haf2f30d_0.conda#926c2c7ee7a0b48d6d70783a33f7bc80 +https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-8.5.0-hfac3d4d_0.conda#f5126317dd0ce0ba26945e411ecc6960 +https://conda.anaconda.org/conda-forge/noarch/importlib_metadata-7.1.0-hd8ed1ab_0.conda#6ef2b72d291b39e479d7694efa2b2b98 +https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-22_linux64_mkl.conda#eb6deb4ba6f92ea3f31c09cb8b764738 
+https://conda.anaconda.org/conda-forge/linux-64/libsystemd0-255-h3516f8a_1.conda#3366af27f0b593544a6cd453c7932ac5 +https://conda.anaconda.org/conda-forge/noarch/meson-python-0.16.0-pyh0c530f3_0.conda#e16f0dbf502da873be9f9adb0dc52547 +https://conda.anaconda.org/conda-forge/linux-64/mkl-devel-2024.1.0-ha770c72_693.conda#7f422e2cf549a3fb920c95288393870d +https://conda.anaconda.org/conda-forge/linux-64/pyqt5-sip-12.12.2-py39h3d6467e_5.conda#93aff412f3e49fdb43361c0215cbd72d +https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.5.0-pyhd8ed1ab_0.conda#d5f595da2daead898ca958ac62f0307b +https://conda.anaconda.org/conda-forge/noarch/requests-2.32.3-pyhd8ed1ab_0.conda#5ede4753180c7a550a443c430dc8ab52 +https://conda.anaconda.org/conda-forge/noarch/dask-core-2024.5.2-pyhd8ed1ab_0.conda#1a57a819915e1c169b74933720b138f2 +https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.24.4-h9ad1361_0.conda#147cce520ec59367549fd0d96d404213 +https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-22_linux64_mkl.conda#d6f942423116553f068b2f2d93ffea2e +https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-22_linux64_mkl.conda#4edf2e7ce63920e4f539d12e32fb478e +https://conda.anaconda.org/conda-forge/noarch/pooch-1.6.0-pyhd8ed1ab_0.tar.bz2#6429e1d1091c51f626b5dcfdd38bf429 +https://conda.anaconda.org/conda-forge/linux-64/pulseaudio-client-17.0-hb77b528_0.conda#07f45f1be1c25345faddb8db0de8039b +https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.9.0-22_linux64_mkl.conda#aa0a5a70e1c957d5911e76ac98e471e1 +https://conda.anaconda.org/conda-forge/linux-64/numpy-1.19.5-py39hd249d9e_3.tar.bz2#0cf333996ebdeeba8d1c8c1c0ee9eff9 +https://conda.anaconda.org/conda-forge/linux-64/qt-main-5.15.8-hc9dc06e_21.conda#b325046180590c868ce0dbf267b82eb8 +https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-22_linux64_mkl.conda#3cb0e51433c88d2f4cdfb50c5c08a683 +https://conda.anaconda.org/conda-forge/linux-64/imagecodecs-lite-2019.12.3-py39hd92a3bb_8.conda#5eb64443d4d973c31e179a498e1bb4a2 +https://conda.anaconda.org/conda-forge/noarch/imageio-2.34.1-pyh4b66e23_0.conda#bcf6a6f4c6889ca083e8d33afbafb8d5 +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.3.4-py39h2fa2bec_0.tar.bz2#9ec0b2186fab9121c54f4844f93ee5b7 +https://conda.anaconda.org/conda-forge/linux-64/pandas-1.1.5-py39hde0f152_0.tar.bz2#79fc4b5b3a865b90dd3701cecf1ad33c +https://conda.anaconda.org/conda-forge/noarch/patsy-0.5.6-pyhd8ed1ab_0.conda#a5b55d1cb110cdcedc748b5c3e16e687 +https://conda.anaconda.org/conda-forge/linux-64/polars-0.20.23-py39ha963410_0.conda#4871f09d653e979d598d2d4cd5fa868d +https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.15.9-py39h52134e7_5.conda#e1f148e57d071b09187719df86f513c1 +https://conda.anaconda.org/conda-forge/linux-64/pywavelets-1.3.0-py39hd257fcd_1.tar.bz2#c4b698994b2d8d2e659ae02202e6abe4 +https://conda.anaconda.org/conda-forge/linux-64/scipy-1.6.0-py39hee8e79c_0.tar.bz2#3afcb78281836e61351a2924f3230060 +https://conda.anaconda.org/conda-forge/linux-64/blas-2.122-mkl.conda#ead856637ff8a7feba572e2cf23b453b +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.3.4-py39hf3d152e_0.tar.bz2#cbaec993375a908bbe506dc7328d747c +https://conda.anaconda.org/conda-forge/linux-64/pyamg-4.2.3-py39hac2352c_1.tar.bz2#6fb0628d6195d8b6caa2422d09296399 https://conda.anaconda.org/conda-forge/noarch/seaborn-base-0.12.2-pyhd8ed1ab_0.conda#cf88f3a1c11536bc3c10c14ad00ccc42 -https://conda.anaconda.org/conda-forge/noarch/sphinx-gallery-0.7.0-py_0.tar.bz2#80bad3f857ecc86a4ab73f3e57addd13 
-https://conda.anaconda.org/conda-forge/noarch/sphinx-prompt-1.3.0-py_0.tar.bz2#9363002e2a134a287af4e32ff0f26cdc -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.1.3-py38_0.tar.bz2#1992ab91bbff86ded8d99d1f488d8e8b -https://conda.anaconda.org/conda-forge/linux-64/statsmodels-0.12.2-py38h5c078b8_0.tar.bz2#33787719ad03d33cffc4e2e3ea82bc9e +https://conda.anaconda.org/conda-forge/linux-64/statsmodels-0.13.2-py39hd257fcd_0.tar.bz2#bd7cdadf70e34a19333c3aacc40206e8 +https://conda.anaconda.org/conda-forge/noarch/tifffile-2020.6.3-py_0.tar.bz2#1fb771bb25b2eecbc73abf5143fa35bd +https://conda.anaconda.org/conda-forge/linux-64/scikit-image-0.17.2-py39hde0f152_4.tar.bz2#2a58a7e382317b03f023b2fddf40f8a1 https://conda.anaconda.org/conda-forge/noarch/seaborn-0.12.2-hd8ed1ab_0.conda#50847a47c07812f88581081c620f5160 -# pip sphinxext-opengraph @ https://files.pythonhosted.org/packages/50/ac/c105ed3e0a00b14b28c0aa630935af858fd8a32affeff19574b16e2c6ae8/sphinxext_opengraph-0.4.2-py3-none-any.whl#sha256=a51f2604f9a5b6c0d25d3a88e694d5c02e20812dc0e482adf96c8628f9109357 +https://conda.anaconda.org/conda-forge/noarch/numpydoc-1.2-pyhd8ed1ab_0.tar.bz2#025ad7ca2c7f65007ab6b6f5d93a56eb +https://conda.anaconda.org/conda-forge/noarch/pydata-sphinx-theme-0.15.3-pyhd8ed1ab_0.conda#55e445f4fcb07f2471fb0e1102d36488 +https://conda.anaconda.org/conda-forge/noarch/sphinx-copybutton-0.5.2-pyhd8ed1ab_0.conda#ac832cc43adc79118cf6e23f1f9b8995 +https://conda.anaconda.org/conda-forge/noarch/sphinx-design-0.5.0-pyhd8ed1ab_0.conda#264b3c697fa9cdade87eb0abe4440d54 +https://conda.anaconda.org/conda-forge/noarch/sphinx-gallery-0.16.0-pyhd8ed1ab_0.conda#add28691ee89e875b190eda07929d5d4 +https://conda.anaconda.org/conda-forge/noarch/sphinx-prompt-1.4.0-pyhd8ed1ab_0.tar.bz2#88ee91e8679603f2a5bd036d52919cc2 +https://conda.anaconda.org/conda-forge/noarch/sphinx-remove-toctrees-1.0.0.post1-pyhd8ed1ab_0.conda#6dee8412218288a17f99f2cfffab334d +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-applehelp-1.0.8-pyhd8ed1ab_0.conda#611a35a27914fac3aa37611a6fe40bb5 +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-devhelp-1.0.6-pyhd8ed1ab_0.conda#d7e4954df0d3aea2eacc7835ad12671d +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-htmlhelp-2.0.5-pyhd8ed1ab_0.conda#7e1e7437273682ada2ed5e9e9714b140 +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-qthelp-1.0.7-pyhd8ed1ab_0.conda#26acae54b06f178681bfb551760f5dd1 +https://conda.anaconda.org/conda-forge/noarch/sphinx-7.3.7-pyhd8ed1ab_0.conda#7b1465205e28d75d2c0e1a868ee00a67 +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-serializinghtml-1.1.10-pyhd8ed1ab_0.conda#e507335cb4ca9cff4c3d0fa9cdab255e +# pip libsass @ https://files.pythonhosted.org/packages/fd/5a/eb5b62641df0459a3291fc206cf5bd669c0feed7814dded8edef4ade8512/libsass-0.23.0-cp38-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.whl#sha256=4a218406d605f325d234e4678bd57126a66a88841cb95bee2caeafdc6f138306 +# pip sphinxcontrib-sass @ https://files.pythonhosted.org/packages/2e/87/7c2eb08e3ca1d6baae32c0a5e005330fe1cec93a36aa085e714c3b3a3c7d/sphinxcontrib_sass-0.3.4-py2.py3-none-any.whl#sha256=a0c79a44ae8b8935c02dc340ebe40c9e002c839331201c899dc93708970c355a +# pip sphinxext-opengraph @ https://files.pythonhosted.org/packages/92/0a/970b80b4fa1feeb6deb6f2e22d4cb14e388b27b315a1afdb9db930ff91a4/sphinxext_opengraph-0.9.1-py3-none-any.whl#sha256=b3b230cc6a5b5189139df937f0d9c7b23c7c204493b22646273687969dcb760e diff --git a/build_tools/circle/list_versions.py b/build_tools/circle/list_versions.py index 
dfcc600957469..e1f8d54b84ec5 100755 --- a/build_tools/circle/list_versions.py +++ b/build_tools/circle/list_versions.py @@ -1,12 +1,17 @@ #!/usr/bin/env python3 -# List all available versions of the documentation +# Write the available versions page (--rst) and the version switcher JSON (--json). +# Version switcher see: +# https://pydata-sphinx-theme.readthedocs.io/en/stable/user_guide/version-dropdown.html +# https://pydata-sphinx-theme.readthedocs.io/en/stable/user_guide/announcements.html#announcement-banners + +import argparse import json import re import sys +from urllib.request import urlopen from sklearn.utils.fixes import parse_version -from urllib.request import urlopen def json_urlread(url): @@ -52,14 +57,19 @@ def get_file_size(version): return human_readable_data_quantity(path_details["size"], 1000) -print(":orphan:") -print() -heading = "Available documentation for Scikit-learn" -print(heading) -print("=" * len(heading)) -print() -print("Web-based documentation is available for versions listed below:") -print() +parser = argparse.ArgumentParser() +parser.add_argument("--rst", type=str, required=True) +parser.add_argument("--json", type=str, required=True) +args = parser.parse_args() + +heading = "Available documentation for scikit-learn" +json_content = [] +rst_content = [ + ":orphan:\n", + heading, + "=" * len(heading) + "\n", + "Web-based documentation is available for versions listed below:\n", +] ROOT_URL = ( "https://api.github.com/repos/scikit-learn/scikit-learn.github.io/contents/" # noqa @@ -93,8 +103,9 @@ def get_file_size(version): # Output in order: dev, stable, decreasing other version seen = set() -for name in NAMED_DIRS + sorted( - (k for k in dirs if k[:1].isdigit()), key=parse_version, reverse=True +for i, name in enumerate( + NAMED_DIRS + + sorted((k for k in dirs if k[:1].isdigit()), key=parse_version, reverse=True) ): version_num, file_size = dirs[name] if version_num in seen: @@ -102,17 +113,32 @@ def get_file_size(version): continue else: seen.add(version_num) - name_display = "" if name[:1].isdigit() else " (%s)" % name - path = "https://scikit-learn.org/%s/" % name - out = "* `Scikit-learn %s%s documentation <%s>`_" % ( - version_num, - name_display, - path, - ) + + full_name = f"{version_num}" if name[:1].isdigit() else f"{version_num} ({name})" + path = f"https://scikit-learn.org/{name}/" + + # Update JSON for the version switcher; only keep the 8 latest versions to avoid + # overloading the version switcher dropdown + if i < 8: + info = {"name": full_name, "version": version_num, "url": path} + if name == "stable": + info["preferred"] = True + json_content.append(info) + + # Printout for the historical version page + out = f"* `scikit-learn {full_name} documentation <{path}>`_" if file_size is not None: file_extension = get_file_extension(version_num) out += ( f" (`{file_extension.upper()} {file_size} <{path}/" f"_downloads/scikit-learn-docs.{file_extension}>`_)" ) - print(out) + rst_content.append(out) + +with open(args.rst, "w", encoding="utf-8") as f: + f.write("\n".join(rst_content) + "\n") +print(f"Written {args.rst}") + +with open(args.json, "w", encoding="utf-8") as f: + json.dump(json_content, f, indent=2) +print(f"Written {args.json}") diff --git a/build_tools/circle/push_doc.sh b/build_tools/circle/push_doc.sh index c32a2d31fa811..f959b8b65c85c 100755 --- a/build_tools/circle/push_doc.sh +++ b/build_tools/circle/push_doc.sh @@ -1,8 +1,8 @@ #!/bin/bash # This script is meant to be called in the "deploy" step defined in -# circle.yml. 
See https://circleci.com/docs/ for more details. +# .circleci/config.yml. See https://circleci.com/docs/ for more details. # The behavior of the script is controlled by environment variable defined -# in the circle.yml in the top level folder of the project. +# in the .circleci/config.yml file. set -ex diff --git a/build_tools/cirrus/arm_tests.yml b/build_tools/cirrus/arm_tests.yml index a6e5919ecc32f..6c5fa26020f35 100644 --- a/build_tools/cirrus/arm_tests.yml +++ b/build_tools/cirrus/arm_tests.yml @@ -8,13 +8,27 @@ linux_aarch64_test_task: memory: 6G env: CONDA_ENV_NAME: testenv - LOCK_FILE: build_tools/cirrus/py39_conda_forge_linux-aarch64_conda.lock + LOCK_FILE: build_tools/cirrus/pymin_conda_forge_linux-aarch64_conda.lock CONDA_PKGS_DIRS: /root/.conda/pkgs - HOME: / # $HOME is not defined in image and is required to install mambaforge + HOME: / # $HOME is not defined in image and is required to install Miniforge + # Upload tokens have been encrypted via the CirrusCI interface: + # https://cirrus-ci.org/guide/writing-tasks/#encrypted-variables + # See `maint_tools/update_tracking_issue.py` for details on the permissions the token requires. + BOT_GITHUB_TOKEN: ENCRYPTED[9b50205e2693f9e4ce9a3f0fcb897a259289062fda2f5a3b8aaa6c56d839e0854a15872f894a70fca337dd4787274e0f] ccache_cache: folder: /root/.cache/ccache conda_cache: folder: /root/.conda/pkgs - fingerprint_script: cat build_tools/cirrus/py39_conda_forge_linux-aarch64_conda.lock + fingerprint_script: cat build_tools/cirrus/pymin_conda_forge_linux-aarch64_conda.lock - test_script: bash build_tools/cirrus/build_test_arm.sh + install_python_script: | + # Install python so that update_tracking_issue has access to a Python + apt install -y python3 python-is-python3 + + test_script: | + bash build_tools/cirrus/build_test_arm.sh + # On success, this script is run updating the issue. + bash build_tools/cirrus/update_tracking_issue.sh true + + on_failure: + update_tracker_script: bash build_tools/cirrus/update_tracking_issue.sh false diff --git a/build_tools/cirrus/arm_wheel.yml b/build_tools/cirrus/arm_wheel.yml index ece984c320249..c3dfcfbc53ad9 100644 --- a/build_tools/cirrus/arm_wheel.yml +++ b/build_tools/cirrus/arm_wheel.yml @@ -1,45 +1,3 @@ -macos_arm64_wheel_task: - macos_instance: - image: ghcr.io/cirruslabs/macos-monterey-xcode - env: - CONFTEST_PATH: ${CIRRUS_WORKING_DIR}/conftest.py - CONFTEST_NAME: conftest.py - CIBW_ENVIRONMENT: SKLEARN_SKIP_NETWORK_TESTS=1 - SKLEARN_BUILD_PARALLEL=5 - CIBW_TEST_COMMAND: bash {project}/build_tools/wheels/test_wheels.sh - CIBW_TEST_REQUIRES: pytest pandas threadpoolctl pytest-xdist - CIBW_BUILD_VERBOSITY: 1 - PATH: $HOME/mambaforge/bin/:$PATH - CONDA_HOME: $HOME/mambaforge - # Upload tokens have been encrypted via the CirrusCI interface: - # https://cirrus-ci.org/guide/writing-tasks/#encrypted-variables - # See `maint_tools/update_tracking_issue.py` for details on the permissions the token requires. 
- BOT_GITHUB_TOKEN: ENCRYPTED[9b50205e2693f9e4ce9a3f0fcb897a259289062fda2f5a3b8aaa6c56d839e0854a15872f894a70fca337dd4787274e0f] - matrix: - - env: - CIBW_BUILD: cp38-macosx_arm64 - - env: - CIBW_BUILD: cp39-macosx_arm64 - - env: - CIBW_BUILD: cp310-macosx_arm64 - - env: - CIBW_BUILD: cp311-macosx_arm64 - - conda_script: - - curl -L -o ~/mambaforge.sh https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-MacOSX-arm64.sh - - bash ~/mambaforge.sh -b -p ~/mambaforge - - cibuildwheel_script: - - bash build_tools/wheels/build_wheels.sh - - bash build_tools/cirrus/update_tracking_issue.sh true - - on_failure: - update_tracker_script: - - bash build_tools/cirrus/update_tracking_issue.sh false - - wheels_artifacts: - path: "wheelhouse/*" - linux_arm64_wheel_task: compute_engine_instance: image_project: cirrus-images @@ -49,8 +7,6 @@ linux_arm64_wheel_task: cpu: 4 memory: 4G env: - CONFTEST_PATH: ${CIRRUS_WORKING_DIR}/conftest.py - CONFTEST_NAME: conftest.py CIBW_ENVIRONMENT: SKLEARN_SKIP_NETWORK_TESTS=1 SKLEARN_BUILD_PARALLEL=5 CIBW_TEST_COMMAND: bash {project}/build_tools/wheels/test_wheels.sh @@ -61,19 +17,22 @@ linux_arm64_wheel_task: # See `maint_tools/update_tracking_issue.py` for details on the permissions the token requires. BOT_GITHUB_TOKEN: ENCRYPTED[9b50205e2693f9e4ce9a3f0fcb897a259289062fda2f5a3b8aaa6c56d839e0854a15872f894a70fca337dd4787274e0f] matrix: - - env: - CIBW_BUILD: cp38-manylinux_aarch64 + # Only the latest Python version is tested - env: CIBW_BUILD: cp39-manylinux_aarch64 + CIBW_TEST_SKIP: "*_aarch64" - env: CIBW_BUILD: cp310-manylinux_aarch64 + CIBW_TEST_SKIP: "*_aarch64" - env: CIBW_BUILD: cp311-manylinux_aarch64 + CIBW_TEST_SKIP: "*_aarch64" + - env: + CIBW_BUILD: cp312-manylinux_aarch64 cibuildwheel_script: - apt install -y python3 python-is-python3 - bash build_tools/wheels/build_wheels.sh - - bash build_tools/cirrus/update_tracking_issue.sh true on_failure: update_tracker_script: @@ -82,10 +41,19 @@ linux_arm64_wheel_task: wheels_artifacts: path: "wheelhouse/*" +# Update tracker when all jobs are successful +update_tracker_success: + depends_on: + - linux_arm64_wheel + container: + image: python:3.11 + # Only update tracker for nightly builds + only_if: $CIRRUS_CRON == "nightly" + update_script: + - bash build_tools/cirrus/update_tracking_issue.sh true wheels_upload_task: depends_on: - - macos_arm64_wheel - linux_arm64_wheel container: image: continuumio/miniconda3:22.11.1 @@ -94,16 +62,12 @@ wheels_upload_task: env: # Upload tokens have been encrypted via the CirrusCI interface: # https://cirrus-ci.org/guide/writing-tasks/#encrypted-variables - SCIKIT_LEARN_NIGHTLY_UPLOAD_TOKEN: ENCRYPTED[8f20120b18a07d8a11192b98bff1f562883558e1f4c53f8ead1577113785a4105ee6f14ad9b5dacf1803c19c4913fe1c] + SCIKIT_LEARN_NIGHTLY_UPLOAD_TOKEN: ENCRYPTED[9cf0529227577d503f2e19ef31cb690a2272cb243a217fb9a1ceda5cc608e8ccc292050fde9dca94cab766e1dd418519] SCIKIT_LEARN_STAGING_UPLOAD_TOKEN: ENCRYPTED[8fade46af37fa645e57bd1ee21683337aa369ba56f6307ce13889f1e74df94e5bdd21d323baac21e332fd87b8949659a] ARTIFACTS_PATH: wheelhouse upload_script: | conda install curl unzip -y - if [[ "$CIRRUS_CRON" == "nightly" ]]; then - export GITHUB_EVENT_NAME="schedule" - fi - # Download and show wheels curl https://api.cirrus-ci.com/v1/artifact/build/$CIRRUS_BUILD_ID/wheels.zip --output wheels.zip unzip wheels.zip diff --git a/build_tools/cirrus/build_test_arm.sh b/build_tools/cirrus/build_test_arm.sh index 4eeef6ec2dc0c..7ab95200bee50 100755 --- a/build_tools/cirrus/build_test_arm.sh +++ 
b/build_tools/cirrus/build_test_arm.sh @@ -22,19 +22,16 @@ setup_ccache() { ccache -M 0 } -MAMBAFORGE_URL="https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-aarch64.sh" - -# Install Mambaforge -wget $MAMBAFORGE_URL -O mambaforge.sh -MAMBAFORGE_PATH=$HOME/mambaforge -bash ./mambaforge.sh -b -p $MAMBAFORGE_PATH -export PATH=$MAMBAFORGE_PATH/bin:$PATH -mamba init --all --verbose -mamba update --yes mamba -mamba update --yes conda -mamba install "$(get_dep conda-lock min)" -y -conda-lock install --name $CONDA_ENV_NAME $LOCK_FILE -source activate $CONDA_ENV_NAME +# Install Miniforge +MINIFORGE_URL="https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-aarch64.sh" +curl -L --retry 10 $MINIFORGE_URL -o miniconda.sh +MINIFORGE_PATH=$HOME/miniforge3 +bash ./miniconda.sh -b -p $MINIFORGE_PATH +source $MINIFORGE_PATH/etc/profile.d/conda.sh +conda activate + +create_conda_environment_from_lock_file $CONDA_ENV_NAME $LOCK_FILE +conda activate $CONDA_ENV_NAME setup_ccache diff --git a/build_tools/cirrus/py39_conda_forge_linux-aarch64_conda.lock b/build_tools/cirrus/py39_conda_forge_linux-aarch64_conda.lock deleted file mode 100644 index 8234eb15a0820..0000000000000 --- a/build_tools/cirrus/py39_conda_forge_linux-aarch64_conda.lock +++ /dev/null @@ -1,101 +0,0 @@ -# Generated by conda-lock. -# platform: linux-aarch64 -# input_hash: de5bfe2a68b349f08233af7b94fc3b2045503b21289e8d3bdb30a1613fd0ddb8 -@EXPLICIT -https://conda.anaconda.org/conda-forge/linux-aarch64/ca-certificates-2023.5.7-hcefe29a_0.conda#331e624442b88d96bc05a7f2d38c61a4 -https://conda.anaconda.org/conda-forge/linux-aarch64/ld_impl_linux-aarch64-2.40-h2d8c526_0.conda#16246d69e945d0b1969a6099e7c5d457 -https://conda.anaconda.org/conda-forge/linux-aarch64/libgfortran5-13.1.0-h24e4805_0.conda#069e75bfdbed7744ee64a2b840fccc4e -https://conda.anaconda.org/conda-forge/linux-aarch64/libstdcxx-ng-13.1.0-h452befe_0.conda#572f5798bb3d4cc79650f0ca3149aeaa -https://conda.anaconda.org/conda-forge/linux-aarch64/python_abi-3.9-3_cp39.conda#b6f330b045cf3425945d536a6b5cd240 -https://conda.anaconda.org/conda-forge/noarch/tzdata-2023c-h71feb2d_0.conda#939e3e74d8be4dac89ce83b20de2492a -https://conda.anaconda.org/conda-forge/linux-aarch64/libgfortran-ng-13.1.0-he9431aa_0.conda#acd975de7f9506ff2514ef0addca1481 -https://conda.anaconda.org/conda-forge/linux-aarch64/_openmp_mutex-4.5-2_kmp_llvm.tar.bz2#98a1185182fec3c434069fa74e6473d6 -https://conda.anaconda.org/conda-forge/linux-aarch64/libgcc-ng-13.1.0-h2b4548d_0.conda#02619409d02932e28d694144b509597d -https://conda.anaconda.org/conda-forge/linux-aarch64/bzip2-1.0.8-hf897c2e_4.tar.bz2#2d787570a729e273a4e75775ddf3348a -https://conda.anaconda.org/conda-forge/linux-aarch64/lerc-4.0.0-h4de3ea5_0.tar.bz2#1a0ffc65e03ce81559dbcb0695ad1476 -https://conda.anaconda.org/conda-forge/linux-aarch64/libbrotlicommon-1.0.9-h4e544f5_8.tar.bz2#3cedc3935cfaa2a5303daa25fb12cb1d -https://conda.anaconda.org/conda-forge/linux-aarch64/libdeflate-1.18-hb4cce97_0.conda#e0d520842c0ae66b560cc65f9b96f658 -https://conda.anaconda.org/conda-forge/linux-aarch64/libffi-3.4.2-h3557bc0_5.tar.bz2#dddd85f4d52121fab0a8b099c5e06501 -https://conda.anaconda.org/conda-forge/linux-aarch64/libhiredis-1.0.2-h05efe27_0.tar.bz2#a87f068744fd20334cd41489eb163bee -https://conda.anaconda.org/conda-forge/linux-aarch64/libjpeg-turbo-2.1.5.1-hb4cce97_0.conda#89a30f83837239a008593afb78d210f2 -https://conda.anaconda.org/conda-forge/linux-aarch64/libnsl-2.0.0-hf897c2e_0.tar.bz2#36fdbc05c9d9145ece86f5a63c3f352e 
-https://conda.anaconda.org/conda-forge/linux-aarch64/libopenblas-0.3.23-pthreads_hd703e6f_0.conda#b8265d6197f98ed95a6cc2aa5efb708b -https://conda.anaconda.org/conda-forge/linux-aarch64/libuuid-2.38.1-hb4cce97_0.conda#000e30b09db0b7c775b21695dff30969 -https://conda.anaconda.org/conda-forge/linux-aarch64/libwebp-base-1.3.0-hb4cce97_0.conda#53670eaee6d77d9fe60a84f7fd226a4c -https://conda.anaconda.org/conda-forge/linux-aarch64/libzlib-1.2.13-h4e544f5_4.tar.bz2#88596b6277fe6d39f046983aae6044db -https://conda.anaconda.org/conda-forge/linux-aarch64/ncurses-6.4-h2e1726e_0.conda#40beaf447150c2760affc591c7509595 -https://conda.anaconda.org/conda-forge/linux-aarch64/openssl-3.1.1-h31becfc_1.conda#a8e811c3390d93e5db0cef68e52f349f -https://conda.anaconda.org/conda-forge/linux-aarch64/pthread-stubs-0.4-hb9de7d4_1001.tar.bz2#d0183ec6ce0b5aaa3486df25fa5f0ded -https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxau-1.0.11-h31becfc_0.conda#13de34f69cb73165dbe08c1e9148bedb -https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxdmcp-1.1.3-h3557bc0_0.tar.bz2#a6c9016ae1ca5c47a3603ed4cd65fedd -https://conda.anaconda.org/conda-forge/linux-aarch64/xz-5.2.6-h9cdd2b7_0.tar.bz2#83baad393a31d59c20b63ba4da6592df -https://conda.anaconda.org/conda-forge/linux-aarch64/libblas-3.9.0-17_linuxaarch64_openblas.conda#28fabad08c2cc13f3fd507cfaeb12b7c -https://conda.anaconda.org/conda-forge/linux-aarch64/libbrotlidec-1.0.9-h4e544f5_8.tar.bz2#319956380b383ec9f6a46d585599c028 -https://conda.anaconda.org/conda-forge/linux-aarch64/libbrotlienc-1.0.9-h4e544f5_8.tar.bz2#56a0a025208af24e2b43b2bbeee79802 -https://conda.anaconda.org/conda-forge/linux-aarch64/libpng-1.6.39-hf9034f9_0.conda#5ec9052384a6ac85e9111e9ac7c5ec4c -https://conda.anaconda.org/conda-forge/linux-aarch64/libsqlite-3.42.0-h194ca79_0.conda#5fc895d5063af554f24a7eb69faff054 -https://conda.anaconda.org/conda-forge/linux-aarch64/libxcb-1.15-h2a766a3_0.conda#eb3d8c8170e3d03f2564ed2024aa00c8 -https://conda.anaconda.org/conda-forge/linux-aarch64/openblas-0.3.23-pthreads_hef96516_0.conda#be3708e4cd351496c0ca051b552f4e04 -https://conda.anaconda.org/conda-forge/linux-aarch64/readline-8.2-h8fc344f_1.conda#105eb1e16bf83bfb2eb380a48032b655 -https://conda.anaconda.org/conda-forge/linux-aarch64/tk-8.6.12-hd8af866_0.tar.bz2#7894e82ff743bd96c76585ddebe28e2a -https://conda.anaconda.org/conda-forge/linux-aarch64/zstd-1.5.2-h44f6412_6.conda#6d0d1cd6d184129eabb96bb220afb5b2 -https://conda.anaconda.org/conda-forge/linux-aarch64/brotli-bin-1.0.9-h4e544f5_8.tar.bz2#0980429a0148a53edd0f1f207ec28a39 -https://conda.anaconda.org/conda-forge/linux-aarch64/ccache-4.8.1-h6552966_0.conda#5b436a19e818f05fe0c9ab4f5ac61233 -https://conda.anaconda.org/conda-forge/linux-aarch64/freetype-2.12.1-hbbbf32d_1.conda#e0891290982420d67651589c8584eec3 -https://conda.anaconda.org/conda-forge/linux-aarch64/libcblas-3.9.0-17_linuxaarch64_openblas.conda#41ed49a8f3a083999c2e733ddc2d4471 -https://conda.anaconda.org/conda-forge/linux-aarch64/liblapack-3.9.0-17_linuxaarch64_openblas.conda#362f230b41a01afb0445abd526a8d3e1 -https://conda.anaconda.org/conda-forge/linux-aarch64/libtiff-4.5.0-h536c0eb_6.conda#75a0916176030b99c03ca2ecfe961128 -https://conda.anaconda.org/conda-forge/linux-aarch64/llvm-openmp-16.0.5-h8b0cb96_0.conda#758ab64e00194a2171aea78bb8666d53 -https://conda.anaconda.org/conda-forge/linux-aarch64/python-3.9.16-hb363c5e_0_cpython.conda#0a7ef29549eaef817898062eeeefebd3 -https://conda.anaconda.org/conda-forge/linux-aarch64/brotli-1.0.9-h4e544f5_8.tar.bz2#259d82bd990ba225508389509634b157 
-https://conda.anaconda.org/conda-forge/noarch/certifi-2023.5.7-pyhd8ed1ab_0.conda#5d1b71c942b8421285934dad1d891ebc -https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.1.0-pyhd8ed1ab_0.conda#7fcff9f6f123696e940bda77bd4d6551 -https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 -https://conda.anaconda.org/conda-forge/noarch/cycler-0.11.0-pyhd8ed1ab_0.tar.bz2#a50559fad0affdbb33729a68669ca1cb -https://conda.anaconda.org/conda-forge/linux-aarch64/cython-0.29.35-py39h387a81e_0.conda#e8ba01e9056aca19ffd7df2479f3c6ce -https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.1.1-pyhd8ed1ab_0.conda#7312299d7a0ea4993159229b7d2dceb2 -https://conda.anaconda.org/conda-forge/noarch/execnet-1.9.0-pyhd8ed1ab_0.tar.bz2#0e521f7a5e60d508b121d38b04874fb2 -https://conda.anaconda.org/conda-forge/noarch/idna-3.4-pyhd8ed1ab_0.tar.bz2#34272b248891bddccc64479f9a7fffed -https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5 -https://conda.anaconda.org/conda-forge/linux-aarch64/kiwisolver-1.4.4-py39h110580c_1.tar.bz2#9c045502f6ab8c89bfda6be3c389e503 -https://conda.anaconda.org/conda-forge/linux-aarch64/lcms2-2.15-h3e0bdec_1.conda#5d6c6a9042e2316cec7410dd085814d1 -https://conda.anaconda.org/conda-forge/linux-aarch64/liblapacke-3.9.0-17_linuxaarch64_openblas.conda#1522e3323e898ae9fadd11424a3c0b75 -https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19 -https://conda.anaconda.org/conda-forge/linux-aarch64/numpy-1.24.3-py39hf88902c_0.conda#dc4187f9993e49b36eb9c61ce63ed3c5 -https://conda.anaconda.org/conda-forge/linux-aarch64/openjpeg-2.5.0-h9508984_2.conda#3d56d402a845c243f8c2dd3c8e836029 -https://conda.anaconda.org/conda-forge/noarch/packaging-23.1-pyhd8ed1ab_0.conda#91cda59e66e1e4afe9476f8ef98f5c30 -https://conda.anaconda.org/conda-forge/noarch/pluggy-1.0.0-pyhd8ed1ab_5.tar.bz2#7d301a0d25f424d96175f810935f0da9 -https://conda.anaconda.org/conda-forge/noarch/py-1.11.0-pyh6c4a22f_0.tar.bz2#b4613d7e7a493916d867842a6a148054 -https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.0.9-pyhd8ed1ab_0.tar.bz2#e8fbc1b54b25f4b08281467bc13b70cc -https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha2e5f31_6.tar.bz2#2a7de29fb590ca14b5243c4c812c8025 -https://conda.anaconda.org/conda-forge/noarch/setuptools-67.7.2-pyhd8ed1ab_0.conda#3b68bc43ec6baa48f7354a446267eefe -https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 -https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.1.0-pyh8a188c0_0.tar.bz2#a2995ee828f65687ac5b1e71a2ab1e0c -https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 -https://conda.anaconda.org/conda-forge/linux-aarch64/tornado-6.3.2-py39h7cc1d5f_0.conda#2c853c8bb419699667c452a01f69749f -https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.6.3-pyha770c72_0.conda#4a3014a4d107d15475d106b751c4e352 -https://conda.anaconda.org/conda-forge/linux-aarch64/unicodedata2-15.0.0-py39h0fd3b05_0.tar.bz2#835f1a9631e600e0176593e95e85f73f -https://conda.anaconda.org/conda-forge/noarch/wheel-0.40.0-pyhd8ed1ab_0.conda#49bb0d9e60ce1db25e151780331bb5f3 -https://conda.anaconda.org/conda-forge/noarch/zipp-3.15.0-pyhd8ed1ab_0.conda#13018819ca8f5b7cc675a8faf1f5fedf -https://conda.anaconda.org/conda-forge/linux-aarch64/blas-devel-3.9.0-17_linuxaarch64_openblas.conda#d8a3c0b2b389b2a64b3a1b5e59ae2e09 
-https://conda.anaconda.org/conda-forge/linux-aarch64/contourpy-1.0.7-py39hd9a2fea_0.conda#efa783bf5c2b30aba3cf22599fe0274e -https://conda.anaconda.org/conda-forge/linux-aarch64/fonttools-4.39.4-py39h898b7ef_0.conda#c10973b2dc04e82014938c14b919e6e0 -https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-6.6.0-pyha770c72_0.conda#f91a5d5175fb7ff2a91952ec7da59cb9 -https://conda.anaconda.org/conda-forge/noarch/importlib_resources-5.12.0-pyhd8ed1ab_0.conda#e5fd2260a231ee63b6969f4801082f2b -https://conda.anaconda.org/conda-forge/noarch/joblib-1.2.0-pyhd8ed1ab_0.tar.bz2#7583652522d71ad78ba536bba06940eb -https://conda.anaconda.org/conda-forge/linux-aarch64/pillow-9.5.0-py39hc5b5638_1.conda#0560194d0eab633c666299c993869cca -https://conda.anaconda.org/conda-forge/noarch/pip-23.1.2-pyhd8ed1ab_0.conda#7288da0d36821349cf1126e8670292df -https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.2-pyhd8ed1ab_0.tar.bz2#dd999d1cc9f79e67dbb855c8924c7984 -https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.6.3-hd8ed1ab_0.conda#3876f650ed7d0f95d70fa4b647621909 -https://conda.anaconda.org/conda-forge/noarch/urllib3-2.0.2-pyhd8ed1ab_0.conda#81a763f3c64fe6d5f32e033b0325265d -https://conda.anaconda.org/conda-forge/linux-aarch64/blas-2.117-openblas.conda#5f88c5a193286ed9a87afd4b815e8c70 -https://conda.anaconda.org/conda-forge/noarch/importlib-resources-5.12.0-pyhd8ed1ab_0.conda#3544c818f0720c89eb16ae6940ab440b -https://conda.anaconda.org/conda-forge/noarch/platformdirs-3.5.1-pyhd8ed1ab_0.conda#e2be672aece1f060adf7154f76531a35 -https://conda.anaconda.org/conda-forge/noarch/pytest-7.3.1-pyhd8ed1ab_0.conda#547c7de697ec99b494a28ddde185b5a4 -https://conda.anaconda.org/conda-forge/noarch/requests-2.31.0-pyhd8ed1ab_0.conda#a30144e4156cdbb236f99ebb49828f8b -https://conda.anaconda.org/conda-forge/linux-aarch64/matplotlib-base-3.7.1-py39h2983639_0.conda#6ca14f00270585ac4ff20b04106817ee -https://conda.anaconda.org/conda-forge/noarch/pooch-1.7.0-pyha770c72_3.conda#5936894aade8240c867d292aa0d980c6 -https://conda.anaconda.org/conda-forge/noarch/pytest-forked-1.6.0-pyhd8ed1ab_0.conda#a46947638b6e005b63d2d6271da529b0 -https://conda.anaconda.org/conda-forge/linux-aarch64/matplotlib-3.7.1-py39ha65689a_0.conda#ba11d081599ada176b3ca99821e1b753 -https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-2.5.0-pyhd8ed1ab_0.tar.bz2#1fdd1f3baccf0deb647385c677a1a48e -https://conda.anaconda.org/conda-forge/linux-aarch64/scipy-1.10.1-py39hf88902c_3.conda#032bb28beb0c37c48b6e33dadc18f0ec diff --git a/build_tools/cirrus/py39_conda_forge_environment.yml b/build_tools/cirrus/pymin_conda_forge_environment.yml similarity index 87% rename from build_tools/cirrus/py39_conda_forge_environment.yml rename to build_tools/cirrus/pymin_conda_forge_environment.yml index 70aedd73bf883..e41cc7f610ac0 100644 --- a/build_tools/cirrus/py39_conda_forge_environment.yml +++ b/build_tools/cirrus/pymin_conda_forge_environment.yml @@ -13,8 +13,10 @@ dependencies: - threadpoolctl - matplotlib - pytest - - pytest-xdist=2.5.0 + - pytest-xdist - pillow - - setuptools + - pip + - ninja + - meson-python - pip - ccache diff --git a/build_tools/cirrus/pymin_conda_forge_linux-aarch64_conda.lock b/build_tools/cirrus/pymin_conda_forge_linux-aarch64_conda.lock new file mode 100644 index 0000000000000..edc83e3689ccc --- /dev/null +++ b/build_tools/cirrus/pymin_conda_forge_linux-aarch64_conda.lock @@ -0,0 +1,94 @@ +# Generated by conda-lock. 
+# platform: linux-aarch64 +# input_hash: 2d8c526ab7c0c2f0ca509bfec3f035e5bd33b8096f194f0747f167c8aff66383 +@EXPLICIT +https://conda.anaconda.org/conda-forge/linux-aarch64/ca-certificates-2024.6.2-hcefe29a_0.conda#3ef6b1a30375f8a973a593698e317191 +https://conda.anaconda.org/conda-forge/linux-aarch64/ld_impl_linux-aarch64-2.40-h9fc2d93_7.conda#1b0feef706f4d03eff0b76626ead64fc +https://conda.anaconda.org/conda-forge/linux-aarch64/python_abi-3.9-4_cp39.conda#c191905a08694e4a5cb1238e90233878 +https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda#161081fc7cec0bfda0d86d7cb595f8d8 +https://conda.anaconda.org/conda-forge/linux-aarch64/_openmp_mutex-4.5-2_kmp_llvm.tar.bz2#98a1185182fec3c434069fa74e6473d6 +https://conda.anaconda.org/conda-forge/linux-aarch64/libgcc-ng-14.1.0-he277a41_0.conda#47ecd1292a3fd78b616640b35dd9632c +https://conda.anaconda.org/conda-forge/linux-aarch64/bzip2-1.0.8-h31becfc_5.conda#a64e35f01e0b7a2a152eca87d33b9c87 +https://conda.anaconda.org/conda-forge/linux-aarch64/libbrotlicommon-1.1.0-h31becfc_1.conda#1b219fd801eddb7a94df5bd001053ad9 +https://conda.anaconda.org/conda-forge/linux-aarch64/libdeflate-1.20-h31becfc_0.conda#018592a3d691662f451f89d0de474a20 +https://conda.anaconda.org/conda-forge/linux-aarch64/libffi-3.4.2-h3557bc0_5.tar.bz2#dddd85f4d52121fab0a8b099c5e06501 +https://conda.anaconda.org/conda-forge/linux-aarch64/libgfortran5-14.1.0-h9420597_0.conda#b907b29b964b8ebd7be215e47a659179 +https://conda.anaconda.org/conda-forge/linux-aarch64/libjpeg-turbo-3.0.0-h31becfc_1.conda#ed24e702928be089d9ba3f05618515c6 +https://conda.anaconda.org/conda-forge/linux-aarch64/libnsl-2.0.1-h31becfc_0.conda#c14f32510f694e3185704d89967ec422 +https://conda.anaconda.org/conda-forge/linux-aarch64/libstdcxx-ng-14.1.0-h3f4de04_0.conda#2f84852b723ac4389eb188db695526bb +https://conda.anaconda.org/conda-forge/linux-aarch64/libuuid-2.38.1-hb4cce97_0.conda#000e30b09db0b7c775b21695dff30969 +https://conda.anaconda.org/conda-forge/linux-aarch64/libwebp-base-1.4.0-h31becfc_0.conda#5fd7ab3e5f382c70607fbac6335e6e19 +https://conda.anaconda.org/conda-forge/linux-aarch64/libxcrypt-4.4.36-h31becfc_1.conda#b4df5d7d4b63579d081fd3a4cf99740e +https://conda.anaconda.org/conda-forge/linux-aarch64/libzlib-1.3.1-h68df207_1.conda#b13fb82f88902e34dd0638cd7d378c21 +https://conda.anaconda.org/conda-forge/linux-aarch64/ncurses-6.5-h0425590_0.conda#38362af7bfac0efef69675acee564458 +https://conda.anaconda.org/conda-forge/linux-aarch64/openssl-3.3.1-h68df207_1.conda#8349df397000d7a7acb514d97879fe09 +https://conda.anaconda.org/conda-forge/linux-aarch64/pthread-stubs-0.4-hb9de7d4_1001.tar.bz2#d0183ec6ce0b5aaa3486df25fa5f0ded +https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxau-1.0.11-h31becfc_0.conda#13de34f69cb73165dbe08c1e9148bedb +https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxdmcp-1.1.3-h3557bc0_0.tar.bz2#a6c9016ae1ca5c47a3603ed4cd65fedd +https://conda.anaconda.org/conda-forge/linux-aarch64/xz-5.2.6-h9cdd2b7_0.tar.bz2#83baad393a31d59c20b63ba4da6592df +https://conda.anaconda.org/conda-forge/linux-aarch64/lerc-4.0.0-h4de3ea5_0.tar.bz2#1a0ffc65e03ce81559dbcb0695ad1476 +https://conda.anaconda.org/conda-forge/linux-aarch64/libbrotlidec-1.1.0-h31becfc_1.conda#8db7cff89510bec0b863a0a8ee6a7bce +https://conda.anaconda.org/conda-forge/linux-aarch64/libbrotlienc-1.1.0-h31becfc_1.conda#ad3d3a826b5848d99936e4466ebbaa26 +https://conda.anaconda.org/conda-forge/linux-aarch64/libgfortran-ng-14.1.0-he9431aa_0.conda#a50ae662c1e7f26f0f2c99e31d1bf614 
+https://conda.anaconda.org/conda-forge/linux-aarch64/libpng-1.6.43-h194ca79_0.conda#1123e504d9254dd9494267ab9aba95f0 +https://conda.anaconda.org/conda-forge/linux-aarch64/libsqlite-3.46.0-hf51ef55_0.conda#a8ae63fd6fb7d007f74ef3df95e5edf3 +https://conda.anaconda.org/conda-forge/linux-aarch64/libxcb-1.16-h7935292_0.conda#93c0136e9cba96657339dfe25fba4da7 +https://conda.anaconda.org/conda-forge/linux-aarch64/ninja-1.12.1-h70be974_0.conda#216635cea46498d8045c7cf0f03eaf72 +https://conda.anaconda.org/conda-forge/linux-aarch64/readline-8.2-h8fc344f_1.conda#105eb1e16bf83bfb2eb380a48032b655 +https://conda.anaconda.org/conda-forge/linux-aarch64/tk-8.6.13-h194ca79_0.conda#f75105e0585851f818e0009dd1dde4dc +https://conda.anaconda.org/conda-forge/linux-aarch64/zstd-1.5.6-h02f22dd_0.conda#be8d5f8cf21aed237b8b182ea86b3dd6 +https://conda.anaconda.org/conda-forge/linux-aarch64/brotli-bin-1.1.0-h31becfc_1.conda#9e4a13596ab651ea8d77aae023d0ce3f +https://conda.anaconda.org/conda-forge/linux-aarch64/freetype-2.12.1-hf0a5ef3_2.conda#a5ab74c5bd158c3d5532b66d8d83d907 +https://conda.anaconda.org/conda-forge/linux-aarch64/libhiredis-1.0.2-h05efe27_0.tar.bz2#a87f068744fd20334cd41489eb163bee +https://conda.anaconda.org/conda-forge/linux-aarch64/libopenblas-0.3.27-pthreads_h5a5ec62_0.conda#ffecca8f4f31cd50b92c0e6e6bfe4416 +https://conda.anaconda.org/conda-forge/linux-aarch64/libtiff-4.6.0-hf980d43_3.conda#b6f3abf5726ae33094bee238b4eb492f +https://conda.anaconda.org/conda-forge/linux-aarch64/llvm-openmp-18.1.8-hb063fc5_0.conda#f0cf07feda9ed87092833cd8fca012f5 +https://conda.anaconda.org/conda-forge/linux-aarch64/python-3.9.19-h4ac3b42_0_cpython.conda#1501507cd9451472ec8900d587ce872f +https://conda.anaconda.org/conda-forge/linux-aarch64/brotli-1.1.0-h31becfc_1.conda#e41f5862ac746428407f3fd44d2ed01f +https://conda.anaconda.org/conda-forge/linux-aarch64/ccache-4.9.1-h6552966_0.conda#758b202f61f6bbfd2c6adf0fde043276 +https://conda.anaconda.org/conda-forge/noarch/certifi-2024.6.2-pyhd8ed1ab_0.conda#8821ec1c8fcdc9e1d291d7b9f6e9968a +https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 +https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhd8ed1ab_0.conda#5cd86562580f274031ede6aa6aa24441 +https://conda.anaconda.org/conda-forge/linux-aarch64/cython-3.0.10-py39h387a81e_0.conda#0e917a89f77c978d152099357bd75b22 +https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.0-pyhd8ed1ab_2.conda#8d652ea2ee8eaee02ed8dc820bc794aa +https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_0.conda#15dda3cdbf330abfe9f555d22f66db46 +https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5 +https://conda.anaconda.org/conda-forge/linux-aarch64/kiwisolver-1.4.5-py39had2cf8c_1.conda#ddb99610f7b950fdd5ff2aff19136363 +https://conda.anaconda.org/conda-forge/linux-aarch64/lcms2-2.16-h922389a_0.conda#ffdd8267a04c515e7ce69c727b051414 +https://conda.anaconda.org/conda-forge/linux-aarch64/libblas-3.9.0-22_linuxaarch64_openblas.conda#068ab33f2382cda4dd0b72a715ad33b5 +https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19 +https://conda.anaconda.org/conda-forge/linux-aarch64/openblas-0.3.27-pthreads_h339cbfa_0.conda#cb06c34a3056f59e9e244c20836add8a +https://conda.anaconda.org/conda-forge/linux-aarch64/openjpeg-2.5.2-h0d9d63b_0.conda#fd2898519e839d5ceb778343f39a3176 
+https://conda.anaconda.org/conda-forge/noarch/packaging-24.1-pyhd8ed1ab_0.conda#cbe1bb1f21567018ce595d9c2be0f0db +https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_0.conda#d3483c8fc2dc2cc3f5cf43e26d60cabf +https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.2-pyhd8ed1ab_0.conda#b9a4dacf97241704529131a0dfc0494f +https://conda.anaconda.org/conda-forge/noarch/setuptools-70.1.1-pyhd8ed1ab_0.conda#985e9e86e1b0fc75a74a9bfab9309ef7 +https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 +https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.5.0-pyhc1e730c_0.conda#df68d78237980a159bd7149f33c0e8fd +https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 +https://conda.anaconda.org/conda-forge/linux-aarch64/tornado-6.4.1-py39ha3e8b56_0.conda#60ad0fcecca6af49fe5888a408618d8a +https://conda.anaconda.org/conda-forge/linux-aarch64/unicodedata2-15.1.0-py39h898b7ef_0.conda#8c072c9329aeea97a46005625267a851 +https://conda.anaconda.org/conda-forge/noarch/wheel-0.43.0-pyhd8ed1ab_1.conda#0b5293a157c2b5cd513dd1b03d8d3aae +https://conda.anaconda.org/conda-forge/noarch/zipp-3.19.2-pyhd8ed1ab_0.conda#49808e59df5535116f6878b2a820d6f4 +https://conda.anaconda.org/conda-forge/linux-aarch64/fonttools-4.53.0-py39he257ee7_0.conda#de1dc44f6b1a4393b37f4264d8e31e2a +https://conda.anaconda.org/conda-forge/noarch/importlib_resources-6.4.0-pyhd8ed1ab_0.conda#c5d3907ad8bd7bf557521a1833cf7e6d +https://conda.anaconda.org/conda-forge/noarch/joblib-1.4.2-pyhd8ed1ab_0.conda#25df261d4523d9f9783bcdb7208d872f +https://conda.anaconda.org/conda-forge/linux-aarch64/libcblas-3.9.0-22_linuxaarch64_openblas.conda#fbe7fe553f2cc78a0311e009b26f180d +https://conda.anaconda.org/conda-forge/linux-aarch64/liblapack-3.9.0-22_linuxaarch64_openblas.conda#8c709d281609792c39b1d5c0241f90f1 +https://conda.anaconda.org/conda-forge/noarch/meson-1.4.1-pyhd8ed1ab_0.conda#714ca123839eeebb25d12b443067ea64 +https://conda.anaconda.org/conda-forge/linux-aarch64/pillow-10.3.0-py39h4a8821f_1.conda#12b4464ffbad162e28b7bf6e81129501 +https://conda.anaconda.org/conda-forge/noarch/pip-24.0-pyhd8ed1ab_0.conda#f586ac1e56c8638b64f9c8122a7b8a67 +https://conda.anaconda.org/conda-forge/noarch/pyproject-metadata-0.8.0-pyhd8ed1ab_0.conda#573fe09d7bd0cd4bcc210d8369b5ca47 +https://conda.anaconda.org/conda-forge/noarch/pytest-8.2.2-pyhd8ed1ab_0.conda#0f3f49c22c7ef3a1195fa61dad3c43be +https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0-pyhd8ed1ab_0.conda#2cf4264fffb9e6eff6031c5b6884d61c +https://conda.anaconda.org/conda-forge/noarch/importlib-resources-6.4.0-pyhd8ed1ab_0.conda#dcbadab7a68738a028e195ab68ab2d2e +https://conda.anaconda.org/conda-forge/linux-aarch64/liblapacke-3.9.0-22_linuxaarch64_openblas.conda#5acf669e0be669f30f4b813d2ecda7b8 +https://conda.anaconda.org/conda-forge/noarch/meson-python-0.16.0-pyh0c530f3_0.conda#e16f0dbf502da873be9f9adb0dc52547 +https://conda.anaconda.org/conda-forge/linux-aarch64/numpy-2.0.0-py39hcdcdb6f_0.conda#8ba5676e972e45d1c0d947f169c6e914 +https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.6.1-pyhd8ed1ab_0.conda#b39568655c127a9c4a44d178ac99b6d0 +https://conda.anaconda.org/conda-forge/linux-aarch64/blas-devel-3.9.0-22_linuxaarch64_openblas.conda#a5b77b6c6807661afd716f33e85814b3 +https://conda.anaconda.org/conda-forge/linux-aarch64/contourpy-1.2.1-py39hd16970a_0.conda#66b9718539ecdd38876b0176c315bcad 
+https://conda.anaconda.org/conda-forge/linux-aarch64/scipy-1.13.1-py39hb921187_0.conda#1aac9080de661e03d286f18fb71e5240 +https://conda.anaconda.org/conda-forge/linux-aarch64/blas-2.122-openblas.conda#65bc48b3bc85f8eeeab54311443a83aa +https://conda.anaconda.org/conda-forge/linux-aarch64/matplotlib-base-3.8.4-py39hf44f4b6_2.conda#fadf734d38ed608c9f0b5c91fe79cfb4 +https://conda.anaconda.org/conda-forge/linux-aarch64/matplotlib-3.8.4-py39ha65689a_2.conda#c0472e3c4b3f007de6d643317c30963b diff --git a/build_tools/generate_authors_table.py b/build_tools/generate_authors_table.py index d4da0db5be3c1..483dc3739506e 100644 --- a/build_tools/generate_authors_table.py +++ b/build_tools/generate_authors_table.py @@ -6,12 +6,14 @@ The table should be updated for each new inclusion in the teams. Generating the table requires admin rights. """ -import sys -import requests + import getpass +import sys import time -from pathlib import Path from os import path +from pathlib import Path + +import requests print("user:", file=sys.stderr) user = input() @@ -42,17 +44,24 @@ def get_contributors(): """Get the list of contributor profiles. Require admin rights.""" # get core devs and contributor experience team core_devs = [] + documentation_team = [] contributor_experience_team = [] comm_team = [] core_devs_slug = "core-devs" contributor_experience_team_slug = "contributor-experience-team" comm_team_slug = "communication-team" + documentation_team_slug = "documentation-team" entry_point = "https://api.github.com/orgs/scikit-learn/" for team_slug, lst in zip( - (core_devs_slug, contributor_experience_team_slug, comm_team_slug), - (core_devs, contributor_experience_team, comm_team), + ( + core_devs_slug, + contributor_experience_team_slug, + comm_team_slug, + documentation_team_slug, + ), + (core_devs, contributor_experience_team, comm_team, documentation_team), ): for page in [1, 2]: # 30 per page reply = get(f"{entry_point}teams/{team_slug}/members?page={page}") @@ -66,6 +75,7 @@ def get_contributors(): # keep only the logins core_devs = set(c["login"] for c in core_devs) + documentation_team = set(c["login"] for c in documentation_team) contributor_experience_team = set(c["login"] for c in contributor_experience_team) comm_team = set(c["login"] for c in comm_team) members = set(c["login"] for c in members) @@ -80,11 +90,23 @@ def get_contributors(): core_devs # remove ogrisel from contributor_experience_team ) - emeritus = members - core_devs - contributor_experience_team - comm_team + emeritus = ( + members + - core_devs + - contributor_experience_team + - comm_team + - documentation_team + ) # hard coded + emeritus_contributor_experience_team = { + "cmarmo", + } emeritus_comm_team = {"reshamas"} + # Up-to-now, we can subtract the team emeritus from the original emeritus + emeritus -= emeritus_contributor_experience_team | emeritus_comm_team + comm_team -= {"reshamas"} # in the comm team but not on the web page # get profiles from GitHub @@ -93,13 +115,21 @@ def get_contributors(): contributor_experience_team = [ get_profile(login) for login in contributor_experience_team ] + emeritus_contributor_experience_team = [ + get_profile(login) for login in emeritus_contributor_experience_team + ] comm_team = [get_profile(login) for login in comm_team] emeritus_comm_team = [get_profile(login) for login in emeritus_comm_team] + documentation_team = [get_profile(login) for login in documentation_team] # sort by last name core_devs = sorted(core_devs, key=key) emeritus = sorted(emeritus, key=key) 
contributor_experience_team = sorted(contributor_experience_team, key=key) + emeritus_contributor_experience_team = sorted( + emeritus_contributor_experience_team, key=key + ) + documentation_team = sorted(documentation_team, key=key) comm_team = sorted(comm_team, key=key) emeritus_comm_team = sorted(emeritus_comm_team, key=key) @@ -107,8 +137,10 @@ def get_contributors(): core_devs, emeritus, contributor_experience_team, + emeritus_contributor_experience_team, comm_team, emeritus_comm_team, + documentation_team, ) @@ -176,15 +208,19 @@ def generate_list(contributors): core_devs, emeritus, contributor_experience_team, + emeritus_contributor_experience_team, comm_team, emeritus_comm_team, + documentation_team, ) = get_contributors() - with open(REPO_FOLDER / "doc" / "authors.rst", "w+", encoding="utf-8") as rst_file: + with open( + REPO_FOLDER / "doc" / "maintainers.rst", "w+", encoding="utf-8" + ) as rst_file: rst_file.write(generate_table(core_devs)) with open( - REPO_FOLDER / "doc" / "authors_emeritus.rst", "w+", encoding="utf-8" + REPO_FOLDER / "doc" / "maintainers_emeritus.rst", "w+", encoding="utf-8" ) as rst_file: rst_file.write(generate_list(emeritus)) @@ -193,6 +229,13 @@ def generate_list(contributors): ) as rst_file: rst_file.write(generate_table(contributor_experience_team)) + with open( + REPO_FOLDER / "doc" / "contributor_experience_team_emeritus.rst", + "w+", + encoding="utf-8", + ) as rst_file: + rst_file.write(generate_list(emeritus_contributor_experience_team)) + with open( REPO_FOLDER / "doc" / "communication_team.rst", "w+", encoding="utf-8" ) as rst_file: @@ -202,3 +245,8 @@ def generate_list(contributors): REPO_FOLDER / "doc" / "communication_team_emeritus.rst", "w+", encoding="utf-8" ) as rst_file: rst_file.write(generate_list(emeritus_comm_team)) + + with open( + REPO_FOLDER / "doc" / "documentation_team.rst", "w+", encoding="utf-8" + ) as rst_file: + rst_file.write(generate_table(documentation_team)) diff --git a/build_tools/get_comment.py b/build_tools/get_comment.py new file mode 100644 index 0000000000000..b357c68f23e3e --- /dev/null +++ b/build_tools/get_comment.py @@ -0,0 +1,356 @@ +# This script is used to generate a comment for a PR when linting issues are +# detected. It is used by the `Comment on failed linting` GitHub Action. +# This script fails if there are not comments to be posted. + +import os + +import requests + + +def get_versions(versions_file): + """Get the versions of the packages used in the linter job. + + Parameters + ---------- + versions_file : str + The path to the file that contains the versions of the packages. + + Returns + ------- + versions : dict + A dictionary with the versions of the packages. + """ + with open("versions.txt", "r") as f: + return dict(line.strip().split("=") for line in f) + + +def get_step_message(log, start, end, title, message, details): + """Get the message for a specific test. + + Parameters + ---------- + log : str + The log of the linting job. + + start : str + The string that marks the start of the test. + + end : str + The string that marks the end of the test. + + title : str + The title for this section. + + message : str + The message to be added at the beginning of the section. + + details : bool + Whether to add the details of each step. + + Returns + ------- + message : str + The message to be added to the comment. + """ + if end not in log: + return "" + res = ( + "-----------------------------------------------\n" + + f"### {title}\n\n" + + message + + "\n\n" + ) + if details: + res += ( + "
<details>\n\n```\n" + log[log.find(start) + len(start) + 1 : log.find(end) - 1] + "\n```\n\n</details>
\n\n", + ) + return res + + +def get_message(log_file, repo, pr_number, sha, run_id, details, versions): + with open(log_file, "r") as f: + log = f.read() + + sub_text = ( + "\n\n _Generated for commit:" + f" [{sha[:7]}](https://github.com/{repo}/pull/{pr_number}/commits/{sha}). " + "Link to the linter CI: [here]" + f"(https://github.com/{repo}/actions/runs/{run_id})_ " + ) + + if "### Linting completed ###" not in log: + return ( + "## ❌ Linting issues\n\n" + "There was an issue running the linter job. Please update with " + "`upstream/main` ([link](" + "https://scikit-learn.org/dev/developers/contributing.html" + "#how-to-contribute)) and push the changes. If you already have done " + "that, please send an empty commit with `git commit --allow-empty` " + "and push the changes to trigger the CI.\n\n" + sub_text + ) + + message = "" + + # black + message += get_step_message( + log, + start="### Running black ###", + end="Problems detected by black", + title="`black`", + message=( + "`black` detected issues. Please run `black .` locally and push " + "the changes. Here you can see the detected issues. Note that " + "running black might also fix some of the issues which might be " + "detected by `ruff`. Note that the installed `black` version is " + f"`black={versions['black']}`." + ), + details=details, + ) + + # ruff + message += get_step_message( + log, + start="### Running ruff ###", + end="Problems detected by ruff", + title="`ruff`", + message=( + "`ruff` detected issues. Please run " + "`ruff check --fix --output-format=full .` locally, fix the remaining " + "issues, and push the changes. Here you can see the detected issues. Note " + f"that the installed `ruff` version is `ruff={versions['ruff']}`." + ), + details=details, + ) + + # mypy + message += get_step_message( + log, + start="### Running mypy ###", + end="Problems detected by mypy", + title="`mypy`", + message=( + "`mypy` detected issues. Please fix them locally and push the changes. " + "Here you can see the detected issues. Note that the installed `mypy` " + f"version is `mypy={versions['mypy']}`." + ), + details=details, + ) + + # cython-lint + message += get_step_message( + log, + start="### Running cython-lint ###", + end="Problems detected by cython-lint", + title="`cython-lint`", + message=( + "`cython-lint` detected issues. Please fix them locally and push " + "the changes. Here you can see the detected issues. Note that the " + "installed `cython-lint` version is " + f"`cython-lint={versions['cython-lint']}`." + ), + details=details, + ) + + # deprecation order + message += get_step_message( + log, + start="### Checking for bad deprecation order ###", + end="Problems detected by deprecation order check", + title="Deprecation Order", + message=( + "Deprecation order check detected issues. Please fix them locally and " + "push the changes. Here you can see the detected issues." + ), + details=details, + ) + + # doctest directives + message += get_step_message( + log, + start="### Checking for default doctest directives ###", + end="Problems detected by doctest directive check", + title="Doctest Directives", + message=( + "doctest directive check detected issues. Please fix them locally and " + "push the changes. Here you can see the detected issues." + ), + details=details, + ) + + # joblib imports + message += get_step_message( + log, + start="### Checking for joblib imports ###", + end="Problems detected by joblib import check", + title="Joblib Imports", + message=( + "`joblib` import check detected issues. 
Please fix them locally and " + "push the changes. Here you can see the detected issues." + ), + details=details, + ) + + if not message: + # no issues detected, so this script "fails" + return ( + "## ✔️ Linting Passed\n" + "All linting checks passed. Your pull request is in excellent shape! ☀️" + + sub_text + ) + + if not details: + # This happens if posting the log fails, which happens if the log is too + # long. Typically, this happens if the PR branch hasn't been updated + # since we've introduced import sorting. + branch_not_updated = ( + "_Merging with `upstream/main` might fix / improve the issues if you " + "haven't done that since 21.06.2023._\n\n" + ) + else: + branch_not_updated = "" + + message = ( + "## ❌ Linting issues\n\n" + + branch_not_updated + + "This PR is introducing linting issues. Here's a summary of the issues. " + + "Note that you can avoid having linting issues by enabling `pre-commit` " + + "hooks. Instructions to enable them can be found [here](" + + "https://scikit-learn.org/dev/developers/contributing.html#how-to-contribute)" + + ".\n\n" + + "You can see the details of the linting issues under the `lint` job [here]" + + f"(https://github.com/{repo}/actions/runs/{run_id})\n\n" + + message + + sub_text + ) + + return message + + +def get_headers(token): + """Get the headers for the GitHub API.""" + return { + "Accept": "application/vnd.github+json", + "Authorization": f"Bearer {token}", + "X-GitHub-Api-Version": "2022-11-28", + } + + +def find_lint_bot_comments(repo, token, pr_number): + """Get the comment from the linting bot.""" + # repo is in the form of "org/repo" + # API doc: https://docs.github.com/en/rest/issues/comments?apiVersion=2022-11-28#list-issue-comments # noqa + response = requests.get( + f"https://api.github.com/repos/{repo}/issues/{pr_number}/comments", + headers=get_headers(token), + ) + response.raise_for_status() + all_comments = response.json() + + failed_comment = "❌ Linting issues" + success_comment = "✔️ Linting Passed" + + # Find all comments that match the linting bot, and return the first one. + # There should always be only one such comment, or none, if the PR is + # just created. 
+ raise RuntimeError("Comment not found in the first 30 comments.") + + return comments[0] if comments else None + + +def create_or_update_comment(comment, message, repo, pr_number, token): + """Create a new comment or update existing one.""" + # repo is in the form of "org/repo" + if comment is not None: + print("updating existing comment") + # API doc: https://docs.github.com/en/rest/issues/comments?apiVersion=2022-11-28#update-an-issue-comment # noqa + response = requests.patch( + f"https://api.github.com/repos/{repo}/issues/comments/{comment['id']}", + headers=get_headers(token), + json={"body": message}, + ) + else: + print("creating new comment") + # API doc: https://docs.github.com/en/rest/issues/comments?apiVersion=2022-11-28#create-an-issue-comment # noqa + response = requests.post( + f"https://api.github.com/repos/{repo}/issues/{pr_number}/comments", + headers=get_headers(token), + json={"body": message}, + ) + + response.raise_for_status() + + +if __name__ == "__main__": + repo = os.environ["GITHUB_REPOSITORY"] + token = os.environ["GITHUB_TOKEN"] + pr_number = os.environ["PR_NUMBER"] + sha = os.environ["BRANCH_SHA"] + log_file = os.environ["LOG_FILE"] + run_id = os.environ["RUN_ID"] + versions_file = os.environ["VERSIONS_FILE"] + + versions = get_versions(versions_file) + + if not repo or not token or not pr_number or not log_file or not run_id: + raise ValueError( + "One of the following environment variables is not set: " + "GITHUB_REPOSITORY, GITHUB_TOKEN, PR_NUMBER, LOG_FILE, RUN_ID" + ) + + try: + comment = find_lint_bot_comments(repo, token, pr_number) + except RuntimeError: + print("Comment not found in the first 30 comments. Skipping!") + exit(0) + + try: + message = get_message( + log_file, + repo=repo, + pr_number=pr_number, + sha=sha, + run_id=run_id, + details=True, + versions=versions, + ) + create_or_update_comment( + comment=comment, + message=message, + repo=repo, + pr_number=pr_number, + token=token, + ) + print(message) + except requests.HTTPError: + # The above fails if the message is too long. In that case, we + # try again without the details. 
+ message = get_message( + log_file, + repo=repo, + pr_number=pr_number, + sha=sha, + run_id=run_id, + details=False, + versions=versions, + ) + create_or_update_comment( + comment=comment, + message=message, + repo=repo, + pr_number=pr_number, + token=token, + ) + print(message) diff --git a/build_tools/github/Windows b/build_tools/github/Windows index 5ba35f790ca5e..a9971aa525581 100644 --- a/build_tools/github/Windows +++ b/build_tools/github/Windows @@ -3,12 +3,10 @@ ARG PYTHON_VERSION FROM winamd64/python:$PYTHON_VERSION-windowsservercore ARG WHEEL_NAME -ARG CONFTEST_NAME ARG CIBW_TEST_REQUIRES # Copy and install the Windows wheel COPY $WHEEL_NAME $WHEEL_NAME -COPY $CONFTEST_NAME $CONFTEST_NAME RUN pip install $env:WHEEL_NAME # Install the testing dependencies diff --git a/build_tools/github/build_minimal_windows_image.sh b/build_tools/github/build_minimal_windows_image.sh index 4399bfa80704e..2995b6906c535 100755 --- a/build_tools/github/build_minimal_windows_image.sh +++ b/build_tools/github/build_minimal_windows_image.sh @@ -14,10 +14,12 @@ cp $WHEEL_PATH $WHEEL_NAME # Dot the Python version for identyfing the base Docker image PYTHON_VERSION=$(echo ${PYTHON_VERSION:0:1}.${PYTHON_VERSION:1:2}) +if [[ "$CIBW_PRERELEASE_PYTHONS" == "True" ]]; then + PYTHON_VERSION="$PYTHON_VERSION-rc" +fi # Build a minimal Windows Docker image for testing the wheels docker build --build-arg PYTHON_VERSION=$PYTHON_VERSION \ --build-arg WHEEL_NAME=$WHEEL_NAME \ - --build-arg CONFTEST_NAME=$CONFTEST_NAME \ --build-arg CIBW_TEST_REQUIRES="$CIBW_TEST_REQUIRES" \ -f build_tools/github/Windows \ -t scikit-learn/minimal-windows . diff --git a/build_tools/github/build_source.sh b/build_tools/github/build_source.sh index a4d9c7bd05387..ec53284012fa4 100755 --- a/build_tools/github/build_source.sh +++ b/build_tools/github/build_source.sh @@ -11,10 +11,10 @@ python -m venv build_env source build_env/bin/activate python -m pip install numpy scipy cython -python -m pip install twine +python -m pip install twine build cd scikit-learn/scikit-learn -python setup.py sdist +python -m build --sdist # Check whether the source distribution will render correctly twine check dist/*.tar.gz diff --git a/build_tools/github/check_build_trigger.sh b/build_tools/github/check_build_trigger.sh index 3a38924aa23a7..e3a02c4834c34 100755 --- a/build_tools/github/check_build_trigger.sh +++ b/build_tools/github/check_build_trigger.sh @@ -7,6 +7,7 @@ COMMIT_MSG=$(git log --no-merges -1 --oneline) # The commit marker "[cd build]" or "[cd build gh]" will trigger the build when required if [[ "$GITHUB_EVENT_NAME" == schedule || + "$GITHUB_EVENT_NAME" == workflow_dispatch || "$COMMIT_MSG" =~ \[cd\ build\] || "$COMMIT_MSG" =~ \[cd\ build\ gh\] ]]; then echo "build=true" >> $GITHUB_OUTPUT diff --git a/build_tools/github/check_wheels.py b/build_tools/github/check_wheels.py index 99d319cba4dc5..5579d86c5ce3e 100644 --- a/build_tools/github/check_wheels.py +++ b/build_tools/github/check_wheels.py @@ -1,8 +1,10 @@ """Checks that dist/* contains the number of wheels built from the .github/workflows/wheels.yml config.""" -import yaml -from pathlib import Path + import sys +from pathlib import Path + +import yaml gh_wheel_path = Path.cwd() / ".github" / "workflows" / "wheels.yml" with gh_wheel_path.open("r") as f: @@ -19,7 +21,6 @@ with cirrus_path.open("r") as f: cirrus_config = yaml.safe_load(f) -n_wheels += len(cirrus_config["macos_arm64_wheel_task"]["matrix"]) n_wheels += len(cirrus_config["linux_arm64_wheel_task"]["matrix"]) dist_files = 
list(Path("dist").glob("**/*")) diff --git a/build_tools/github/repair_windows_wheels.sh b/build_tools/github/repair_windows_wheels.sh index cdd0c0c79d8c4..8f51a34d4039b 100755 --- a/build_tools/github/repair_windows_wheels.sh +++ b/build_tools/github/repair_windows_wheels.sh @@ -8,6 +8,7 @@ DEST_DIR=$2 # By default, the Windows wheels are not repaired. # In this case, we need to vendor VCRUNTIME140.dll +pip install wheel wheel unpack "$WHEEL" WHEEL_DIRNAME=$(ls -d scikit_learn-*) python build_tools/github/vendor.py "$WHEEL_DIRNAME" diff --git a/build_tools/github/test_source.sh b/build_tools/github/test_source.sh index 3a65a657addec..c93d22a08e791 100755 --- a/build_tools/github/test_source.sh +++ b/build_tools/github/test_source.sh @@ -13,7 +13,6 @@ python -m pip install pytest pandas # Run the tests on the installed source distribution mkdir tmp_for_test -cp scikit-learn/scikit-learn/conftest.py tmp_for_test cd tmp_for_test pytest --pyargs sklearn diff --git a/build_tools/github/upload_anaconda.sh b/build_tools/github/upload_anaconda.sh index 60cab7f8dcf4a..42e06f17c5c47 100755 --- a/build_tools/github/upload_anaconda.sh +++ b/build_tools/github/upload_anaconda.sh @@ -3,8 +3,11 @@ set -e set -x -if [ "$GITHUB_EVENT_NAME" == "schedule" ]; then - ANACONDA_ORG="scipy-wheels-nightly" +# Note: build_wheels.sh has the same branch (only for NumPy 2.0 transition) +if [[ "$GITHUB_EVENT_NAME" == "schedule" \ + || "$GITHUB_EVENT_NAME" == "workflow_dispatch" \ + || "$CIRRUS_CRON" == "nightly" ]]; then + ANACONDA_ORG="scientific-python-nightly-wheels" ANACONDA_TOKEN="$SCIKIT_LEARN_NIGHTLY_UPLOAD_TOKEN" else ANACONDA_ORG="scikit-learn-wheels-staging" diff --git a/build_tools/github/vendor.py b/build_tools/github/vendor.py index 2997688423b84..28b44be3c9aa9 100644 --- a/build_tools/github/vendor.py +++ b/build_tools/github/vendor.py @@ -1,13 +1,11 @@ """Embed vcomp140.dll and msvcp140.dll.""" - import os import os.path as op import shutil import sys import textwrap - TARGET_FOLDER = op.join("sklearn", ".libs") DISTRIBUTOR_INIT = op.join("sklearn", "_distributor_init.py") VCOMP140_SRC_PATH = "C:\\Windows\\System32\\vcomp140.dll" diff --git a/build_tools/linting.sh b/build_tools/linting.sh index dd200b9d9cd95..aefabfae7b3f5 100755 --- a/build_tools/linting.sh +++ b/build_tools/linting.sh @@ -1,57 +1,125 @@ #!/bin/bash -set -e +# Note that any change in this file, adding or removing steps or changing the +# printed messages, should be also reflected in the `get_comment.py` file. + +# This script shouldn't exit if a command / pipeline fails +set +e # pipefail is necessary to propagate exit codes set -o pipefail +global_status=0 + +echo -e "### Running black ###\n" black --check --diff . -echo -e "No problem detected by black\n" +status=$? + +if [[ $status -eq 0 ]] +then + echo -e "No problem detected by black\n" +else + echo -e "Problems detected by black, please run black and commit the result\n" + global_status=1 +fi -flake8 --show-source . -echo -e "No problem detected by flake8\n" +echo -e "### Running ruff ###\n" +ruff check --output-format=full . +status=$? +if [[ $status -eq 0 ]] +then + echo -e "No problem detected by ruff\n" +else + echo -e "Problems detected by ruff, please fix them\n" + global_status=1 +fi +echo -e "### Running mypy ###\n" mypy sklearn/ -echo -e "No problem detected by mypy\n" +status=$? 
+if [[ $status -eq 0 ]] +then + echo -e "No problem detected by mypy\n" +else + echo -e "Problems detected by mypy, please fix them\n" + global_status=1 +fi +echo -e "### Running cython-lint ###\n" cython-lint sklearn/ -echo -e "No problem detected by cython-lint\n" +status=$? +if [[ $status -eq 0 ]] +then + echo -e "No problem detected by cython-lint\n" +else + echo -e "Problems detected by cython-lint, please fix them\n" + global_status=1 +fi # For docstrings and warnings of deprecated attributes to be rendered -# properly, the property decorator must come before the deprecated decorator +# properly, the `deprecated` decorator must come before the `property` decorator # (else they are treated as functions) -# do not error when grep -B1 "@property" finds nothing -set +e +echo -e "### Checking for bad deprecation order ###\n" bad_deprecation_property_order=`git grep -A 10 "@property" -- "*.py" | awk '/@property/,/def /' | grep -B1 "@deprecated"` if [ ! -z "$bad_deprecation_property_order" ] then - echo "property decorator should come before deprecated decorator" + echo "deprecated decorator should come before property decorator" echo "found the following occurrences:" echo $bad_deprecation_property_order - exit 1 + echo -e "\nProblems detected by deprecation order check\n" + global_status=1 +else + echo -e "No problems detected related to deprecation order\n" fi # Check for default doctest directives ELLIPSIS and NORMALIZE_WHITESPACE +echo -e "### Checking for default doctest directives ###\n" doctest_directive="$(git grep -nw -E "# doctest\: \+(ELLIPSIS|NORMALIZE_WHITESPACE)")" if [ ! -z "$doctest_directive" ] then echo "ELLIPSIS and NORMALIZE_WHITESPACE doctest directives are enabled by default, but were found in:" echo "$doctest_directive" - exit 1 + echo -e "\nProblems detected by doctest directive check\n" + global_status=1 +else + echo -e "No problems detected related to doctest directives\n" fi +# Check for joblib.delayed and joblib.Parallel imports +# TODO(1.7): remove ":!sklearn/utils/_joblib.py" +echo -e "### Checking for joblib imports ###\n" +joblib_status=0 joblib_delayed_import="$(git grep -l -A 10 -E "joblib import.+delayed" -- "*.py" ":!sklearn/utils/_joblib.py" ":!sklearn/utils/parallel.py")" if [ ! -z "$joblib_delayed_import" ]; then echo "Use from sklearn.utils.parallel import delayed instead of joblib delayed. The following files contains imports to joblib.delayed:" echo "$joblib_delayed_import" - exit 1 + joblib_status=1 fi joblib_Parallel_import="$(git grep -l -A 10 -E "joblib import.+Parallel" -- "*.py" ":!sklearn/utils/_joblib.py" ":!sklearn/utils/parallel.py")" if [ ! -z "$joblib_Parallel_import" ]; then echo "Use from sklearn.utils.parallel import Parallel instead of joblib Parallel. 
The following files contains imports to joblib.Parallel:" echo "$joblib_Parallel_import" + joblib_status=1 +fi + +if [[ $joblib_status -eq 0 ]] +then + echo -e "No problems detected related to joblib imports\n" +else + echo -e "\nProblems detected by joblib import check\n" + global_status=1 +fi + +echo -e "### Linting completed ###\n" + +if [[ $global_status -eq 1 ]] +then + echo -e "Linting failed\n" exit 1 +else + echo -e "Linting passed\n" + exit 0 fi diff --git a/build_tools/shared.sh b/build_tools/shared.sh index 4866c149d506f..185880ef6de03 100644 --- a/build_tools/shared.sh +++ b/build_tools/shared.sh @@ -33,3 +33,19 @@ activate_environment() { source $VIRTUALENV/bin/activate fi } + +create_conda_environment_from_lock_file() { + ENV_NAME=$1 + LOCK_FILE=$2 + # Because we are using lock-files with the "explicit" format, conda can + # install them directly, provided the lock-file does not contain pip solved + # packages. For more details, see + # https://conda.github.io/conda-lock/output/#explicit-lockfile + lock_file_has_pip_packages=$(grep -q files.pythonhosted.org $LOCK_FILE && echo "true" || echo "false") + if [[ "$lock_file_has_pip_packages" == "false" ]]; then + conda create --name $ENV_NAME --file $LOCK_FILE + else + conda install "$(get_dep conda-lock min)" -y + conda-lock install --name $ENV_NAME $LOCK_FILE + fi +} diff --git a/build_tools/update_environments_and_lock_files.py b/build_tools/update_environments_and_lock_files.py index 28910a07d899a..ce457cabb1e53 100644 --- a/build_tools/update_environments_and_lock_files.py +++ b/build_tools/update_environments_and_lock_files.py @@ -5,8 +5,11 @@ Two scenarios where this script can be useful: - make sure that the latest versions of all the dependencies are used in the CI. - We can run this script regularly and open a PR with the changes to the lock - files. This workflow will eventually be automated with a bot in the future. + There is a scheduled workflow that does this, see + .github/workflows/update-lock-files.yml. This is still useful to run this + script when when the automated PR fails and for example some packages need to + be pinned. You can add the pins to this script, run it, and open a PR with + the changes. - bump minimum dependencies in sklearn/_min_dependencies.py. Running this script will update both the CI environment files and associated lock files. You can then open a PR with the changes. @@ -27,26 +30,31 @@ sklearn/_min_dependencies.py - pip-tools +To only update the environment and lock files for specific builds, you can use +the command line argument `--select-build` which will take a regex. 
For example, +to only update the documentation builds you can use: +`python build_tools/update_environments_and_lock_files.py --select-build doc` """ +import json +import logging import re import subprocess import sys -from pathlib import Path -import shlex -import json -import logging from importlib.metadata import version +from pathlib import Path import click - from jinja2 import Environment +from packaging.version import Version logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) handler = logging.StreamHandler() logger.addHandler(handler) +TRACE = logging.DEBUG - 5 + common_dependencies_without_coverage = [ "python", @@ -62,7 +70,9 @@ "pytest", "pytest-xdist", "pillow", - "setuptools", + "pip", + "ninja", + "meson-python", ] common_dependencies = common_dependencies_without_coverage + [ @@ -72,28 +82,30 @@ docstring_test_dependencies = ["sphinx", "numpydoc"] -default_package_constraints = { - # XXX: pin pytest-xdist to workaround: - # https://github.com/pytest-dev/pytest-xdist/issues/840 - "pytest-xdist": "2.5.0", -} +default_package_constraints = {} def remove_from(alist, to_remove): return [each for each in alist if each not in to_remove] -conda_build_metadata_list = [ +build_metadata_list = [ { - "build_name": "pylatest_conda_forge_mkl_linux-64", + "name": "pylatest_conda_forge_mkl_linux-64", + "type": "conda", + "tag": "main-ci", "folder": "build_tools/azure", "platform": "linux-64", "channel": "conda-forge", - "conda_dependencies": common_dependencies + [ + "conda_dependencies": common_dependencies + + [ "ccache", "pytorch", "pytorch-cpu", + "polars", + "pyarrow", "array-api-compat", + "array-api-strict", ], "package_constraints": { "blas": "[build=mkl]", @@ -101,11 +113,14 @@ def remove_from(alist, to_remove): }, }, { - "build_name": "pylatest_conda_forge_mkl_osx-64", + "name": "pylatest_conda_forge_mkl_osx-64", + "type": "conda", + "tag": "main-ci", "folder": "build_tools/azure", "platform": "osx-64", "channel": "conda-forge", - "conda_dependencies": common_dependencies + [ + "conda_dependencies": common_dependencies + + [ "ccache", "compilers", "llvm-openmp", @@ -115,56 +130,80 @@ def remove_from(alist, to_remove): }, }, { - "build_name": "pylatest_conda_mkl_no_openmp", + "name": "pylatest_conda_mkl_no_openmp", + "type": "conda", + "tag": "main-ci", "folder": "build_tools/azure", "platform": "osx-64", "channel": "defaults", - "conda_dependencies": common_dependencies + ["ccache"], - "package_constraints": { - "blas": "[build=mkl]", - }, - }, - { - "build_name": "pylatest_conda_forge_mkl_no_coverage", - "folder": "build_tools/azure", - "platform": "linux-64", - "channel": "conda-forge", - "conda_dependencies": common_dependencies_without_coverage + ["ccache"], + "conda_dependencies": remove_from( + common_dependencies, ["cython", "threadpoolctl", "meson-python"] + ) + + ["ccache"], "package_constraints": { "blas": "[build=mkl]", + # scipy 1.12.x crashes on this platform (https://github.com/scipy/scipy/pull/20086) + # TODO: release scipy constraint when 1.13 is available in the "default" + # channel. 
+ "scipy": "<1.12", }, + # TODO: put cython, threadpoolctl and meson-python back to conda + # dependencies when required version is available on the main channel + "pip_dependencies": ["cython", "threadpoolctl", "meson-python"], }, { - "build_name": "py38_conda_defaults_openblas", + "name": "pymin_conda_defaults_openblas", + "type": "conda", + "tag": "main-ci", "folder": "build_tools/azure", "platform": "linux-64", "channel": "defaults", - "conda_dependencies": common_dependencies + ["ccache"], + "conda_dependencies": remove_from( + common_dependencies, + ["pandas", "threadpoolctl", "pip", "ninja", "meson-python"], + ) + + ["ccache"], "package_constraints": { - "python": "3.8", + "python": "3.9", "blas": "[build=openblas]", - "numpy": "min", - "scipy": "min", + "numpy": "1.21", # the min version is not available on the defaults channel + "scipy": "1.7", # the min version has some low level crashes "matplotlib": "min", - "threadpoolctl": "2.2.0", + "cython": "min", + "joblib": "min", + "threadpoolctl": "min", }, + # TODO: put pip dependencies back to conda dependencies when required + # version is available on the defaults channel. + "pip_dependencies": ["threadpoolctl"], }, { - "build_name": "py38_conda_forge_openblas_ubuntu_2204", + "name": "pymin_conda_forge_openblas_ubuntu_2204", + "type": "conda", + "tag": "main-ci", "folder": "build_tools/azure", "platform": "linux-64", "channel": "conda-forge", - "conda_dependencies": common_dependencies_without_coverage + ["ccache"], - "package_constraints": {"python": "3.8", "blas": "[build=openblas]"}, + "conda_dependencies": ( + common_dependencies_without_coverage + + docstring_test_dependencies + + ["ccache"] + ), + "package_constraints": { + "python": "3.9", + "blas": "[build=openblas]", + }, }, { - "build_name": "pylatest_pip_openblas_pandas", + "name": "pylatest_pip_openblas_pandas", + "type": "conda", + "tag": "main-ci", "folder": "build_tools/azure", "platform": "linux-64", "channel": "defaults", "conda_dependencies": ["python", "ccache"], "pip_dependencies": ( - remove_from(common_dependencies, ["python", "blas"]) + remove_from(common_dependencies, ["python", "blas", "pip"]) + docstring_test_dependencies + ["lightgbm", "scikit-image"] ), @@ -173,7 +212,9 @@ def remove_from(alist, to_remove): }, }, { - "build_name": "pylatest_pip_scipy_dev", + "name": "pylatest_pip_scipy_dev", + "type": "conda", + "tag": "scipy-dev", "folder": "build_tools/azure", "platform": "linux-64", "channel": "defaults", @@ -205,7 +246,9 @@ def remove_from(alist, to_remove): ), }, { - "build_name": "pypy3", + "name": "pypy3", + "type": "conda", + "tag": "pypy", "folder": "build_tools/azure", "platform": "linux-64", "channel": "conda-forge", @@ -222,39 +265,53 @@ def remove_from(alist, to_remove): }, }, { - "build_name": "py38_conda_forge_mkl", + "name": "pymin_conda_forge_mkl", + "type": "conda", + "tag": "main-ci", "folder": "build_tools/azure", "platform": "win-64", "channel": "conda-forge", - "conda_dependencies": remove_from(common_dependencies, ["pandas", "pyamg"]) + [ + "conda_dependencies": remove_from(common_dependencies, ["pandas", "pyamg"]) + + [ "wheel", "pip", ], "package_constraints": { - "python": "3.8", + "python": "3.9", "blas": "[build=mkl]", }, }, { - "build_name": "doc_min_dependencies", + "name": "doc_min_dependencies", + "type": "conda", + "tag": "main-ci", "folder": "build_tools/circle", "platform": "linux-64", "channel": "conda-forge", - "conda_dependencies": common_dependencies_without_coverage + [ + "conda_dependencies": 
common_dependencies_without_coverage + + [ "scikit-image", "seaborn", "memory_profiler", "compilers", "sphinx", "sphinx-gallery", + "sphinx-copybutton", "numpydoc", "sphinx-prompt", "plotly", + "polars", "pooch", + "sphinx-remove-toctrees", + "sphinx-design", + "pydata-sphinx-theme", + ], + "pip_dependencies": [ + "sphinxext-opengraph", + "sphinxcontrib-sass", ], - "pip_dependencies": ["sphinxext-opengraph"], "package_constraints": { - "python": "3.8", + "python": "3.9", "numpy": "min", "scipy": "min", "matplotlib": "min", @@ -263,55 +320,73 @@ def remove_from(alist, to_remove): "sphinx": "min", "pandas": "min", "sphinx-gallery": "min", + "sphinx-copybutton": "min", "numpydoc": "min", "sphinx-prompt": "min", "sphinxext-opengraph": "min", "plotly": "min", + "polars": "min", + "pooch": "min", + "sphinx-design": "min", + "sphinxcontrib-sass": "min", + "sphinx-remove-toctrees": "min", + "pydata-sphinx-theme": "min", }, }, { - "build_name": "doc", + "name": "doc", + "type": "conda", + "tag": "main-ci", "folder": "build_tools/circle", "platform": "linux-64", "channel": "conda-forge", - "conda_dependencies": common_dependencies_without_coverage + [ + "conda_dependencies": common_dependencies_without_coverage + + [ "scikit-image", "seaborn", "memory_profiler", "compilers", "sphinx", "sphinx-gallery", + "sphinx-copybutton", "numpydoc", "sphinx-prompt", "plotly", + "polars", "pooch", "sphinxext-opengraph", + "sphinx-remove-toctrees", + "sphinx-design", + "pydata-sphinx-theme", + ], + "pip_dependencies": [ + "jupyterlite-sphinx", + "jupyterlite-pyodide-kernel", + "sphinxcontrib-sass", ], - "pip_dependencies": ["jupyterlite-sphinx", "jupyterlite-pyodide-kernel"], "package_constraints": { "python": "3.9", - # XXX: sphinx > 6.0 does not correctly generate searchindex.js - "sphinx": "6.0.0", }, }, { - "build_name": "py39_conda_forge", + "name": "pymin_conda_forge", + "type": "conda", + "tag": "arm", "folder": "build_tools/cirrus", "platform": "linux-aarch64", "channel": "conda-forge", "conda_dependencies": remove_from( common_dependencies_without_coverage, ["pandas", "pyamg"] - ) + ["pip", "ccache"], + ) + + ["pip", "ccache"], "package_constraints": { "python": "3.9", }, }, -] - - -pip_build_metadata_list = [ { - "build_name": "debian_atlas_32bit", + "name": "debian_atlas_32bit", + "type": "pip", + "tag": "main-ci", "folder": "build_tools/azure", "pip_dependencies": [ "cython", @@ -319,19 +394,24 @@ def remove_from(alist, to_remove): "threadpoolctl", "pytest", "pytest-cov", + "ninja", + "meson-python", ], "package_constraints": { "joblib": "min", - "threadpoolctl": "2.2.0", + "threadpoolctl": "3.1.0", "pytest": "min", "pytest-cov": "min", # no pytest-xdist because it causes issue on 32bit + "cython": "min", }, # same Python version as in debian-32 build "python_version": "3.9.2", }, { - "build_name": "ubuntu_atlas", + "name": "ubuntu_atlas", + "type": "pip", + "tag": "main-ci", "folder": "build_tools/azure", "pip_dependencies": [ "cython", @@ -339,20 +419,27 @@ def remove_from(alist, to_remove): "threadpoolctl", "pytest", "pytest-xdist", + "ninja", + "meson-python", ], - "package_constraints": {"joblib": "min", "threadpoolctl": "min"}, + "package_constraints": { + "joblib": "min", + "threadpoolctl": "min", + "cython": "min", + }, "python_version": "3.10.4", }, ] def execute_command(command_list): + logger.debug(" ".join(command_list)) proc = subprocess.Popen( command_list, stdout=subprocess.PIPE, stderr=subprocess.PIPE ) out, err = proc.communicate() - out, err = out.decode(), err.decode() + out, err = 
out.decode(errors="replace"), err.decode(errors="replace") if proc.returncode != 0: command_str = " ".join(command_list) @@ -363,6 +450,7 @@ def execute_command(command_list): "stdout:\n{}\n" "stderr:\n{}\n".format(proc.returncode, command_str, out, err) ) + logger.log(TRACE, out) return out @@ -397,7 +485,8 @@ def get_package_with_constraint(package_name, build_metadata, uses_pip=False): def get_conda_environment_content(build_metadata): - template = environment.from_string(""" + template = environment.from_string( + """ # DO NOT EDIT: this file is generated from the specification found in the # following script to centralize the configuration for CI builds: # build_tools/update_environments_and_lock_files.py @@ -413,15 +502,17 @@ def get_conda_environment_content(build_metadata): {% for pip_dep in build_metadata.get('pip_dependencies', []) %} - {{ pip_dep | get_package_with_constraint(build_metadata, uses_pip=True) }} {% endfor %} - {% endif %}""".strip()) + {% endif %}""".strip() + ) return template.render(build_metadata=build_metadata) def write_conda_environment(build_metadata): content = get_conda_environment_content(build_metadata) - build_name = build_metadata["build_name"] + build_name = build_metadata["name"] folder_path = Path(build_metadata["folder"]) output_path = folder_path / f"{build_name}_environment.yml" + logger.debug(output_path) output_path.write_text(content) @@ -431,17 +522,25 @@ def write_all_conda_environments(build_metadata_list): def conda_lock(environment_path, lock_file_path, platform): - command = ( - f"conda-lock lock --mamba --kind explicit --platform {platform} " - f"--file {environment_path} --filename-template {lock_file_path}" + execute_command( + [ + "conda-lock", + "lock", + "--mamba", + "--kind", + "explicit", + "--platform", + platform, + "--file", + str(environment_path), + "--filename-template", + str(lock_file_path), + ] ) - logger.debug("conda-lock command: %s", command) - execute_command(shlex.split(command)) - def create_conda_lock_file(build_metadata): - build_name = build_metadata["build_name"] + build_name = build_metadata["name"] folder_path = Path(build_metadata["folder"]) environment_path = folder_path / f"{build_name}_environment.yml" platform = build_metadata["platform"] @@ -455,44 +554,51 @@ def create_conda_lock_file(build_metadata): def write_all_conda_lock_files(build_metadata_list): for build_metadata in build_metadata_list: - logger.info(build_metadata["build_name"]) + logger.info(f"# Locking dependencies for {build_metadata['name']}") create_conda_lock_file(build_metadata) def get_pip_requirements_content(build_metadata): - template = environment.from_string(""" + template = environment.from_string( + """ # DO NOT EDIT: this file is generated from the specification found in the # following script to centralize the configuration for CI builds: # build_tools/update_environments_and_lock_files.py {% for pip_dep in build_metadata['pip_dependencies'] %} {{ pip_dep | get_package_with_constraint(build_metadata, uses_pip=True) }} -{% endfor %}""".strip()) +{% endfor %}""".strip() + ) return template.render(build_metadata=build_metadata) def write_pip_requirements(build_metadata): - build_name = build_metadata["build_name"] + build_name = build_metadata["name"] content = get_pip_requirements_content(build_metadata) folder_path = Path(build_metadata["folder"]) output_path = folder_path / f"{build_name}_requirements.txt" + logger.debug(output_path) output_path.write_text(content) def write_all_pip_requirements(build_metadata_list): for 
build_metadata in build_metadata_list: - logger.info(build_metadata["build_name"]) write_pip_requirements(build_metadata) def pip_compile(pip_compile_path, requirements_path, lock_file_path): - command = f"{pip_compile_path} --upgrade {requirements_path} -o {lock_file_path}" - - logger.debug("pip-compile command: %s", command) - execute_command(shlex.split(command)) + execute_command( + [ + str(pip_compile_path), + "--upgrade", + str(requirements_path), + "-o", + str(lock_file_path), + ] + ) def write_pip_lock_file(build_metadata): - build_name = build_metadata["build_name"] + build_name = build_metadata["name"] python_version = build_metadata["python_version"] environment_name = f"pip-tools-python{python_version}" # To make sure that the Python used to create the pip lock file is the same @@ -500,13 +606,21 @@ def write_pip_lock_file(build_metadata): # create a conda environment with the correct Python version and # pip-compile and run pip-compile in this environment - command = ( - "conda create -c conda-forge -n" - f" pip-tools-python{python_version} python={python_version} pip-tools -y" + execute_command( + [ + "conda", + "create", + "-c", + "conda-forge", + "-n", + f"pip-tools-python{python_version}", + f"python={python_version}", + "pip-tools", + "-y", + ] ) - execute_command(shlex.split(command)) - json_output = execute_command(shlex.split("conda info --json")) + json_output = execute_command(["conda", "info", "--json"]) conda_info = json.loads(json_output) environment_folder = [ each for each in conda_info["envs"] if each.endswith(environment_name) @@ -522,6 +636,7 @@ def write_pip_lock_file(build_metadata): def write_all_pip_lock_files(build_metadata_list): for build_metadata in build_metadata_list: + logger.info(f"# Locking dependencies for {build_metadata['name']}") write_pip_lock_file(build_metadata) @@ -539,33 +654,105 @@ def check_conda_lock_version(): ) +def check_conda_version(): + # Avoid issues with glibc (https://github.com/conda/conda-lock/issues/292) + # or osx (https://github.com/conda/conda-lock/issues/408) virtual package. + # The glibc one has been fixed in conda 23.1.0 and the osx has been fixed + # in conda 23.7.0. + conda_info_output = execute_command(["conda", "info", "--json"]) + + conda_info = json.loads(conda_info_output) + conda_version = Version(conda_info["conda_version"]) + + if Version("22.9.0") < conda_version < Version("23.7"): + raise RuntimeError( + f"conda version should be <= 22.9.0 or >= 23.7 got: {conda_version}" + ) + + @click.command() @click.option( "--select-build", default="", - help="Regex to restrict the builds we want to update environment and lock files", + help=( + "Regex to filter the builds we want to update environment and lock files. By" + " default all the builds are selected." + ), +) +@click.option( + "--skip-build", + default=None, + help="Regex to skip some builds from the builds selected by --select-build", +) +@click.option( + "--select-tag", + default=None, + help=( + "Tag to filter the builds, e.g. 'main-ci' or 'scipy-dev'. " + "This is an additional filtering on top of --select-build." 
+ ), ) -def main(select_build): +@click.option( + "-v", + "--verbose", + is_flag=True, + help="Print commands executed by the script", +) +@click.option( + "-vv", + "--very-verbose", + is_flag=True, + help="Print output of commands executed by the script", +) +def main(select_build, skip_build, select_tag, verbose, very_verbose): + if verbose: + logger.setLevel(logging.DEBUG) + if very_verbose: + logger.setLevel(TRACE) + handler.setLevel(TRACE) check_conda_lock_version() + check_conda_version() + + filtered_build_metadata_list = [ + each for each in build_metadata_list if re.search(select_build, each["name"]) + ] + if select_tag is not None: + filtered_build_metadata_list = [ + each for each in build_metadata_list if each["tag"] == select_tag + ] + if skip_build is not None: + filtered_build_metadata_list = [ + each + for each in filtered_build_metadata_list + if not re.search(skip_build, each["name"]) + ] + + selected_build_info = "\n".join( + f" - {each['name']}, type: {each['type']}, tag: {each['tag']}" + for each in filtered_build_metadata_list + ) + selected_build_message = ( + f"# {len(filtered_build_metadata_list)} selected builds\n{selected_build_info}" + ) + logger.info(selected_build_message) + filtered_conda_build_metadata_list = [ - each - for each in conda_build_metadata_list - if re.search(select_build, each["build_name"]) + each for each in filtered_build_metadata_list if each["type"] == "conda" ] - logger.info("Writing conda environments") - write_all_conda_environments(filtered_conda_build_metadata_list) - logger.info("Writing conda lock files") - write_all_conda_lock_files(filtered_conda_build_metadata_list) + if filtered_conda_build_metadata_list: + logger.info("# Writing conda environments") + write_all_conda_environments(filtered_conda_build_metadata_list) + logger.info("# Writing conda lock files") + write_all_conda_lock_files(filtered_conda_build_metadata_list) filtered_pip_build_metadata_list = [ - each - for each in pip_build_metadata_list - if re.search(select_build, each["build_name"]) + each for each in filtered_build_metadata_list if each["type"] == "pip" ] - logger.info("Writing pip requirements") - write_all_pip_requirements(filtered_pip_build_metadata_list) - logger.info("Writing pip lock files") - write_all_pip_lock_files(filtered_pip_build_metadata_list) + if filtered_pip_build_metadata_list: + logger.info("# Writing pip requirements") + write_all_pip_requirements(filtered_pip_build_metadata_list) + logger.info("# Writing pip lock files") + write_all_pip_lock_files(filtered_pip_build_metadata_list) if __name__ == "__main__": diff --git a/build_tools/wheels/build_wheels.sh b/build_tools/wheels/build_wheels.sh index bea9218b3826c..90bf64e979b0e 100755 --- a/build_tools/wheels/build_wheels.sh +++ b/build_tools/wheels/build_wheels.sh @@ -3,6 +3,18 @@ set -e set -x +# Set environment variables to make our wheel build easier to reproduce byte +# for byte from source. See https://reproducible-builds.org/. The long term +# motivation would be to be able to detect supply chain attacks. +# +# In particular we set SOURCE_DATE_EPOCH to the commit date of the last commit. +# +# XXX: setting those environment variables is not enough. 
See the following +# issue for more details on what remains to do: +# https://github.com/scikit-learn/scikit-learn/issues/28151 +export SOURCE_DATE_EPOCH=$(git log -1 --pretty=%ct) +export PYTHONHASHSEED=0 + # OpenMP is not present on macOS by default if [[ $(uname) == "Darwin" ]]; then # Make sure to use a libomp version binary compatible with the oldest @@ -35,14 +47,18 @@ if [[ $(uname) == "Darwin" ]]; then export CFLAGS="$CFLAGS -I$PREFIX/include" export CXXFLAGS="$CXXFLAGS -I$PREFIX/include" export LDFLAGS="$LDFLAGS -Wl,-rpath,$PREFIX/lib -L$PREFIX/lib -lomp" +fi - if [[ $(uname -m) == "arm64" && "$CIBW_BUILD" == "cp38-macosx_arm64" ]]; then - # Enables native building and testing for macosx arm on Python 3.8. For details see: - # https://cibuildwheel.readthedocs.io/en/stable/faq/#macos-building-cpython-38-wheels-on-arm64 - curl -o /tmp/Python38.pkg https://www.python.org/ftp/python/3.8.10/python-3.8.10-macos11.pkg - sudo installer -pkg /tmp/Python38.pkg -target / - sh "/Applications/Python 3.8/Install Certificates.command" - fi + +if [[ "$GITHUB_EVENT_NAME" == "schedule" \ + || "$GITHUB_EVENT_NAME" == "workflow_dispatch" \ + || "$CIRRUS_CRON" == "nightly" ]]; then + # Nightly build: See also `../github/upload_anaconda.sh` (same branching). + # To help with NumPy 2.0 transition, ensure that we use the NumPy 2.0 + # nightlies. This lives on the edge and opts-in to all pre-releases. + # That could be an issue, in which case no-build-isolation and a targeted + # NumPy install may be necessary, instead. + export CIBW_BUILD_FRONTEND='pip; args: --pre --extra-index-url "https://pypi.anaconda.org/scientific-python-nightly-wheels/simple"' fi # The version of the built dependencies are specified diff --git a/build_tools/wheels/test_wheels.sh b/build_tools/wheels/test_wheels.sh index bfbe769add657..e8cdf4b3ea8a2 100755 --- a/build_tools/wheels/test_wheels.sh +++ b/build_tools/wheels/test_wheels.sh @@ -3,14 +3,6 @@ set -e set -x -UNAME=$(uname) - -if [[ "$UNAME" != "Linux" ]]; then - # The Linux test environment is run in a Docker container and - # it is not possible to copy the test configuration file (yet) - cp $CONFTEST_PATH $CONFTEST_NAME -fi - python -c "import joblib; print(f'Number of cores (physical): \ {joblib.cpu_count()} ({joblib.cpu_count(only_physical_cores=True)})')" diff --git a/conftest.py b/conftest.py deleted file mode 100644 index e4e478d2d72d7..0000000000000 --- a/conftest.py +++ /dev/null @@ -1,6 +0,0 @@ -# Even if empty this file is useful so that when running from the root folder -# ./sklearn is added to sys.path by pytest. See -# https://docs.pytest.org/en/latest/explanation/pythonpath.html for more -# details. For example, this allows to build extensions in place and run pytest -# doc/modules/clustering.rst and use sklearn from the local folder rather than -# the one from site-packages. diff --git a/doc/Makefile b/doc/Makefile index 2ee611ccb5cf0..f84d3c78b8051 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -2,7 +2,7 @@ # # You can set these variables from the command line. -SPHINXOPTS = +SPHINXOPTS = -T SPHINXBUILD ?= sphinx-build PAPER = BUILDDIR = _build @@ -24,7 +24,7 @@ endif # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter -ALLSPHINXOPTS = -T -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS)\ +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS)\ $(EXAMPLES_PATTERN_OPTS) . 
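The reproducible-build block added to build_tools/wheels/build_wheels.sh above pins SOURCE_DATE_EPOCH to the date of the last commit and fixes PYTHONHASHSEED, so rebuilding the same commit should not differ merely because of embedded timestamps or hash randomization (the XXX comment in the patch notes these variables alone are not sufficient; see issue 28151). As a rough illustration of the same idea outside CI, here is a minimal Python sketch; the helper name and the use of `python -m build --wheel` are assumptions for this example, not part of the patch.

import os
import subprocess

def reproducible_build_env(repo_dir="."):
    """Environment for a locally reproducible wheel build (sketch).

    Mirrors the variables exported in build_tools/wheels/build_wheels.sh:
    SOURCE_DATE_EPOCH is pinned to the last commit date and PYTHONHASHSEED
    is fixed, so two builds of the same commit should not differ because of
    embedded timestamps or hash randomization. Assumes a git checkout.
    """
    commit_date = subprocess.run(
        ["git", "log", "-1", "--pretty=%ct"],
        cwd=repo_dir, check=True, capture_output=True, text=True,
    ).stdout.strip()
    env = os.environ.copy()
    env["SOURCE_DATE_EPOCH"] = commit_date
    env["PYTHONHASHSEED"] = "0"
    return env

if __name__ == "__main__":
    # Assumes the `build` package is installed (as in build_source.sh).
    subprocess.run(
        ["python", "-m", "build", "--wheel"],
        env=reproducible_build_env(),
        check=True,
    )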
@@ -47,9 +47,17 @@ help: clean: -rm -rf $(BUILDDIR)/* + @echo "Removed $(BUILDDIR)/*" -rm -rf auto_examples/ + @echo "Removed auto_examples/" -rm -rf generated/* + @echo "Removed generated/" -rm -rf modules/generated/ + @echo "Removed modules/generated/" + -rm -rf css/styles/ + @echo "Removed css/styles/" + -rm -rf api/*.rst + @echo "Removed api/*.rst" # Default to SPHINX_NUMJOBS=1 for full documentation build. Using # SPHINX_NUMJOBS!=1 may actually slow down the build, or cause weird issues in diff --git a/doc/README.md b/doc/README.md index 8cace706efd35..537ed85006006 100644 --- a/doc/README.md +++ b/doc/README.md @@ -1,6 +1,6 @@ # Documentation for scikit-learn This directory contains the full manual and website as displayed at -http://scikit-learn.org. See -http://scikit-learn.org/dev/developers/contributing.html#documentation for -detailed information about the documentation. +https://scikit-learn.org. See +https://scikit-learn.org/dev/developers/contributing.html#documentation for +detailed information about the documentation. diff --git a/doc/about.rst b/doc/about.rst index eabd8d5e251d9..47d57e4737318 100644 --- a/doc/about.rst +++ b/doc/about.rst @@ -13,8 +13,8 @@ this project as part of his thesis. In 2010 Fabian Pedregosa, Gael Varoquaux, Alexandre Gramfort and Vincent Michel of INRIA took leadership of the project and made the first public release, February the 1st 2010. Since then, several releases have appeared -following a ~ 3-month cycle, and a thriving international community has -been leading the development. +following an approximately 3-month cycle, and a thriving international +community has been leading the development. Governance ---------- @@ -22,25 +22,48 @@ Governance The decision making process and governance structure of scikit-learn is laid out in the :ref:`governance document `. -Authors -------- +.. The "author" anchors below is there to ensure that old html links (in + the form of "about.html#author" still work) + +.. _authors: + +The people behind scikit-learn +------------------------------ + +Scikit-learn is a community project, developed by a large group of +people, all across the world. A few teams, listed below, have central +roles, however a more complete list of contributors can be found `on +github +`__. + +Maintainers Team +................ -The following people are currently core contributors to scikit-learn's development -and maintenance: +The following people are currently maintainers, in charge of +consolidating scikit-learn's development and maintenance: -.. include:: authors.rst +.. include:: maintainers.rst -Please do not email the authors directly to ask for assistance or report issues. -Instead, please see `What's the best way to ask questions about scikit-learn -`_ -in the FAQ. +.. note:: + + Please do not email the authors directly to ask for assistance or report issues. + Instead, please see `What's the best way to ask questions about scikit-learn + `_ + in the FAQ. .. seealso:: - :ref:`How you can contribute to the project ` + How you can :ref:`contribute to the project `. + +Documentation Team +.................. + +The following people help with documenting the project: + +.. include:: documentation_team.rst Contributor Experience Team ---------------------------- +........................... The following people are active contributors who also help with :ref:`triaging issues `, PRs, and general @@ -49,24 +72,23 @@ maintenance: .. include:: contributor_experience_team.rst Communication Team ------------------- +.................. 
The following people help with :ref:`communication around scikit-learn `. .. include:: communication_team.rst - Emeritus Core Developers ------------------------- +........................ The following people have been active contributors in the past, but are no longer active in the project: -.. include:: authors_emeritus.rst +.. include:: maintainers_emeritus.rst Emeritus Communication Team ---------------------------- +........................... The following people have been active in the communication team in the past, but no longer have communication responsibilities: @@ -74,7 +96,7 @@ past, but no longer have communication responsibilities: .. include:: communication_team_emeritus.rst Emeritus Contributor Experience Team ------------------------------------- +.................................... The following people have been active in the contributor experience team in the past: @@ -89,44 +111,44 @@ Citing scikit-learn If you use scikit-learn in a scientific publication, we would appreciate citations to the following paper: - `Scikit-learn: Machine Learning in Python - `_, Pedregosa - *et al.*, JMLR 12, pp. 2825-2830, 2011. - - Bibtex entry:: - - @article{scikit-learn, - title={Scikit-learn: Machine Learning in {P}ython}, - author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V. - and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P. - and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and - Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.}, - journal={Journal of Machine Learning Research}, - volume={12}, - pages={2825--2830}, - year={2011} - } +`Scikit-learn: Machine Learning in Python +`_, Pedregosa +*et al.*, JMLR 12, pp. 2825-2830, 2011. + +Bibtex entry:: + + @article{scikit-learn, + title={Scikit-learn: Machine Learning in {P}ython}, + author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V. + and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P. + and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and + Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.}, + journal={Journal of Machine Learning Research}, + volume={12}, + pages={2825--2830}, + year={2011} + } If you want to cite scikit-learn for its API or design, you may also want to consider the following paper: - :arxiv:`API design for machine learning software: experiences from the scikit-learn - project <1309.0238>`, Buitinck *et al.*, 2013. - - Bibtex entry:: - - @inproceedings{sklearn_api, - author = {Lars Buitinck and Gilles Louppe and Mathieu Blondel and - Fabian Pedregosa and Andreas Mueller and Olivier Grisel and - Vlad Niculae and Peter Prettenhofer and Alexandre Gramfort - and Jaques Grobler and Robert Layton and Jake VanderPlas and - Arnaud Joly and Brian Holt and Ga{\"{e}}l Varoquaux}, - title = {{API} design for machine learning software: experiences from the scikit-learn - project}, - booktitle = {ECML PKDD Workshop: Languages for Data Mining and Machine Learning}, - year = {2013}, - pages = {108--122}, - } +:arxiv:`API design for machine learning software: experiences from the scikit-learn +project <1309.0238>`, Buitinck *et al.*, 2013. 
+ +Bibtex entry:: + + @inproceedings{sklearn_api, + author = {Lars Buitinck and Gilles Louppe and Mathieu Blondel and + Fabian Pedregosa and Andreas Mueller and Olivier Grisel and + Vlad Niculae and Peter Prettenhofer and Alexandre Gramfort + and Jaques Grobler and Robert Layton and Jake VanderPlas and + Arnaud Joly and Brian Holt and Ga{\"{e}}l Varoquaux}, + title = {{API} design for machine learning software: experiences from the scikit-learn + project}, + booktitle = {ECML PKDD Workshop: Languages for Data Mining and Machine Learning}, + year = {2013}, + pages = {108--122}, + } Artwork ------- @@ -136,411 +158,305 @@ High quality PNG and SVG logos are available in the `doc/logos/ source directory. .. image:: images/scikit-learn-logo-notext.png - :align: center + :align: center Funding ------- -Scikit-Learn is a community driven project, however institutional and private + +Scikit-learn is a community driven project, however institutional and private grants help to assure its sustainability. The project would like to thank the following funders. ................................... -.. raw:: html +.. div:: sk-text-image-grid-small -
-
+ .. div:: text-box -The `Members `_ of -the `Scikit-Learn Consortium at Inria Foundation -`_ fund Olivier -Grisel, Guillaume Lemaitre, and Jérémie du Boisberranger. + `:probabl. `_ funds Adrin Jalali, Arturo Amor, François Goupil, + Guillaume Lemaitre, Jérémie du Boisberranger, Olivier Grisel, and Stefanie Senger. -.. raw:: html + .. div:: image-box -
+ .. image:: images/probabl.png + :target: https://probabl.ai -.. |msn| image:: images/microsoft.png - :width: 100pt - :target: https://www.microsoft.com/ +.......... -.. |bcg| image:: images/bcg.png - :width: 100pt - :target: https://www.bcg.com/beyond-consulting/bcg-gamma/default.aspx +.. |chanel| image:: images/chanel.png + :target: https://www.chanel.com .. |axa| image:: images/axa.png - :width: 50pt - :target: https://www.axa.fr/ + :target: https://www.axa.fr/ .. |bnp| image:: images/bnp.png - :width: 150pt - :target: https://www.bnpparibascardif.com/ - -.. |fujitsu| image:: images/fujitsu.png - :width: 100pt - :target: https://www.fujitsu.com/global/ + :target: https://www.bnpparibascardif.com/ .. |dataiku| image:: images/dataiku.png - :width: 70pt - :target: https://www.dataiku.com/ + :target: https://www.dataiku.com/ -.. |aphp| image:: images/logo_APHP_text.png - :width: 150pt - :target: https://aphp.fr/ +.. |hf| image:: images/huggingface_logo-noborder.png + :target: https://huggingface.co -.. |inria| image:: images/inria-logo.jpg - :width: 100pt - :target: https://www.inria.fr - - -.. raw:: html +.. |nvidia| image:: images/nvidia.png + :target: https://www.nvidia.com -
- -.. table:: - :class: sk-sponsor-table align-default - - +---------+----------+ - | |bcg| | - +---------+----------+ - | | - +---------+----------+ - | |axa| | |bnp| | - +---------+----------+ - ||fujitsu|| |msn| | - +---------+----------+ - | | - +---------+----------+ - | |dataiku| | - +---------+----------+ - | |aphp| | - +---------+----------+ - | | - +---------+----------+ - | |inria| | - +---------+----------+ +.. |inria| image:: images/inria-logo.jpg + :target: https://www.inria.fr .. raw:: html -
-
+ -
-
+.. div:: sk-text-image-grid-small -`Hugging Face `_ funds Adrin Jalali since 2022. + .. div:: text-box -.. raw:: html + The `Members `_ of + the `Scikit-learn Consortium at Inria Foundation + `_ help at maintaining and + improving the project through their financial support. -
+ .. div:: image-box -
+ .. table:: + :class: image-subtable -.. image:: images/huggingface_logo-noborder.png - :width: 55pt - :align: center - :target: https://huggingface.co/ + +----------+-----------+ + | |chanel| | + +----------+-----------+ + | |axa| | |bnp| | + +----------+-----------+ + | |nvidia| | |hf| | + +----------+-----------+ + | |dataiku| | + +----------+-----------+ + | |inria| | + +----------+-----------+ -.. raw:: html +.......... -
-
+.. div:: sk-text-image-grid-small -........... + .. div:: text-box -.. raw:: html + `NVidia `_ funds Tim Head since 2022 + and is part of the scikit-learn consortium at Inria. -
-
+ .. div:: image-box -`Microsoft `_ funds Andreas MÃŧller since 2020. + .. image:: images/nvidia.png + :target: https://nvidia.com -.. raw:: html +.......... -
+.. div:: sk-text-image-grid-small -
+ .. div:: text-box -.. image:: images/microsoft.png - :width: 100pt - :align: center - :target: https://www.microsoft.com/ + `Microsoft `_ funds Andreas MÃŧller since 2020. -.. raw:: html + .. div:: image-box -
-
+ .. image:: images/microsoft.png + :target: https://microsoft.com ........... -.. raw:: html - -
-
- -`Quansight Labs `_ funds Thomas J. Fan since 2021. +.. div:: sk-text-image-grid-small -.. raw:: html + .. div:: text-box -
+ `Quansight Labs `_ funds Lucy Liu since 2022. -
+ .. div:: image-box -.. image:: images/quansight-labs.png - :width: 100pt - :align: center - :target: https://labs.quansight.org + .. image:: images/quansight-labs.png + :target: https://labs.quansight.org -.. raw:: html +........... -
-
+.. div:: sk-text-image-grid-small -Past Sponsors -............. + .. div:: text-box -.. raw:: html + `Tidelift `_ supports the project via their service + agreement. -
-
+ .. div:: image-box -`Columbia University `_ funded Andreas MÃŧller -(2016-2020). + .. image:: images/Tidelift-logo-on-light.svg + :target: https://tidelift.com/ -.. raw:: html +........... -
-
+Past Sponsors +............. -.. image:: images/columbia.png - :width: 50pt - :align: center - :target: https://www.columbia.edu/ +.. div:: sk-text-image-grid-small -.. raw:: html + .. div:: text-box -
-
+ `Quansight Labs `_ funded Meekail Zain in 2022 and 2023, + and funded Thomas J. Fan from 2021 to 2023. -........ + .. div:: image-box -.. raw:: html + .. image:: images/quansight-labs.png + :target: https://labs.quansight.org -
-
+........... -`The University of Sydney `_ funded Joel Nothman -(2017-2021). +.. div:: sk-text-image-grid-small -.. raw:: html + .. div:: text-box -
+ `Columbia University `_ funded Andreas MÃŧller + (2016-2020). -
+ .. div:: image-box -.. image:: images/sydney-primary.jpeg - :width: 100pt - :align: center - :target: https://sydney.edu.au/ + .. image:: images/columbia.png + :target: https://columbia.edu -.. raw:: html +........ -
-
+.. div:: sk-text-image-grid-small -........... + .. div:: text-box -.. raw:: html + `The University of Sydney `_ funded Joel Nothman + (2017-2021). -
-
+ .. div:: image-box -Andreas MÃŧller received a grant to improve scikit-learn from the -`Alfred P. Sloan Foundation `_ . -This grant supported the position of Nicolas Hug and Thomas J. Fan. + .. image:: images/sydney-primary.jpeg + :target: https://sydney.edu.au/ -.. raw:: html +........... -
+.. div:: sk-text-image-grid-small -
+ .. div:: text-box -.. image:: images/sloan_banner.png - :width: 100pt - :align: center - :target: https://sloan.org/ + Andreas MÃŧller received a grant to improve scikit-learn from the + `Alfred P. Sloan Foundation `_ . + This grant supported the position of Nicolas Hug and Thomas J. Fan. -.. raw:: html + .. div:: image-box -
-
+ .. image:: images/sloan_banner.png + :target: https://sloan.org/ ............. -.. raw:: html - -
-
+.. div:: sk-text-image-grid-small -`INRIA `_ actively supports this project. It has -provided funding for Fabian Pedregosa (2010-2012), Jaques Grobler -(2012-2013) and Olivier Grisel (2013-2017) to work on this project -full-time. It also hosts coding sprints and other events. - -.. raw:: html + .. div:: text-box -
+ `INRIA `_ actively supports this project. It has + provided funding for Fabian Pedregosa (2010-2012), Jaques Grobler + (2012-2013) and Olivier Grisel (2013-2017) to work on this project + full-time. It also hosts coding sprints and other events. -
+ .. div:: image-box -.. image:: images/inria-logo.jpg - :width: 100pt - :align: center - :target: https://www.inria.fr - -.. raw:: html - -
-
+ .. image:: images/inria-logo.jpg + :target: https://www.inria.fr ..................... -.. raw:: html +.. div:: sk-text-image-grid-small -
-
+ .. div:: text-box -`Paris-Saclay Center for Data Science -`_ -funded one year for a developer to work on the project full-time -(2014-2015), 50% of the time of Guillaume Lemaitre (2016-2017) and 50% of the -time of Joris van den Bossche (2017-2018). + `Paris-Saclay Center for Data Science `_ + funded one year for a developer to work on the project full-time (2014-2015), 50% + of the time of Guillaume Lemaitre (2016-2017) and 50% of the time of Joris van den + Bossche (2017-2018). -.. raw:: html - -
-
- -.. image:: images/cds-logo.png - :width: 100pt - :align: center - :target: http://www.datascience-paris-saclay.fr/ - -.. raw:: html + .. div:: image-box -
-
+ .. image:: images/cds-logo.png + :target: http://www.datascience-paris-saclay.fr/ .......................... -.. raw:: html - -
-
- -`NYU Moore-Sloan Data Science Environment `_ -funded Andreas Mueller (2014-2016) to work on this project. The Moore-Sloan -Data Science Environment also funds several students to work on the project -part-time. +.. div:: sk-text-image-grid-small -.. raw:: html - -
-
+ .. div:: text-box -.. image:: images/nyu_short_color.png - :width: 100pt - :align: center - :target: https://cds.nyu.edu/mooresloan/ + `NYU Moore-Sloan Data Science Environment `_ + funded Andreas Mueller (2014-2016) to work on this project. The Moore-Sloan + Data Science Environment also funds several students to work on the project + part-time. -.. raw:: html + .. div:: image-box -
-
+ .. image:: images/nyu_short_color.png + :target: https://cds.nyu.edu/mooresloan/ ........................ -.. raw:: html +.. div:: sk-text-image-grid-small -
-
+ .. div:: text-box -`TÊlÊcom Paristech `_ funded Manoj Kumar -(2014), Tom DuprÊ la Tour (2015), Raghav RV (2015-2017), Thierry Guillemot -(2016-2017) and Albert Thomas (2017) to work on scikit-learn. + `TÊlÊcom Paristech `_ funded Manoj Kumar + (2014), Tom DuprÊ la Tour (2015), Raghav RV (2015-2017), Thierry Guillemot + (2016-2017) and Albert Thomas (2017) to work on scikit-learn. -.. raw:: html - -
-
- -.. image:: images/telecom.png - :width: 50pt - :align: center - :target: https://www.telecom-paristech.fr/ - -.. raw:: html + .. div:: image-box -
-
+ .. image:: images/telecom.png + :target: https://www.telecom-paristech.fr/ ..................... -.. raw:: html - -
-
- -`The Labex DigiCosme `_ funded Nicolas Goix -(2015-2016), Tom DuprÊ la Tour (2015-2016 and 2017-2018), Mathurin Massias -(2018-2019) to work part time on scikit-learn during their PhDs. It also -funded a scikit-learn coding sprint in 2015. - -.. raw:: html +.. div:: sk-text-image-grid-small -
-
+ .. div:: text-box -.. image:: images/digicosme.png - :width: 100pt - :align: center - :target: https://digicosme.lri.fr + `The Labex DigiCosme `_ funded Nicolas Goix + (2015-2016), Tom DuprÊ la Tour (2015-2016 and 2017-2018), Mathurin Massias + (2018-2019) to work part time on scikit-learn during their PhDs. It also + funded a scikit-learn coding sprint in 2015. -.. raw:: html + .. div:: image-box -
-
+ .. image:: images/digicosme.png + :target: https://digicosme.lri.fr ..................... -.. raw:: html - -
-
+.. div:: sk-text-image-grid-small -`The Chan-Zuckerberg Initiative `_ funded Nicolas -Hug to work full-time on scikit-learn in 2020. + .. div:: text-box -.. raw:: html - -
-
+ `The Chan-Zuckerberg Initiative `_ funded Nicolas + Hug to work full-time on scikit-learn in 2020. -.. image:: images/czi_logo.svg - :width: 100pt - :align: center - :target: https://chanzuckerberg.com + .. div:: image-box -.. raw:: html - -
-
+ .. image:: images/czi_logo.svg + :target: https://chanzuckerberg.com ...................... @@ -551,9 +467,9 @@ program. - 2007 - David Cournapeau - 2011 - `Vlad Niculae`_ -- 2012 - `Vlad Niculae`_, Immanuel Bayer. +- 2012 - `Vlad Niculae`_, Immanuel Bayer - 2013 - Kemal Eren, Nicolas TrÊsegnie -- 2014 - Hamzeh Alsalhi, Issam Laradji, Maheshakya Wijewardena, Manoj Kumar. +- 2014 - Hamzeh Alsalhi, Issam Laradji, Maheshakya Wijewardena, Manoj Kumar - 2015 - `Raghav RV `_, Wei Xue - 2016 - `Nelson Liu `_, `YenChen Lin `_ @@ -566,67 +482,118 @@ The `NeuroDebian `_ project providing `Debian `Dr. James V. Haxby `_ (`Dartmouth College `_). +................... + +The following organizations funded the scikit-learn consortium at Inria in +the past: + +.. |msn| image:: images/microsoft.png + :target: https://www.microsoft.com/ + +.. |bcg| image:: images/bcg.png + :target: https://www.bcg.com/beyond-consulting/bcg-gamma/default.aspx + +.. |fujitsu| image:: images/fujitsu.png + :target: https://www.fujitsu.com/global/ + +.. |aphp| image:: images/logo_APHP_text.png + :target: https://aphp.fr/ + +.. raw:: html + + + +.. grid:: 2 2 4 4 + :class-row: image-subgrid + :gutter: 1 + + .. grid-item:: + :class: sd-text-center + :child-align: center + + |msn| + + .. grid-item:: + :class: sd-text-center + :child-align: center + + |bcg| + + .. grid-item:: + :class: sd-text-center + :child-align: center + + |fujitsu| + + .. grid-item:: + :class: sd-text-center + :child-align: center + + |aphp| + + Sprints ------- -The International 2019 Paris sprint was kindly hosted by `AXA `_. -Also some participants could attend thanks to the support of the `Alfred P. -Sloan Foundation `_, the `Python Software -Foundation `_ (PSF) and the `DATAIA Institute -`_. - -..................... +- The International 2019 Paris sprint was kindly hosted by `AXA `_. + Also some participants could attend thanks to the support of the `Alfred P. + Sloan Foundation `_, the `Python Software + Foundation `_ (PSF) and the `DATAIA Institute + `_. -The 2013 International Paris Sprint was made possible thanks to the support of -`TÊlÊcom Paristech `_, `tinyclues -`_, the `French Python Association -`_ and the `Fonds de la Recherche Scientifique -`_. +- The 2013 International Paris Sprint was made possible thanks to the support of + `TÊlÊcom Paristech `_, `tinyclues + `_, the `French Python Association + `_ and the `Fonds de la Recherche Scientifique + `_. -.............. +- The 2011 International Granada sprint was made possible thanks to the support + of the `PSF `_ and `tinyclues + `_. -The 2011 International Granada sprint was made possible thanks to the support -of the `PSF `_ and `tinyclues -`_. Donating to the project -....................... +----------------------- If you are interested in donating to the project or to one of our code-sprints, please donate via the `NumFOCUS Donations Page `_. -.. raw :: html - - -
+.. raw:: html -All donations will be handled by `NumFOCUS -`_, a non-profit-organization which is -managed by a board of `Scipy community members -`_. NumFOCUS's mission is to foster -scientific computing software, in particular in Python. As a fiscal home -of scikit-learn, it ensures that money is available when needed to keep -the project funded and available while in compliance with tax regulations. +

+ + Help us, donate! + +

-The received donations for the scikit-learn project mostly will go towards -covering travel-expenses for code sprints, as well as towards the organization -budget of the project [#f1]_. +All donations will be handled by `NumFOCUS `_, a non-profit +organization which is managed by a board of `Scipy community members +`_. NumFOCUS's mission is to foster scientific +computing software, in particular in Python. As a fiscal home of scikit-learn, it +ensures that money is available when needed to keep the project funded and available +while in compliance with tax regulations. +The received donations for the scikit-learn project mostly will go towards covering +travel-expenses for code sprints, as well as towards the organization budget of the +project [#f1]_. .. rubric:: Notes .. [#f1] Regarding the organization budget, in particular, we might use some of - the donated funds to pay for other project expenses such as DNS, - hosting or continuous integration services. + the donated funds to pay for other project expenses such as DNS, + hosting or continuous integration services. + Infrastructure support ---------------------- -- We would also like to thank `Microsoft Azure - `_, `Cirrus Cl `_, - `CircleCl `_ for free CPU time on their Continuous - Integration servers, and `Anaconda Inc. `_ for the - storage they provide for our staging and nightly builds. +We would also like to thank `Microsoft Azure `_, +`Cirrus Cl `_, `CircleCl `_ for free CPU +time on their Continuous Integration servers, and `Anaconda Inc. `_ +for the storage they provide for our staging and nightly builds. diff --git a/doc/api/deprecated.rst.template b/doc/api/deprecated.rst.template new file mode 100644 index 0000000000000..a48f0180f76ed --- /dev/null +++ b/doc/api/deprecated.rst.template @@ -0,0 +1,24 @@ +:html_theme.sidebar_secondary.remove: + +.. _api_depr_ref: + +Recently Deprecated +=================== + +.. currentmodule:: sklearn + +{% for ver, objs in DEPRECATED_API_REFERENCE %} +.. _api_depr_ref-{{ ver|replace(".", "-") }}: + +.. rubric:: To be removed in {{ ver }} + +.. autosummary:: + :nosignatures: + :toctree: ../modules/generated/ + :template: base.rst + +{% for obj in objs %} + {{ obj }} +{%- endfor %} + +{% endfor %} diff --git a/doc/api/index.rst.template b/doc/api/index.rst.template new file mode 100644 index 0000000000000..a9f3209d350de --- /dev/null +++ b/doc/api/index.rst.template @@ -0,0 +1,77 @@ +:html_theme.sidebar_secondary.remove: + +.. _api_ref: + +============= +API Reference +============= + +This is the class and function reference of scikit-learn. Please refer to the +:ref:`full user guide ` for further details, as the raw specifications of +classes and functions may not be enough to give full guidelines on their uses. For +reference on concepts repeated across the API, see :ref:`glossary`. + +.. toctree:: + :maxdepth: 2 + :hidden: + +{% for module, _ in API_REFERENCE %} + {{ module }} +{%- endfor %} +{%- if DEPRECATED_API_REFERENCE %} + deprecated +{%- endif %} + +.. list-table:: + :header-rows: 1 + :class: apisearch-table + + * - Object + - Description + +{% for module, module_info in API_REFERENCE %} +{% for section in module_info["sections"] %} +{% for obj in section["autosummary"] %} +{% set parts = obj.rsplit(".", 1) %} +{% if parts|length > 1 %} +{% set full_module = module + "." + parts[0] %} +{% else %} +{% set full_module = module %} +{% endif %} + * - :obj:`~{{ module }}.{{ obj }}` + + - .. div:: sk-apisearch-desc + + .. currentmodule:: {{ full_module }} + + .. 
autoshortsummary:: {{ module }}.{{ obj }} + + .. div:: caption + + :mod:`{{ full_module }}` +{% endfor %} +{% endfor %} +{% endfor %} + +{% for ver, objs in DEPRECATED_API_REFERENCE %} +{% for obj in objs %} +{% set parts = obj.rsplit(".", 1) %} +{% if parts|length > 1 %} +{% set full_module = "sklearn." + parts[0] %} +{% else %} +{% set full_module = "sklearn" %} +{% endif %} + * - :obj:`~sklearn.{{ obj }}` + + - .. div:: sk-apisearch-desc + + .. currentmodule:: {{ full_module }} + + .. autoshortsummary:: sklearn.{{ obj }} + + .. div:: caption + + :mod:`{{ full_module }}` + :bdg-ref-danger-line:`Deprecated in version {{ ver }} ` +{% endfor %} +{% endfor %} diff --git a/doc/api/module.rst.template b/doc/api/module.rst.template new file mode 100644 index 0000000000000..1980f27aad158 --- /dev/null +++ b/doc/api/module.rst.template @@ -0,0 +1,46 @@ +:html_theme.sidebar_secondary.remove: + +{% if module == "sklearn" -%} +{%- set module_hook = "sklearn" -%} +{%- elif module.startswith("sklearn.") -%} +{%- set module_hook = module[8:] -%} +{%- else -%} +{%- set module_hook = None -%} +{%- endif -%} + +{% if module_hook %} +.. _{{ module_hook }}_ref: +{% endif %} + +{{ module }} +{{ "=" * module|length }} + +.. automodule:: {{ module }} + +{% if module_info["description"] %} +{{ module_info["description"] }} +{% endif %} + +{% for section in module_info["sections"] %} +{% if section["title"] and module_hook %} +.. _{{ module_hook }}_ref-{{ section["title"]|lower|replace(" ", "-") }}: +{% endif %} + +{% if section["title"] %} +{{ section["title"] }} +{{ "-" * section["title"]|length }} +{% endif %} + +{% if section["description"] %} +{{ section["description"] }} +{% endif %} + +.. autosummary:: + :nosignatures: + :toctree: ../modules/generated/ + :template: base.rst + +{% for obj in section["autosummary"] %} + {{ obj }} +{%- endfor %} +{% endfor %} diff --git a/doc/api_reference.py b/doc/api_reference.py new file mode 100644 index 0000000000000..d22f2d5aa8d13 --- /dev/null +++ b/doc/api_reference.py @@ -0,0 +1,1334 @@ +"""Configuration for the API reference documentation.""" + + +def _get_guide(*refs, is_developer=False): + """Get the rst to refer to user/developer guide. + + `refs` is several references that can be used in the :ref:`...` directive. + """ + if len(refs) == 1: + ref_desc = f":ref:`{refs[0]}` section" + elif len(refs) == 2: + ref_desc = f":ref:`{refs[0]}` and :ref:`{refs[1]}` sections" + else: + ref_desc = ", ".join(f":ref:`{ref}`" for ref in refs[:-1]) + ref_desc += f", and :ref:`{refs[-1]}` sections" + + guide_name = "Developer" if is_developer else "User" + return f"**{guide_name} guide.** See the {ref_desc} for further details." + + +def _get_submodule(module_name, submodule_name): + """Get the submodule docstring and automatically add the hook. + + `module_name` is e.g. `sklearn.feature_extraction`, and `submodule_name` is e.g. + `image`, so we get the docstring and hook for `sklearn.feature_extraction.image` + submodule. `module_name` is used to reset the current module because autosummary + automatically changes the current module. + """ + lines = [ + f".. automodule:: {module_name}.{submodule_name}", + f".. currentmodule:: {module_name}", + ] + return "\n\n".join(lines) + + +""" +CONFIGURING API_REFERENCE +========================= + +API_REFERENCE maps each module name to a dictionary that consists of the following +components: + +short_summary (required) + The text to be printed on the index page; it has nothing to do the API reference + page of each module. 
+description (required, `None` if not needed) + The additional description for the module to be placed under the module + docstring, before the sections start. +sections (required) + A list of sections, each of which consists of: + - title (required, `None` if not needed): the section title, commonly it should + not be `None` except for the first section of a module, + - description (optional): the optional additional description for the section, + - autosummary (required): an autosummary block, assuming current module is the + current module name. + +Essentially, the rendered page would look like the following: + +|---------------------------------------------------------------------------------| +| {{ module_name }} | +| ================= | +| {{ module_docstring }} | +| {{ description }} | +| | +| {{ section_title_1 }} <-------------- Optional if one wants the first | +| --------------------- section to directly follow | +| {{ section_description_1 }} without a second-level heading. | +| {{ section_autosummary_1 }} | +| | +| {{ section_title_2 }} | +| --------------------- | +| {{ section_description_2 }} | +| {{ section_autosummary_2 }} | +| | +| More sections... | +|---------------------------------------------------------------------------------| + +Hooks will be automatically generated for each module and each section. For a module, +e.g., `sklearn.feature_extraction`, the hook would be `feature_extraction_ref`; for a +section, e.g., "From text" under `sklearn.feature_extraction`, the hook would be +`feature_extraction_ref-from-text`. However, note that a better way is to refer using +the :mod: directive, e.g., :mod:`sklearn.feature_extraction` for the module and +:mod:`sklearn.feature_extraction.text` for the section. Only in case that a section +is not a particular submodule does the hook become useful, e.g., the "Loaders" section +under `sklearn.datasets`. 
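+
+For illustration only, a minimal hypothetical entry following this structure
+could look like the following (the module name, guide reference and object
+names are made up and do not refer to real scikit-learn objects):
+
+API_REFERENCE = {
+    "sklearn.some_module": {
+        "short_summary": "One-line summary shown on the index page.",
+        "description": _get_guide("some_guide_ref"),
+        "sections": [
+            {
+                "title": None,
+                "autosummary": ["SomeEstimator", "some_function"],
+            },
+        ],
+    },
+}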
+""" + +API_REFERENCE = { + "sklearn": { + "short_summary": "Settings and information tools.", + "description": None, + "sections": [ + { + "title": None, + "autosummary": [ + "config_context", + "get_config", + "set_config", + "show_versions", + ], + }, + ], + }, + "sklearn.base": { + "short_summary": "Base classes and utility functions.", + "description": None, + "sections": [ + { + "title": None, + "autosummary": [ + "BaseEstimator", + "BiclusterMixin", + "ClassNamePrefixFeaturesOutMixin", + "ClassifierMixin", + "ClusterMixin", + "DensityMixin", + "MetaEstimatorMixin", + "OneToOneFeatureMixin", + "OutlierMixin", + "RegressorMixin", + "TransformerMixin", + "clone", + "is_classifier", + "is_regressor", + ], + } + ], + }, + "sklearn.calibration": { + "short_summary": "Probability calibration.", + "description": _get_guide("calibration"), + "sections": [ + { + "title": None, + "autosummary": ["CalibratedClassifierCV", "calibration_curve"], + }, + { + "title": "Visualization", + "autosummary": ["CalibrationDisplay"], + }, + ], + }, + "sklearn.cluster": { + "short_summary": "Clustering.", + "description": _get_guide("clustering", "biclustering"), + "sections": [ + { + "title": None, + "autosummary": [ + "AffinityPropagation", + "AgglomerativeClustering", + "Birch", + "BisectingKMeans", + "DBSCAN", + "FeatureAgglomeration", + "HDBSCAN", + "KMeans", + "MeanShift", + "MiniBatchKMeans", + "OPTICS", + "SpectralBiclustering", + "SpectralClustering", + "SpectralCoclustering", + "affinity_propagation", + "cluster_optics_dbscan", + "cluster_optics_xi", + "compute_optics_graph", + "dbscan", + "estimate_bandwidth", + "k_means", + "kmeans_plusplus", + "mean_shift", + "spectral_clustering", + "ward_tree", + ], + }, + ], + }, + "sklearn.compose": { + "short_summary": "Composite estimators.", + "description": _get_guide("combining_estimators"), + "sections": [ + { + "title": None, + "autosummary": [ + "ColumnTransformer", + "TransformedTargetRegressor", + "make_column_selector", + "make_column_transformer", + ], + }, + ], + }, + "sklearn.covariance": { + "short_summary": "Covariance estimation.", + "description": _get_guide("covariance"), + "sections": [ + { + "title": None, + "autosummary": [ + "EllipticEnvelope", + "EmpiricalCovariance", + "GraphicalLasso", + "GraphicalLassoCV", + "LedoitWolf", + "MinCovDet", + "OAS", + "ShrunkCovariance", + "empirical_covariance", + "graphical_lasso", + "ledoit_wolf", + "ledoit_wolf_shrinkage", + "oas", + "shrunk_covariance", + ], + }, + ], + }, + "sklearn.cross_decomposition": { + "short_summary": "Cross decomposition.", + "description": _get_guide("cross_decomposition"), + "sections": [ + { + "title": None, + "autosummary": ["CCA", "PLSCanonical", "PLSRegression", "PLSSVD"], + }, + ], + }, + "sklearn.datasets": { + "short_summary": "Datasets.", + "description": _get_guide("datasets"), + "sections": [ + { + "title": "Loaders", + "autosummary": [ + "clear_data_home", + "dump_svmlight_file", + "fetch_20newsgroups", + "fetch_20newsgroups_vectorized", + "fetch_california_housing", + "fetch_covtype", + "fetch_kddcup99", + "fetch_lfw_pairs", + "fetch_lfw_people", + "fetch_olivetti_faces", + "fetch_openml", + "fetch_rcv1", + "fetch_species_distributions", + "get_data_home", + "load_breast_cancer", + "load_diabetes", + "load_digits", + "load_files", + "load_iris", + "load_linnerud", + "load_sample_image", + "load_sample_images", + "load_svmlight_file", + "load_svmlight_files", + "load_wine", + ], + }, + { + "title": "Sample generators", + "autosummary": [ + "make_biclusters", + 
"make_blobs", + "make_checkerboard", + "make_circles", + "make_classification", + "make_friedman1", + "make_friedman2", + "make_friedman3", + "make_gaussian_quantiles", + "make_hastie_10_2", + "make_low_rank_matrix", + "make_moons", + "make_multilabel_classification", + "make_regression", + "make_s_curve", + "make_sparse_coded_signal", + "make_sparse_spd_matrix", + "make_sparse_uncorrelated", + "make_spd_matrix", + "make_swiss_roll", + ], + }, + ], + }, + "sklearn.decomposition": { + "short_summary": "Matrix decomposition.", + "description": _get_guide("decompositions"), + "sections": [ + { + "title": None, + "autosummary": [ + "DictionaryLearning", + "FactorAnalysis", + "FastICA", + "IncrementalPCA", + "KernelPCA", + "LatentDirichletAllocation", + "MiniBatchDictionaryLearning", + "MiniBatchNMF", + "MiniBatchSparsePCA", + "NMF", + "PCA", + "SparseCoder", + "SparsePCA", + "TruncatedSVD", + "dict_learning", + "dict_learning_online", + "fastica", + "non_negative_factorization", + "sparse_encode", + ], + }, + ], + }, + "sklearn.discriminant_analysis": { + "short_summary": "Discriminant analysis.", + "description": _get_guide("lda_qda"), + "sections": [ + { + "title": None, + "autosummary": [ + "LinearDiscriminantAnalysis", + "QuadraticDiscriminantAnalysis", + ], + }, + ], + }, + "sklearn.dummy": { + "short_summary": "Dummy estimators.", + "description": _get_guide("model_evaluation"), + "sections": [ + { + "title": None, + "autosummary": ["DummyClassifier", "DummyRegressor"], + }, + ], + }, + "sklearn.ensemble": { + "short_summary": "Ensemble methods.", + "description": _get_guide("ensemble"), + "sections": [ + { + "title": None, + "autosummary": [ + "AdaBoostClassifier", + "AdaBoostRegressor", + "BaggingClassifier", + "BaggingRegressor", + "ExtraTreesClassifier", + "ExtraTreesRegressor", + "GradientBoostingClassifier", + "GradientBoostingRegressor", + "HistGradientBoostingClassifier", + "HistGradientBoostingRegressor", + "IsolationForest", + "RandomForestClassifier", + "RandomForestRegressor", + "RandomTreesEmbedding", + "StackingClassifier", + "StackingRegressor", + "VotingClassifier", + "VotingRegressor", + ], + }, + ], + }, + "sklearn.exceptions": { + "short_summary": "Exceptions and warnings.", + "description": None, + "sections": [ + { + "title": None, + "autosummary": [ + "ConvergenceWarning", + "DataConversionWarning", + "DataDimensionalityWarning", + "EfficiencyWarning", + "FitFailedWarning", + "InconsistentVersionWarning", + "NotFittedError", + "UndefinedMetricWarning", + ], + }, + ], + }, + "sklearn.experimental": { + "short_summary": "Experimental tools.", + "description": None, + "sections": [ + { + "title": None, + "autosummary": ["enable_halving_search_cv", "enable_iterative_imputer"], + }, + ], + }, + "sklearn.feature_extraction": { + "short_summary": "Feature extraction.", + "description": _get_guide("feature_extraction"), + "sections": [ + { + "title": None, + "autosummary": ["DictVectorizer", "FeatureHasher"], + }, + { + "title": "From images", + "description": _get_submodule("sklearn.feature_extraction", "image"), + "autosummary": [ + "image.PatchExtractor", + "image.extract_patches_2d", + "image.grid_to_graph", + "image.img_to_graph", + "image.reconstruct_from_patches_2d", + ], + }, + { + "title": "From text", + "description": _get_submodule("sklearn.feature_extraction", "text"), + "autosummary": [ + "text.CountVectorizer", + "text.HashingVectorizer", + "text.TfidfTransformer", + "text.TfidfVectorizer", + ], + }, + ], + }, + "sklearn.feature_selection": { + 
"short_summary": "Feature selection.", + "description": _get_guide("feature_selection"), + "sections": [ + { + "title": None, + "autosummary": [ + "GenericUnivariateSelect", + "RFE", + "RFECV", + "SelectFdr", + "SelectFpr", + "SelectFromModel", + "SelectFwe", + "SelectKBest", + "SelectPercentile", + "SelectorMixin", + "SequentialFeatureSelector", + "VarianceThreshold", + "chi2", + "f_classif", + "f_regression", + "mutual_info_classif", + "mutual_info_regression", + "r_regression", + ], + }, + ], + }, + "sklearn.gaussian_process": { + "short_summary": "Gaussian processes.", + "description": _get_guide("gaussian_process"), + "sections": [ + { + "title": None, + "autosummary": [ + "GaussianProcessClassifier", + "GaussianProcessRegressor", + ], + }, + { + "title": "Kernels", + "description": _get_submodule("sklearn.gaussian_process", "kernels"), + "autosummary": [ + "kernels.CompoundKernel", + "kernels.ConstantKernel", + "kernels.DotProduct", + "kernels.ExpSineSquared", + "kernels.Exponentiation", + "kernels.Hyperparameter", + "kernels.Kernel", + "kernels.Matern", + "kernels.PairwiseKernel", + "kernels.Product", + "kernels.RBF", + "kernels.RationalQuadratic", + "kernels.Sum", + "kernels.WhiteKernel", + ], + }, + ], + }, + "sklearn.impute": { + "short_summary": "Imputation.", + "description": _get_guide("impute"), + "sections": [ + { + "title": None, + "autosummary": [ + "IterativeImputer", + "KNNImputer", + "MissingIndicator", + "SimpleImputer", + ], + }, + ], + }, + "sklearn.inspection": { + "short_summary": "Inspection.", + "description": _get_guide("inspection"), + "sections": [ + { + "title": None, + "autosummary": ["partial_dependence", "permutation_importance"], + }, + { + "title": "Plotting", + "autosummary": ["DecisionBoundaryDisplay", "PartialDependenceDisplay"], + }, + ], + }, + "sklearn.isotonic": { + "short_summary": "Isotonic regression.", + "description": _get_guide("isotonic"), + "sections": [ + { + "title": None, + "autosummary": [ + "IsotonicRegression", + "check_increasing", + "isotonic_regression", + ], + }, + ], + }, + "sklearn.kernel_approximation": { + "short_summary": "Isotonic regression.", + "description": _get_guide("kernel_approximation"), + "sections": [ + { + "title": None, + "autosummary": [ + "AdditiveChi2Sampler", + "Nystroem", + "PolynomialCountSketch", + "RBFSampler", + "SkewedChi2Sampler", + ], + }, + ], + }, + "sklearn.kernel_ridge": { + "short_summary": "Kernel ridge regression.", + "description": _get_guide("kernel_ridge"), + "sections": [ + { + "title": None, + "autosummary": ["KernelRidge"], + }, + ], + }, + "sklearn.linear_model": { + "short_summary": "Generalized linear models.", + "description": ( + _get_guide("linear_model") + + "\n\nThe following subsections are only rough guidelines: the same " + "estimator can fall into multiple categories, depending on its parameters." 
+ ), + "sections": [ + { + "title": "Linear classifiers", + "autosummary": [ + "LogisticRegression", + "LogisticRegressionCV", + "PassiveAggressiveClassifier", + "Perceptron", + "RidgeClassifier", + "RidgeClassifierCV", + "SGDClassifier", + "SGDOneClassSVM", + ], + }, + { + "title": "Classical linear regressors", + "autosummary": ["LinearRegression", "Ridge", "RidgeCV", "SGDRegressor"], + }, + { + "title": "Regressors with variable selection", + "description": ( + "The following estimators have built-in variable selection fitting " + "procedures, but any estimator using a L1 or elastic-net penalty " + "also performs variable selection: typically " + ":class:`~linear_model.SGDRegressor` or " + ":class:`~sklearn.linear_model.SGDClassifier` with an appropriate " + "penalty." + ), + "autosummary": [ + "ElasticNet", + "ElasticNetCV", + "Lars", + "LarsCV", + "Lasso", + "LassoCV", + "LassoLars", + "LassoLarsCV", + "LassoLarsIC", + "OrthogonalMatchingPursuit", + "OrthogonalMatchingPursuitCV", + ], + }, + { + "title": "Bayesian regressors", + "autosummary": ["ARDRegression", "BayesianRidge"], + }, + { + "title": "Multi-task linear regressors with variable selection", + "description": ( + "These estimators fit multiple regression problems (or tasks)" + " jointly, while inducing sparse coefficients. While the inferred" + " coefficients may differ between the tasks, they are constrained" + " to agree on the features that are selected (non-zero" + " coefficients)." + ), + "autosummary": [ + "MultiTaskElasticNet", + "MultiTaskElasticNetCV", + "MultiTaskLasso", + "MultiTaskLassoCV", + ], + }, + { + "title": "Outlier-robust regressors", + "description": ( + "Any estimator using the Huber loss would also be robust to " + "outliers, e.g., :class:`~linear_model.SGDRegressor` with " + "``loss='huber'``." + ), + "autosummary": [ + "HuberRegressor", + "QuantileRegressor", + "RANSACRegressor", + "TheilSenRegressor", + ], + }, + { + "title": "Generalized linear models (GLM) for regression", + "description": ( + "These models allow for response variables to have error " + "distributions other than a normal distribution." 
+ ), + "autosummary": [ + "GammaRegressor", + "PoissonRegressor", + "TweedieRegressor", + ], + }, + { + "title": "Miscellaneous", + "autosummary": [ + "PassiveAggressiveRegressor", + "enet_path", + "lars_path", + "lars_path_gram", + "lasso_path", + "orthogonal_mp", + "orthogonal_mp_gram", + "ridge_regression", + ], + }, + ], + }, + "sklearn.manifold": { + "short_summary": "Manifold learning.", + "description": _get_guide("manifold"), + "sections": [ + { + "title": None, + "autosummary": [ + "Isomap", + "LocallyLinearEmbedding", + "MDS", + "SpectralEmbedding", + "TSNE", + "locally_linear_embedding", + "smacof", + "spectral_embedding", + "trustworthiness", + ], + }, + ], + }, + "sklearn.metrics": { + "short_summary": "Metrics.", + "description": _get_guide("model_evaluation", "metrics"), + "sections": [ + { + "title": "Model selection interface", + "description": _get_guide("scoring_parameter"), + "autosummary": [ + "check_scoring", + "get_scorer", + "get_scorer_names", + "make_scorer", + ], + }, + { + "title": "Classification metrics", + "description": _get_guide("classification_metrics"), + "autosummary": [ + "accuracy_score", + "auc", + "average_precision_score", + "balanced_accuracy_score", + "brier_score_loss", + "class_likelihood_ratios", + "classification_report", + "cohen_kappa_score", + "confusion_matrix", + "d2_log_loss_score", + "dcg_score", + "det_curve", + "f1_score", + "fbeta_score", + "hamming_loss", + "hinge_loss", + "jaccard_score", + "log_loss", + "matthews_corrcoef", + "multilabel_confusion_matrix", + "ndcg_score", + "precision_recall_curve", + "precision_recall_fscore_support", + "precision_score", + "recall_score", + "roc_auc_score", + "roc_curve", + "top_k_accuracy_score", + "zero_one_loss", + ], + }, + { + "title": "Regression metrics", + "description": _get_guide("regression_metrics"), + "autosummary": [ + "d2_absolute_error_score", + "d2_pinball_score", + "d2_tweedie_score", + "explained_variance_score", + "max_error", + "mean_absolute_error", + "mean_absolute_percentage_error", + "mean_gamma_deviance", + "mean_pinball_loss", + "mean_poisson_deviance", + "mean_squared_error", + "mean_squared_log_error", + "mean_tweedie_deviance", + "median_absolute_error", + "r2_score", + "root_mean_squared_error", + "root_mean_squared_log_error", + ], + }, + { + "title": "Multilabel ranking metrics", + "description": _get_guide("multilabel_ranking_metrics"), + "autosummary": [ + "coverage_error", + "label_ranking_average_precision_score", + "label_ranking_loss", + ], + }, + { + "title": "Clustering metrics", + "description": ( + _get_submodule("sklearn.metrics", "cluster") + + "\n\n" + + _get_guide("clustering_evaluation") + ), + "autosummary": [ + "adjusted_mutual_info_score", + "adjusted_rand_score", + "calinski_harabasz_score", + "cluster.contingency_matrix", + "cluster.pair_confusion_matrix", + "completeness_score", + "davies_bouldin_score", + "fowlkes_mallows_score", + "homogeneity_completeness_v_measure", + "homogeneity_score", + "mutual_info_score", + "normalized_mutual_info_score", + "rand_score", + "silhouette_samples", + "silhouette_score", + "v_measure_score", + ], + }, + { + "title": "Biclustering metrics", + "description": _get_guide("biclustering_evaluation"), + "autosummary": ["consensus_score"], + }, + { + "title": "Distance metrics", + "autosummary": ["DistanceMetric"], + }, + { + "title": "Pairwise metrics", + "description": ( + _get_submodule("sklearn.metrics", "pairwise") + + "\n\n" + + _get_guide("metrics") + ), + "autosummary": [ + 
"pairwise.additive_chi2_kernel", + "pairwise.chi2_kernel", + "pairwise.cosine_distances", + "pairwise.cosine_similarity", + "pairwise.distance_metrics", + "pairwise.euclidean_distances", + "pairwise.haversine_distances", + "pairwise.kernel_metrics", + "pairwise.laplacian_kernel", + "pairwise.linear_kernel", + "pairwise.manhattan_distances", + "pairwise.nan_euclidean_distances", + "pairwise.paired_cosine_distances", + "pairwise.paired_distances", + "pairwise.paired_euclidean_distances", + "pairwise.paired_manhattan_distances", + "pairwise.pairwise_kernels", + "pairwise.polynomial_kernel", + "pairwise.rbf_kernel", + "pairwise.sigmoid_kernel", + "pairwise_distances", + "pairwise_distances_argmin", + "pairwise_distances_argmin_min", + "pairwise_distances_chunked", + ], + }, + { + "title": "Plotting", + "description": _get_guide("visualizations"), + "autosummary": [ + "ConfusionMatrixDisplay", + "DetCurveDisplay", + "PrecisionRecallDisplay", + "PredictionErrorDisplay", + "RocCurveDisplay", + ], + }, + ], + }, + "sklearn.mixture": { + "short_summary": "Gaussian mixture models.", + "description": _get_guide("mixture"), + "sections": [ + { + "title": None, + "autosummary": ["BayesianGaussianMixture", "GaussianMixture"], + }, + ], + }, + "sklearn.model_selection": { + "short_summary": "Model selection.", + "description": _get_guide("cross_validation", "grid_search", "learning_curve"), + "sections": [ + { + "title": "Splitters", + "autosummary": [ + "GroupKFold", + "GroupShuffleSplit", + "KFold", + "LeaveOneGroupOut", + "LeaveOneOut", + "LeavePGroupsOut", + "LeavePOut", + "PredefinedSplit", + "RepeatedKFold", + "RepeatedStratifiedKFold", + "ShuffleSplit", + "StratifiedGroupKFold", + "StratifiedKFold", + "StratifiedShuffleSplit", + "TimeSeriesSplit", + "check_cv", + "train_test_split", + ], + }, + { + "title": "Hyper-parameter optimizers", + "autosummary": [ + "GridSearchCV", + "HalvingGridSearchCV", + "HalvingRandomSearchCV", + "ParameterGrid", + "ParameterSampler", + "RandomizedSearchCV", + ], + }, + { + "title": "Post-fit model tuning", + "autosummary": [ + "FixedThresholdClassifier", + "TunedThresholdClassifierCV", + ], + }, + { + "title": "Model validation", + "autosummary": [ + "cross_val_predict", + "cross_val_score", + "cross_validate", + "learning_curve", + "permutation_test_score", + "validation_curve", + ], + }, + { + "title": "Visualization", + "autosummary": ["LearningCurveDisplay", "ValidationCurveDisplay"], + }, + ], + }, + "sklearn.multiclass": { + "short_summary": "Multiclass classification.", + "description": _get_guide("multiclass_classification"), + "sections": [ + { + "title": None, + "autosummary": [ + "OneVsOneClassifier", + "OneVsRestClassifier", + "OutputCodeClassifier", + ], + }, + ], + }, + "sklearn.multioutput": { + "short_summary": "Multioutput regression and classification.", + "description": _get_guide( + "multilabel_classification", + "multiclass_multioutput_classification", + "multioutput_regression", + ), + "sections": [ + { + "title": None, + "autosummary": [ + "ClassifierChain", + "MultiOutputClassifier", + "MultiOutputRegressor", + "RegressorChain", + ], + }, + ], + }, + "sklearn.naive_bayes": { + "short_summary": "Naive Bayes.", + "description": _get_guide("naive_bayes"), + "sections": [ + { + "title": None, + "autosummary": [ + "BernoulliNB", + "CategoricalNB", + "ComplementNB", + "GaussianNB", + "MultinomialNB", + ], + }, + ], + }, + "sklearn.neighbors": { + "short_summary": "Nearest neighbors.", + "description": _get_guide("neighbors"), + "sections": [ + { + 
"title": None, + "autosummary": [ + "BallTree", + "KDTree", + "KNeighborsClassifier", + "KNeighborsRegressor", + "KNeighborsTransformer", + "KernelDensity", + "LocalOutlierFactor", + "NearestCentroid", + "NearestNeighbors", + "NeighborhoodComponentsAnalysis", + "RadiusNeighborsClassifier", + "RadiusNeighborsRegressor", + "RadiusNeighborsTransformer", + "kneighbors_graph", + "radius_neighbors_graph", + "sort_graph_by_row_values", + ], + }, + ], + }, + "sklearn.neural_network": { + "short_summary": "Neural network models.", + "description": _get_guide( + "neural_networks_supervised", "neural_networks_unsupervised" + ), + "sections": [ + { + "title": None, + "autosummary": ["BernoulliRBM", "MLPClassifier", "MLPRegressor"], + }, + ], + }, + "sklearn.pipeline": { + "short_summary": "Pipeline.", + "description": _get_guide("combining_estimators"), + "sections": [ + { + "title": None, + "autosummary": [ + "FeatureUnion", + "Pipeline", + "make_pipeline", + "make_union", + ], + }, + ], + }, + "sklearn.preprocessing": { + "short_summary": "Preprocessing and normalization.", + "description": _get_guide("preprocessing"), + "sections": [ + { + "title": None, + "autosummary": [ + "Binarizer", + "FunctionTransformer", + "KBinsDiscretizer", + "KernelCenterer", + "LabelBinarizer", + "LabelEncoder", + "MaxAbsScaler", + "MinMaxScaler", + "MultiLabelBinarizer", + "Normalizer", + "OneHotEncoder", + "OrdinalEncoder", + "PolynomialFeatures", + "PowerTransformer", + "QuantileTransformer", + "RobustScaler", + "SplineTransformer", + "StandardScaler", + "TargetEncoder", + "add_dummy_feature", + "binarize", + "label_binarize", + "maxabs_scale", + "minmax_scale", + "normalize", + "power_transform", + "quantile_transform", + "robust_scale", + "scale", + ], + }, + ], + }, + "sklearn.random_projection": { + "short_summary": "Random projection.", + "description": _get_guide("random_projection"), + "sections": [ + { + "title": None, + "autosummary": [ + "GaussianRandomProjection", + "SparseRandomProjection", + "johnson_lindenstrauss_min_dim", + ], + }, + ], + }, + "sklearn.semi_supervised": { + "short_summary": "Semi-supervised learning.", + "description": _get_guide("semi_supervised"), + "sections": [ + { + "title": None, + "autosummary": [ + "LabelPropagation", + "LabelSpreading", + "SelfTrainingClassifier", + ], + }, + ], + }, + "sklearn.svm": { + "short_summary": "Support vector machines.", + "description": _get_guide("svm"), + "sections": [ + { + "title": None, + "autosummary": [ + "LinearSVC", + "LinearSVR", + "NuSVC", + "NuSVR", + "OneClassSVM", + "SVC", + "SVR", + "l1_min_c", + ], + }, + ], + }, + "sklearn.tree": { + "short_summary": "Decision trees.", + "description": _get_guide("tree"), + "sections": [ + { + "title": None, + "autosummary": [ + "DecisionTreeClassifier", + "DecisionTreeRegressor", + "ExtraTreeClassifier", + "ExtraTreeRegressor", + ], + }, + { + "title": "Exporting", + "autosummary": ["export_graphviz", "export_text"], + }, + { + "title": "Plotting", + "autosummary": ["plot_tree"], + }, + ], + }, + "sklearn.utils": { + "short_summary": "Utilities.", + "description": _get_guide("developers-utils", is_developer=True), + "sections": [ + { + "title": None, + "autosummary": [ + "Bunch", + "_safe_indexing", + "as_float_array", + "assert_all_finite", + "deprecated", + "estimator_html_repr", + "gen_batches", + "gen_even_slices", + "indexable", + "murmurhash3_32", + "resample", + "safe_mask", + "safe_sqr", + "shuffle", + ], + }, + { + "title": "Input and parameter validation", + "description": 
_get_submodule("sklearn.utils", "validation"), + "autosummary": [ + "check_X_y", + "check_array", + "check_consistent_length", + "check_random_state", + "check_scalar", + "validation.check_is_fitted", + "validation.check_memory", + "validation.check_symmetric", + "validation.column_or_1d", + "validation.has_fit_parameter", + ], + }, + { + "title": "Meta-estimators", + "description": _get_submodule("sklearn.utils", "metaestimators"), + "autosummary": ["metaestimators.available_if"], + }, + { + "title": "Weight handling based on class labels", + "description": _get_submodule("sklearn.utils", "class_weight"), + "autosummary": [ + "class_weight.compute_class_weight", + "class_weight.compute_sample_weight", + ], + }, + { + "title": "Dealing with multiclass target in classifiers", + "description": _get_submodule("sklearn.utils", "multiclass"), + "autosummary": [ + "multiclass.is_multilabel", + "multiclass.type_of_target", + "multiclass.unique_labels", + ], + }, + { + "title": "Optimal mathematical operations", + "description": _get_submodule("sklearn.utils", "extmath"), + "autosummary": [ + "extmath.density", + "extmath.fast_logdet", + "extmath.randomized_range_finder", + "extmath.randomized_svd", + "extmath.safe_sparse_dot", + "extmath.weighted_mode", + ], + }, + { + "title": "Working with sparse matrices and arrays", + "description": _get_submodule("sklearn.utils", "sparsefuncs"), + "autosummary": [ + "sparsefuncs.incr_mean_variance_axis", + "sparsefuncs.inplace_column_scale", + "sparsefuncs.inplace_csr_column_scale", + "sparsefuncs.inplace_row_scale", + "sparsefuncs.inplace_swap_column", + "sparsefuncs.inplace_swap_row", + "sparsefuncs.mean_variance_axis", + ], + }, + { + "title": None, + "description": _get_submodule("sklearn.utils", "sparsefuncs_fast"), + "autosummary": [ + "sparsefuncs_fast.inplace_csr_row_normalize_l1", + "sparsefuncs_fast.inplace_csr_row_normalize_l2", + ], + }, + { + "title": "Working with graphs", + "description": _get_submodule("sklearn.utils", "graph"), + "autosummary": ["graph.single_source_shortest_path_length"], + }, + { + "title": "Random sampling", + "description": _get_submodule("sklearn.utils", "random"), + "autosummary": ["random.sample_without_replacement"], + }, + { + "title": "Auxiliary functions that operate on arrays", + "description": _get_submodule("sklearn.utils", "arrayfuncs"), + "autosummary": ["arrayfuncs.min_pos"], + }, + { + "title": "Metadata routing", + "description": ( + _get_submodule("sklearn.utils", "metadata_routing") + + "\n\n" + + _get_guide("metadata_routing") + ), + "autosummary": [ + "metadata_routing.MetadataRequest", + "metadata_routing.MetadataRouter", + "metadata_routing.MethodMapping", + "metadata_routing.get_routing_for_object", + "metadata_routing.process_routing", + ], + }, + { + "title": "Discovering scikit-learn objects", + "description": _get_submodule("sklearn.utils", "discovery"), + "autosummary": [ + "discovery.all_displays", + "discovery.all_estimators", + "discovery.all_functions", + ], + }, + { + "title": "API compatibility checkers", + "description": _get_submodule("sklearn.utils", "estimator_checks"), + "autosummary": [ + "estimator_checks.check_estimator", + "estimator_checks.parametrize_with_checks", + ], + }, + { + "title": "Parallel computing", + "description": _get_submodule("sklearn.utils", "parallel"), + "autosummary": [ + "parallel.Parallel", + "parallel.delayed", + ], + }, + ], + }, +} + + +""" +CONFIGURING DEPRECATED_API_REFERENCE +==================================== + +DEPRECATED_API_REFERENCE maps each 
deprecation target version to a corresponding +autosummary block. It will be placed at the bottom of the API index page under the +"Recently deprecated" section. Essentially, the rendered section would look like the +following: + +|------------------------------------------| +| To be removed in {{ version_1 }} | +| -------------------------------- | +| {{ autosummary_1 }} | +| | +| To be removed in {{ version_2 }} | +| -------------------------------- | +| {{ autosummary_2 }} | +| | +| More versions... | +|------------------------------------------| + +Note that the autosummary here assumes that the current module is `sklearn`, i.e., if +`sklearn.utils.Memory` is deprecated, one should put `utils.Memory` in the "entries" +slot of the autosummary block. + +Example: + +DEPRECATED_API_REFERENCE = { + "0.24": [ + "model_selection.fit_grid_point", + "utils.safe_indexing", + ], +} +""" + +DEPRECATED_API_REFERENCE = { + "1.7": [ + "utils.parallel_backend", + "utils.register_parallel_backend", + ] +} # type: ignore diff --git a/doc/common_pitfalls.rst b/doc/common_pitfalls.rst index 77341047857b5..c16385943f9ad 100644 --- a/doc/common_pitfalls.rst +++ b/doc/common_pitfalls.rst @@ -1,9 +1,3 @@ -.. Places parent toc into the sidebar - -:parenttoc: True - -.. include:: includes/big_toc_css.rst - .. _common_pitfalls: ========================================= @@ -104,6 +98,26 @@ be the average of the train subset, **not** the average of all the data. If the test subset is included in the average calculation, information from the test subset is influencing the model. +How to avoid data leakage +------------------------- + +Below are some tips on avoiding data leakage: + +* Always split the data into train and test subsets first, particularly + before any preprocessing steps. +* Never include test data when using the `fit` and `fit_transform` + methods. Using all the data, e.g., `fit(X)`, can result in overly optimistic + scores. + + Conversely, the `transform` method should be used on both train and test + subsets as the same preprocessing should be applied to all the data. + This can be achieved by using `fit_transform` on the train subset and + `transform` on the test subset. +* The scikit-learn :ref:`pipeline ` is a great way to prevent data + leakage as it ensures that the appropriate method is performed on the + correct data subset. The pipeline is ideal for use in cross-validation + and hyper-parameter tuning functions. + An example of data leakage during preprocessing is detailed below. Data leakage during pre-processing @@ -211,27 +225,8 @@ method is used during fitting and predicting:: >>> from sklearn.model_selection import cross_val_score >>> scores = cross_val_score(pipeline, X, y) >>> print(f"Mean accuracy: {scores.mean():.2f}+/-{scores.std():.2f}") - Mean accuracy: 0.45+/-0.07 + Mean accuracy: 0.46+/-0.07 -How to avoid data leakage -------------------------- - -Below are some tips on avoiding data leakage: - -* Always split the data into train and test subsets first, particularly - before any preprocessing steps. -* Never include test data when using the `fit` and `fit_transform` - methods. Using all the data, e.g., `fit(X)`, can result in overly optimistic - scores. - - Conversely, the `transform` method should be used on both train and test - subsets as the same preprocessing should be applied to all the data. - This can be achieved by using `fit_transform` on the train subset and - `transform` on the test subset. 
-* The scikit-learn :ref:`pipeline ` is a great way to prevent data - leakage as it ensures that the appropriate method is performed on the - correct data subset. The pipeline is ideal for use in cross-validation - and hyper-parameter tuning functions. .. _randomness: @@ -413,39 +408,40 @@ it will allow the estimator RNG to vary for each fold. illustration purpose: what matters is what we pass to the :class:`~sklearn.ensemble.RandomForestClassifier` estimator. -**Cloning** +.. dropdown:: Cloning -Another subtle side effect of passing `RandomState` instances is how -:func:`~sklearn.clone` will work:: + Another subtle side effect of passing `RandomState` instances is how + :func:`~sklearn.base.clone` will work:: - >>> from sklearn import clone - >>> from sklearn.ensemble import RandomForestClassifier - >>> import numpy as np + >>> from sklearn import clone + >>> from sklearn.ensemble import RandomForestClassifier + >>> import numpy as np + + >>> rng = np.random.RandomState(0) + >>> a = RandomForestClassifier(random_state=rng) + >>> b = clone(a) + + Since a `RandomState` instance was passed to `a`, `a` and `b` are not clones + in the strict sense, but rather clones in the statistical sense: `a` and `b` + will still be different models, even when calling `fit(X, y)` on the same + data. Moreover, `a` and `b` will influence each-other since they share the + same internal RNG: calling `a.fit` will consume `b`'s RNG, and calling + `b.fit` will consume `a`'s RNG, since they are the same. This bit is true for + any estimators that share a `random_state` parameter; it is not specific to + clones. + + If an integer were passed, `a` and `b` would be exact clones and they would not + influence each other. + + .. warning:: + Even though :func:`~sklearn.base.clone` is rarely used in user code, it is + called pervasively throughout scikit-learn codebase: in particular, most + meta-estimators that accept non-fitted estimators call + :func:`~sklearn.base.clone` internally + (:class:`~sklearn.model_selection.GridSearchCV`, + :class:`~sklearn.ensemble.StackingClassifier`, + :class:`~sklearn.calibration.CalibratedClassifierCV`, etc.). - >>> rng = np.random.RandomState(0) - >>> a = RandomForestClassifier(random_state=rng) - >>> b = clone(a) - -Since a `RandomState` instance was passed to `a`, `a` and `b` are not clones -in the strict sense, but rather clones in the statistical sense: `a` and `b` -will still be different models, even when calling `fit(X, y)` on the same -data. Moreover, `a` and `b` will influence each-other since they share the -same internal RNG: calling `a.fit` will consume `b`'s RNG, and calling -`b.fit` will consume `a`'s RNG, since they are the same. This bit is true for -any estimators that share a `random_state` parameter; it is not specific to -clones. - -If an integer were passed, `a` and `b` would be exact clones and they would not -influence each other. - -.. warning:: - Even though :func:`~sklearn.clone` is rarely used in user code, it is - called pervasively throughout scikit-learn codebase: in particular, most - meta-estimators that accept non-fitted estimators call - :func:`~sklearn.clone` internally - (:class:`~sklearn.model_selection.GridSearchCV`, - :class:`~sklearn.ensemble.StackingClassifier`, - :class:`~sklearn.calibration.CalibratedClassifierCV`, etc.). CV splitters ............ 
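A minimal sketch of the data-leakage tips added above (split first, fit the
preprocessing on the train subset only, then reuse it on the test subset); this
snippet is illustrative only, not part of the patch, and the toy data and
variable names are made up::

    import numpy as np
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import StandardScaler

    rng = np.random.RandomState(0)
    X = rng.normal(size=(100, 5))
    y = rng.randint(0, 2, size=100)

    # Split first, before any preprocessing is fitted.
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)  # statistics learned on train only
    X_test_scaled = scaler.transform(X_test)        # same statistics reused on test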
@@ -553,7 +549,7 @@ When we evaluate a randomized estimator performance by cross-validation, we want to make sure that the estimator can yield accurate predictions for new data, but we also want to make sure that the estimator is robust w.r.t. its random initialization. For example, we would like the random weights -initialization of a :class:`~sklearn.linear_model.SGDCLassifier` to be +initialization of a :class:`~sklearn.linear_model.SGDClassifier` to be consistently good across all folds: otherwise, when we train that estimator on new data, we might get unlucky and the random initialization may lead to bad performance. Similarly, we want a random forest to be robust w.r.t the diff --git a/doc/communication_team.rst b/doc/communication_team.rst index 48a876bd35725..30e4f1169cfc9 100644 --- a/doc/communication_team.rst +++ b/doc/communication_team.rst @@ -11,6 +11,6 @@

-   francoisgoupil
+   François Goupil

diff --git a/doc/computing.rst b/doc/computing.rst index 6732b754918b0..9f166432006b2 100644 --- a/doc/computing.rst +++ b/doc/computing.rst @@ -1,13 +1,7 @@ -.. Places parent toc into the sidebar - -:parenttoc: True - ============================ Computing with scikit-learn ============================ -.. include:: includes/big_toc_css.rst - .. toctree:: :maxdepth: 2 diff --git a/doc/computing/computational_performance.rst b/doc/computing/computational_performance.rst index dd5720630c377..a7b6d3a37001e 100644 --- a/doc/computing/computational_performance.rst +++ b/doc/computing/computational_performance.rst @@ -1,7 +1,3 @@ -.. Places parent toc into the sidebar - -:parenttoc: True - .. _computational_performance: .. currentmodule:: sklearn @@ -39,10 +35,11 @@ machine learning toolkit is the latency at which predictions can be made in a production environment. The main factors that influence the prediction latency are - 1. Number of features - 2. Input data representation and sparsity - 3. Model complexity - 4. Feature extraction + +1. Number of features +2. Input data representation and sparsity +3. Model complexity +4. Feature extraction A last major parameter is also the possibility to do predictions in bulk or one-at-a-time mode. @@ -224,9 +221,9 @@ files, tokenizing the text and hashing it into a common vector space) is taking 100 to 500 times more time than the actual prediction code, depending on the chosen model. - .. |prediction_time| image:: ../auto_examples/applications/images/sphx_glr_plot_out_of_core_classification_004.png - :target: ../auto_examples/applications/plot_out_of_core_classification.html - :scale: 80 +.. |prediction_time| image:: ../auto_examples/applications/images/sphx_glr_plot_out_of_core_classification_004.png + :target: ../auto_examples/applications/plot_out_of_core_classification.html + :scale: 80 .. centered:: |prediction_time| @@ -283,10 +280,11 @@ scikit-learn install with the following command:: python -c "import sklearn; sklearn.show_versions()" Optimized BLAS / LAPACK implementations include: - - Atlas (need hardware specific tuning by rebuilding on the target machine) - - OpenBLAS - - MKL - - Apple Accelerate and vecLib frameworks (OSX only) + +- Atlas (need hardware specific tuning by rebuilding on the target machine) +- OpenBLAS +- MKL +- Apple Accelerate and vecLib frameworks (OSX only) More information can be found on the `NumPy install page `_ and in this @@ -364,5 +362,5 @@ sufficient to not generate the relevant features, leaving their columns empty. Links ...... - - :ref:`scikit-learn developer performance documentation ` - - `Scipy sparse matrix formats documentation `_ +- :ref:`scikit-learn developer performance documentation ` +- `Scipy sparse matrix formats documentation `_ diff --git a/doc/computing/parallelism.rst b/doc/computing/parallelism.rst index b7add493a88b1..5c15cd9db440e 100644 --- a/doc/computing/parallelism.rst +++ b/doc/computing/parallelism.rst @@ -1,7 +1,3 @@ -.. Places parent toc into the sidebar - -:parenttoc: True - Parallelism, resource management, and configuration =================================================== @@ -87,15 +83,15 @@ will use as many threads as possible, i.e. as many threads as logical cores. You can control the exact number of threads that are used either: - - via the ``OMP_NUM_THREADS`` environment variable, for instance when: - running a python script: +- via the ``OMP_NUM_THREADS`` environment variable, for instance when: + running a python script: - .. prompt:: bash $ + .. 
prompt:: bash $ - OMP_NUM_THREADS=4 python my_script.py + OMP_NUM_THREADS=4 python my_script.py - - or via `threadpoolctl` as explained by `this piece of documentation - `_. +- or via `threadpoolctl` as explained by `this piece of documentation + `_. Parallel NumPy and SciPy routines from numerical libraries .......................................................... @@ -107,15 +103,15 @@ such as MKL, OpenBLAS or BLIS. You can control the exact number of threads used by BLAS for each library using environment variables, namely: - - ``MKL_NUM_THREADS`` sets the number of thread MKL uses, - - ``OPENBLAS_NUM_THREADS`` sets the number of threads OpenBLAS uses - - ``BLIS_NUM_THREADS`` sets the number of threads BLIS uses +- ``MKL_NUM_THREADS`` sets the number of thread MKL uses, +- ``OPENBLAS_NUM_THREADS`` sets the number of threads OpenBLAS uses +- ``BLIS_NUM_THREADS`` sets the number of threads BLIS uses Note that BLAS & LAPACK implementations can also be impacted by `OMP_NUM_THREADS`. To check whether this is the case in your environment, you can inspect how the number of threads effectively used by those libraries -is affected when running the the following command in a bash or zsh terminal -for different values of `OMP_NUM_THREADS`:: +is affected when running the following command in a bash or zsh terminal +for different values of `OMP_NUM_THREADS`: .. prompt:: bash $ @@ -316,3 +312,29 @@ most machines. Users looking for the best performance might want to tune this variable using powers of 2 so as to get the best parallelism behavior for their hardware, especially with respect to their caches' sizes. + +`SKLEARN_WARNINGS_AS_ERRORS` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This environment variable is used to turn warnings into errors in tests and +documentation build. + +Some CI (Continuous Integration) builds set `SKLEARN_WARNINGS_AS_ERRORS=1`, for +example to make sure that we catch deprecation warnings from our dependencies +and that we adapt our code. + +To locally run with the same "warnings as errors" setting as in these CI builds +you can set `SKLEARN_WARNINGS_AS_ERRORS=1`. + +By default, warnings are not turned into errors. This is the case if +`SKLEARN_WARNINGS_AS_ERRORS` is unset, or `SKLEARN_WARNINGS_AS_ERRORS=0`. + +This environment variable use specific warning filters to ignore some warnings, +since sometimes warnings originate from third-party libraries and there is not +much we can do about it. You can see the warning filters in the +`_get_warnings_filters_info_list` function in `sklearn/utils/_testing.py`. + +Note that for documentation build, `SKLEARN_WARNING_AS_ERRORS=1` is checking +that the documentation build, in particular running examples, does not produce +any warnings. This is different from the `-W` `sphinx-build` argument that +catches syntax warnings in the rst files. diff --git a/doc/computing/scaling_strategies.rst b/doc/computing/scaling_strategies.rst index 277d499f4cc13..286a1e79d0a8c 100644 --- a/doc/computing/scaling_strategies.rst +++ b/doc/computing/scaling_strategies.rst @@ -1,7 +1,3 @@ -.. Places parent toc into the sidebar - -:parenttoc: True - .. _scaling_strategies: Strategies to scale computationally: bigger data @@ -20,9 +16,9 @@ data that cannot fit in a computer's main memory (RAM). Here is a sketch of a system designed to achieve this goal: - 1. a way to stream instances - 2. a way to extract features from instances - 3. an incremental algorithm +1. a way to stream instances +2. a way to extract features from instances +3. 
an incremental algorithm Streaming instances .................... @@ -62,29 +58,29 @@ balances relevancy and memory footprint could involve some tuning [1]_. Here is a list of incremental estimators for different tasks: - - Classification - + :class:`sklearn.naive_bayes.MultinomialNB` - + :class:`sklearn.naive_bayes.BernoulliNB` - + :class:`sklearn.linear_model.Perceptron` - + :class:`sklearn.linear_model.SGDClassifier` - + :class:`sklearn.linear_model.PassiveAggressiveClassifier` - + :class:`sklearn.neural_network.MLPClassifier` - - Regression - + :class:`sklearn.linear_model.SGDRegressor` - + :class:`sklearn.linear_model.PassiveAggressiveRegressor` - + :class:`sklearn.neural_network.MLPRegressor` - - Clustering - + :class:`sklearn.cluster.MiniBatchKMeans` - + :class:`sklearn.cluster.Birch` - - Decomposition / feature Extraction - + :class:`sklearn.decomposition.MiniBatchDictionaryLearning` - + :class:`sklearn.decomposition.IncrementalPCA` - + :class:`sklearn.decomposition.LatentDirichletAllocation` - + :class:`sklearn.decomposition.MiniBatchNMF` - - Preprocessing - + :class:`sklearn.preprocessing.StandardScaler` - + :class:`sklearn.preprocessing.MinMaxScaler` - + :class:`sklearn.preprocessing.MaxAbsScaler` +- Classification + + :class:`sklearn.naive_bayes.MultinomialNB` + + :class:`sklearn.naive_bayes.BernoulliNB` + + :class:`sklearn.linear_model.Perceptron` + + :class:`sklearn.linear_model.SGDClassifier` + + :class:`sklearn.linear_model.PassiveAggressiveClassifier` + + :class:`sklearn.neural_network.MLPClassifier` +- Regression + + :class:`sklearn.linear_model.SGDRegressor` + + :class:`sklearn.linear_model.PassiveAggressiveRegressor` + + :class:`sklearn.neural_network.MLPRegressor` +- Clustering + + :class:`sklearn.cluster.MiniBatchKMeans` + + :class:`sklearn.cluster.Birch` +- Decomposition / feature Extraction + + :class:`sklearn.decomposition.MiniBatchDictionaryLearning` + + :class:`sklearn.decomposition.IncrementalPCA` + + :class:`sklearn.decomposition.LatentDirichletAllocation` + + :class:`sklearn.decomposition.MiniBatchNMF` +- Preprocessing + + :class:`sklearn.preprocessing.StandardScaler` + + :class:`sklearn.preprocessing.MinMaxScaler` + + :class:`sklearn.preprocessing.MaxAbsScaler` For classification, a somewhat important thing to note is that although a stateless feature extraction routine may be able to cope with new/unseen diff --git a/doc/conf.py b/doc/conf.py index 176a0d8b3a7d1..9923a24260267 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -10,25 +10,28 @@ # All configuration values have a default; values that are commented out # serve to show the default. -import sys import os -import warnings import re +import sys +import warnings from datetime import datetime -from sklearn.externals._packaging.version import parse from pathlib import Path -from io import StringIO + +from sklearn.externals._packaging.version import parse +from sklearn.utils._testing import turn_warnings_into_errors # If extensions (or modules to document with autodoc) are in another # directory, add these directories to sys.path here. If the directory # is relative to the documentation root, use os.path.abspath to make it # absolute, like shown here. 
+sys.path.insert(0, os.path.abspath(".")) sys.path.insert(0, os.path.abspath("sphinxext")) -from github_link import make_linkcode_resolve +import jinja2 import sphinx_gallery +from github_link import make_linkcode_resolve +from sphinx_gallery.notebook import add_code_cell, add_markdown_cell from sphinx_gallery.sorting import ExampleTitleSortKey -from sphinx_gallery.notebook import add_markdown_cell, add_code_cell try: # Configure plotly to integrate its output into the HTML pages generated by @@ -54,19 +57,33 @@ "sphinx.ext.intersphinx", "sphinx.ext.imgconverter", "sphinx_gallery.gen_gallery", - "sphinx_issues", - "add_toctree_functions", "sphinx-prompt", + "sphinx_copybutton", "sphinxext.opengraph", - "doi_role", - "allow_nan_estimators", "matplotlib.sphinxext.plot_directive", + "sphinxcontrib.sass", + "sphinx_remove_toctrees", + "sphinx_design", + # See sphinxext/ + "allow_nan_estimators", + "autoshortsummary", + "doi_role", + "dropdown_anchors", + "move_gallery_links", + "override_pst_pagetoc", + "sphinx_issues", ] +# Specify how to identify the prompt when copying code snippets +copybutton_prompt_text = r">>> |\.\.\. " +copybutton_prompt_is_regexp = True +copybutton_exclude = "style" + try: import jupyterlite_sphinx # noqa: F401 extensions.append("jupyterlite_sphinx") + with_jupyterlite = True except ImportError: # In some cases we don't want to require jupyterlite_sphinx to be installed, # e.g. the doc-min-dependencies build @@ -74,6 +91,7 @@ "jupyterlite_sphinx is not installed, you need to install it " "if you want JupyterLite links to appear in each example" ) + with_jupyterlite = False # Produce `plot::` directives for examples that contain `import matplotlib` or # `from matplotlib import`. @@ -86,8 +104,12 @@ plot_html_show_formats = False plot_html_show_source_link = False -# this is needed for some reason... -# see https://github.com/numpy/numpydoc/issues/69 +# We do not need the table of class members because `sphinxext/override_pst_pagetoc.py` +# will show them in the secondary sidebar +numpydoc_show_class_members = False +numpydoc_show_inherited_class_members = False + +# We want in-page toc of class members instead of a separate page for each entry numpydoc_class_members_toctree = False @@ -101,8 +123,6 @@ extensions.append("sphinx.ext.mathjax") mathjax_path = "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml.js" -autodoc_default_options = {"members": True, "inherited-members": True} - # Add any paths that contain templates here, relative to this directory. templates_path = ["templates"] @@ -113,10 +133,10 @@ source_suffix = ".rst" # The encoding of source files. -# source_encoding = 'utf-8' +source_encoding = "utf-8" # The main toctree document. -root_doc = "contents" +root_doc = "index" # General information about the project. project = "scikit-learn" @@ -150,7 +170,12 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ["_build", "templates", "includes", "themes"] +exclude_patterns = [ + "_build", + "templates", + "includes", + "**/sg_execution_times.rst", +] # The reST default role (used for this markup: `text`) to use for all # documents. @@ -167,9 +192,6 @@ # output. They are ignored by default. # show_authors = False -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = "sphinx" - # A list of ignored prefixes for module index sorting. # modindex_common_prefix = [] @@ -178,21 +200,89 @@ # The theme to use for HTML and HTML Help pages. 
Major themes that come with # Sphinx are currently 'default' and 'sphinxdoc'. -html_theme = "scikit-learn-modern" +html_theme = "pydata_sphinx_theme" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. html_theme_options = { - "legacy_google_analytics": True, - "analytics": True, - "mathjax_path": mathjax_path, - "link_to_live_contributing_page": not parsed_version.is_devrelease, + # -- General configuration ------------------------------------------------ + "sidebar_includehidden": True, + "use_edit_page_button": True, + "external_links": [], + "icon_links_label": "Icon Links", + "icon_links": [ + { + "name": "GitHub", + "url": "https://github.com/scikit-learn/scikit-learn", + "icon": "fa-brands fa-square-github", + "type": "fontawesome", + }, + ], + "analytics": { + "plausible_analytics_domain": "scikit-learn.org", + "plausible_analytics_url": "https://views.scientific-python.org/js/script.js", + }, + # If "prev-next" is included in article_footer_items, then setting show_prev_next + # to True would repeat prev and next links. See + # https://github.com/pydata/pydata-sphinx-theme/blob/b731dc230bc26a3d1d1bb039c56c977a9b3d25d8/src/pydata_sphinx_theme/theme/pydata_sphinx_theme/layout.html#L118-L129 + "show_prev_next": False, + "search_bar_text": "Search the docs ...", + "navigation_with_keys": False, + "collapse_navigation": False, + "navigation_depth": 2, + "show_nav_level": 1, + "show_toc_level": 1, + "navbar_align": "left", + "header_links_before_dropdown": 5, + "header_dropdown_text": "More", + # The switcher requires a JSON file with the list of documentation versions, which + # is generated by the script `build_tools/circle/list_versions.py` and placed under + # the `js/` static directory; it will then be copied to the `_static` directory in + # the built documentation + "switcher": { + "json_url": "https://scikit-learn.org/dev/_static/versions.json", + "version_match": release, + }, + # check_switcher may be set to False if docbuild pipeline fails. 
See + # https://pydata-sphinx-theme.readthedocs.io/en/stable/user_guide/version-dropdown.html#configure-switcher-json-url + "check_switcher": True, + "pygments_light_style": "tango", + "pygments_dark_style": "monokai", + "logo": { + "alt_text": "scikit-learn homepage", + "image_relative": "logos/scikit-learn-logo-small.png", + "image_light": "logos/scikit-learn-logo-small.png", + "image_dark": "logos/scikit-learn-logo-small.png", + }, + "surface_warnings": True, + # -- Template placement in theme layouts ---------------------------------- + "navbar_start": ["navbar-logo"], + # Note that the alignment of navbar_center is controlled by navbar_align + "navbar_center": ["navbar-nav"], + "navbar_end": ["theme-switcher", "navbar-icon-links", "version-switcher"], + # navbar_persistent is persistent right (even when on mobiles) + "navbar_persistent": ["search-button"], + "article_header_start": ["breadcrumbs"], + "article_header_end": [], + "article_footer_items": ["prev-next"], + "content_footer_items": [], + # Use html_sidebars that map page patterns to list of sidebar templates + "primary_sidebar_end": [], + "footer_start": ["copyright"], + "footer_center": [], + "footer_end": [], + # When specified as a dictionary, the keys should follow glob-style patterns, as in + # https://www.sphinx-doc.org/en/master/usage/configuration.html#confval-exclude_patterns + # In particular, "**" specifies the default for all pages + # Use :html_theme.sidebar_secondary.remove: for file-wide removal + "secondary_sidebar_items": {"**": ["page-toc", "sourcelink"]}, + "show_version_warning_banner": True, + "announcement": None, } # Add any paths that contain custom themes here, relative to this directory. -html_theme_path = ["themes"] - +# html_theme_path = ["themes"] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". @@ -201,10 +291,6 @@ # A shorter title for the navigation bar. Default is the same as html_title. html_short_title = "scikit-learn" -# The name of an image file (relative to this directory) to place at the top -# of the sidebar. -html_logo = "logos/scikit-learn-logo-small.png" - # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. @@ -213,19 +299,77 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ["images"] +html_static_path = ["images", "css", "js"] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. # html_last_updated_fmt = '%b %d, %Y' # Custom sidebar templates, maps document names to template names. -# html_sidebars = {} +# Workaround for removing the left sidebar on pages without TOC +# A better solution would be to follow the merge of: +# https://github.com/pydata/pydata-sphinx-theme/pull/1682 +html_sidebars = { + "install": [], + "getting_started": [], + "glossary": [], + "faq": [], + "support": [], + "related_projects": [], + "roadmap": [], + "governance": [], + "about": [], +} # Additional templates that should be rendered to pages, maps page names to # template names. 
html_additional_pages = {"index": "index.html"} +# Additional files to copy +# html_extra_path = [] + +# Additional JS files +html_js_files = [ + "scripts/dropdown.js", + "scripts/version-switcher.js", +] + +# Compile scss files into css files using sphinxcontrib-sass +sass_src_dir, sass_out_dir = "scss", "css/styles" +sass_targets = { + f"{file.stem}.scss": f"{file.stem}.css" + for file in Path(sass_src_dir).glob("*.scss") +} + +# Additional CSS files, should be subset of the values of `sass_targets` +html_css_files = ["styles/colors.css", "styles/custom.css"] + + +def add_js_css_files(app, pagename, templatename, context, doctree): + """Load additional JS and CSS files only for certain pages. + + Note that `html_js_files` and `html_css_files` are included in all pages and + should be used for the ones that are used by multiple pages. All page-specific + JS and CSS files should be added here instead. + """ + if pagename == "api/index": + # External: jQuery and DataTables + app.add_js_file("https://code.jquery.com/jquery-3.7.0.js") + app.add_js_file("https://cdn.datatables.net/2.0.0/js/dataTables.min.js") + app.add_css_file( + "https://cdn.datatables.net/2.0.0/css/dataTables.dataTables.min.css" + ) + # Internal: API search intialization and styling + app.add_js_file("scripts/api-search.js") + app.add_css_file("styles/api-search.css") + elif pagename == "index": + app.add_css_file("styles/index.css") + elif pagename == "install": + app.add_css_file("styles/install.css") + elif pagename.startswith("modules/generated/"): + app.add_css_file("styles/api.css") + + # If false, no module index is generated. html_domain_indices = False @@ -275,6 +419,9 @@ # redirects dictionary maps from old links to new links redirects = { "documentation": "index", + "contents": "index", + "preface": "index", + "modules/classes": "api/index", "auto_examples/feature_selection/plot_permutation_test_for_classification": ( "auto_examples/model_selection/plot_permutation_tests_for_classification" ), @@ -291,13 +438,25 @@ "auto_examples/decomposition/plot_beta_divergence": ( "auto_examples/applications/plot_topics_extraction_with_nmf_lda" ), + "auto_examples/svm/plot_svm_nonlinear": "auto_examples/svm/plot_svm_kernels", + "auto_examples/ensemble/plot_adaboost_hastie_10_2": ( + "auto_examples/ensemble/plot_adaboost_multiclass" + ), + "auto_examples/decomposition/plot_pca_3d": ( + "auto_examples/decomposition/plot_pca_iris" + ), + "auto_examples/exercises/plot_cv_digits.py": ( + "auto_examples/model_selection/plot_nested_cross_validation_iris.py" + ), + "tutorial/machine_learning_map/index.html": "machine_learning_map/index.html", } html_context["redirects"] = redirects for old_link in redirects: html_additional_pages[old_link] = "redirects.html" -# Not showing the search summary makes the search page load faster. 
-html_show_search_summary = False +# See https://github.com/scikit-learn/scikit-learn/pull/22550 +html_context["is_devrelease"] = parsed_version.is_devrelease + # -- Options for LaTeX output ------------------------------------------------ latex_elements = { @@ -407,7 +566,7 @@ def __call__(self, filename): prefix = "plot_release_highlights_" # Use title to sort if not a release highlight - if not filename.startswith(prefix): + if not str(filename).startswith(prefix): return title major_minor = filename[len(prefix) :].split("_")[:2] @@ -453,6 +612,8 @@ def notebook_modification_function(notebook_content, notebook_filename): code_lines.append("%pip install plotly") if "skimage" in notebook_content_str: code_lines.append("%pip install scikit-image") + if "polars" in notebook_content_str: + code_lines.append("%pip install polars") if "fetch_" in notebook_content_str: code_lines.extend( [ @@ -483,14 +644,16 @@ def reset_sklearn_config(gallery_conf, fname): sklearn.set_config(**default_global_config) +sg_examples_dir = "../examples" +sg_gallery_dir = "auto_examples" sphinx_gallery_conf = { "doc_module": "sklearn", "backreferences_dir": os.path.join("modules", "generated"), "show_memory": False, "reference_url": {"sklearn": None}, - "examples_dirs": ["../examples"], - "gallery_dirs": ["auto_examples"], - "subsection_order": SubSectionTitleOrder("../examples"), + "examples_dirs": [sg_examples_dir], + "gallery_dirs": [sg_gallery_dir], + "subsection_order": SubSectionTitleOrder(sg_examples_dir), "within_subsection_order": SKExampleTitleSortKey, "binder": { "org": "scikit-learn", @@ -500,13 +663,37 @@ def reset_sklearn_config(gallery_conf, fname): "dependencies": "./binder/requirements.txt", "use_jupyter_lab": True, }, - "jupyterlite": {"notebook_modification_function": notebook_modification_function}, # avoid generating too many cross links "inspect_global_variables": False, "remove_config_comments": True, "plot_gallery": "True", + "recommender": {"enable": True, "n_examples": 4, "min_df": 12}, "reset_modules": ("matplotlib", "seaborn", reset_sklearn_config), } +if with_jupyterlite: + sphinx_gallery_conf["jupyterlite"] = { + "notebook_modification_function": notebook_modification_function + } + +# Secondary sidebar configuration for pages generated by sphinx-gallery + +# For the index page of the gallery and each nested section, we hide the secondary +# sidebar by specifying an empty list (no components), because there is no meaningful +# in-page toc for these pages, and they are generated so "sourcelink" is not useful +# either. + +# For each example page we keep default ["page-toc", "sourcelink"] specified by the +# "**" key. "page-toc" is wanted for these pages. "sourcelink" is also necessary since +# otherwise the secondary sidebar will degenerate when "page-toc" is empty, and the +# script `sphinxext/move_gallery_links.py` will fail (it assumes the existence of the +# secondary sidebar). The script will remove "sourcelink" in the end. 
+ +html_theme_options["secondary_sidebar_items"][f"{sg_gallery_dir}/index"] = [] +for sub_sg_dir in (Path(".") / sg_examples_dir).iterdir(): + if sub_sg_dir.is_dir(): + html_theme_options["secondary_sidebar_items"][ + f"{sg_gallery_dir}/{sub_sg_dir.name}/index" + ] = [] # The following dictionary contains the information used to create the @@ -557,73 +744,6 @@ def filter_search_index(app, exception): f.write(searchindex_text) -def generate_min_dependency_table(app): - """Generate min dependency table for docs.""" - from sklearn._min_dependencies import dependent_packages - - # get length of header - package_header_len = max(len(package) for package in dependent_packages) + 4 - version_header_len = len("Minimum Version") + 4 - tags_header_len = max(len(tags) for _, tags in dependent_packages.values()) + 4 - - output = StringIO() - output.write( - " ".join( - ["=" * package_header_len, "=" * version_header_len, "=" * tags_header_len] - ) - ) - output.write("\n") - dependency_title = "Dependency" - version_title = "Minimum Version" - tags_title = "Purpose" - - output.write( - f"{dependency_title:<{package_header_len}} " - f"{version_title:<{version_header_len}} " - f"{tags_title}\n" - ) - - output.write( - " ".join( - ["=" * package_header_len, "=" * version_header_len, "=" * tags_header_len] - ) - ) - output.write("\n") - - for package, (version, tags) in dependent_packages.items(): - output.write( - f"{package:<{package_header_len}} {version:<{version_header_len}} {tags}\n" - ) - - output.write( - " ".join( - ["=" * package_header_len, "=" * version_header_len, "=" * tags_header_len] - ) - ) - output.write("\n") - output = output.getvalue() - - with (Path(".") / "min_dependency_table.rst").open("w") as f: - f.write(output) - - -def generate_min_dependency_substitutions(app): - """Generate min dependency substitutions for docs.""" - from sklearn._min_dependencies import dependent_packages - - output = StringIO() - - for package, (version, _) in dependent_packages.items(): - package = package.capitalize() - output.write(f".. |{package}MinVersion| replace:: {version}") - output.write("\n") - - output = output.getvalue() - - with (Path(".") / "min_dependency_substitutions.rst").open("w") as f: - f.write(output) - - # Config for sphinx_issues # we use the issues path for PRs since the issues URL will forward @@ -639,10 +759,11 @@ def setup(app): # do not run the examples when using linkcheck by using a small priority # (default priority is 500 and sphinx-gallery using builder-inited event too) app.connect("builder-inited", disable_plot_gallery_for_linkcheck, priority=50) - app.connect("builder-inited", generate_min_dependency_table) - app.connect("builder-inited", generate_min_dependency_substitutions) - # to hide/show the prompt in code examples: + # triggered just before the HTML for an individual page is created + app.connect("html-page-context", add_js_css_files) + + # to hide/show the prompt in code examples app.connect("build-finished", make_carousel_thumbs) app.connect("build-finished", filter_search_index) @@ -665,7 +786,8 @@ def setup(app): " non-GUI backend, so cannot show the figure." 
), ) - +if os.environ.get("SKLEARN_WARNINGS_AS_ERRORS", "0") != "0": + turn_warnings_into_errors() # maps functions with a class name that is indistinguishable when case is # ignore to another filename @@ -746,6 +868,10 @@ def setup(app): "consistently-create-same-random-numpy-array/5837352#comment6712034_5837352", ] +# Config for sphinx-remove-toctrees + +remove_from_toctrees = ["metadata_routing.rst"] + # Use a browser-like user agent to avoid some "403 Client Error: Forbidden for # url" errors. This is taken from the variable navigator.userAgent inside a # browser console. @@ -763,3 +889,78 @@ def setup(app): linkcheck_request_headers = { "https://github.com/": {"Authorization": f"token {github_token}"}, } + + +# -- Convert .rst.template files to .rst --------------------------------------- + +from api_reference import API_REFERENCE, DEPRECATED_API_REFERENCE + +from sklearn._min_dependencies import dependent_packages + +# If development build, link to local page in the top navbar; otherwise link to the +# development version; see https://github.com/scikit-learn/scikit-learn/pull/22550 +if parsed_version.is_devrelease: + development_link = "developers/index" +else: + development_link = "https://scikit-learn.org/dev/developers/index.html" + +# Define the templates and target files for conversion +# Each entry is in the format (template name, file name, kwargs for rendering) +rst_templates = [ + ("index", "index", {"development_link": development_link}), + ( + "min_dependency_table", + "min_dependency_table", + {"dependent_packages": dependent_packages}, + ), + ( + "min_dependency_substitutions", + "min_dependency_substitutions", + {"dependent_packages": dependent_packages}, + ), + ( + "api/index", + "api/index", + { + "API_REFERENCE": sorted(API_REFERENCE.items(), key=lambda x: x[0]), + "DEPRECATED_API_REFERENCE": sorted( + DEPRECATED_API_REFERENCE.items(), key=lambda x: x[0], reverse=True + ), + }, + ), +] + +# Convert each module API reference page +for module in API_REFERENCE: + rst_templates.append( + ( + "api/module", + f"api/{module}", + {"module": module, "module_info": API_REFERENCE[module]}, + ) + ) + +# Convert the deprecated API reference page (if there exists any) +if DEPRECATED_API_REFERENCE: + rst_templates.append( + ( + "api/deprecated", + "api/deprecated", + { + "DEPRECATED_API_REFERENCE": sorted( + DEPRECATED_API_REFERENCE.items(), key=lambda x: x[0], reverse=True + ) + }, + ) + ) + +for rst_template_name, rst_target_name, kwargs in rst_templates: + # Read the corresponding template file into jinja2 + with (Path(".") / f"{rst_template_name}.rst.template").open( + "r", encoding="utf-8" + ) as f: + t = jinja2.Template(f.read()) + + # Render the template and write to the target + with (Path(".") / f"{rst_target_name}.rst").open("w", encoding="utf-8") as f: + f.write(t.render(**kwargs)) diff --git a/doc/conftest.py b/doc/conftest.py index 73848ccf392fb..7081e8b8bf698 100644 --- a/doc/conftest.py +++ b/doc/conftest.py @@ -1,16 +1,16 @@ import os -from os.path import exists -from os.path import join -from os import environ import warnings +from os import environ +from os.path import exists, join + +import pytest +from _pytest.doctest import DoctestItem -from sklearn.utils import IS_PYPY -from sklearn.utils._testing import SkipTest -from sklearn.utils._testing import check_skip_network -from sklearn.utils.fixes import parse_version from sklearn.datasets import get_data_home from sklearn.datasets._base import _pkl_filepath from sklearn.datasets._twenty_newsgroups import 
CACHE_NAME +from sklearn.utils._testing import SkipTest, check_skip_network +from sklearn.utils.fixes import _IS_PYPY, np_base_version, parse_version def setup_labeled_faces(): @@ -34,7 +34,7 @@ def setup_twenty_newsgroups(): def setup_working_with_text_data(): - if IS_PYPY and os.environ.get("CI", None): + if _IS_PYPY and os.environ.get("CI", None): raise SkipTest("Skipping too slow test with PyPy on CI") check_skip_network() cache_path = _pkl_filepath(get_data_home(), CACHE_NAME) @@ -128,10 +128,6 @@ def pytest_runtest_setup(item): setup_rcv1() elif fname.endswith("datasets/twenty_newsgroups.rst") or is_index: setup_twenty_newsgroups() - elif ( - fname.endswith("tutorial/text_analytics/working_with_text_data.rst") or is_index - ): - setup_working_with_text_data() elif fname.endswith("modules/compose.rst") or is_index: setup_compose() elif fname.endswith("datasets/loading_other_datasets.rst"): @@ -144,19 +140,10 @@ def pytest_runtest_setup(item): setup_preprocessing() elif fname.endswith("statistical_inference/unsupervised_learning.rst"): setup_unsupervised_learning() - elif fname.endswith("metadata_routing.rst"): - # TODO: remove this once implemented - # Skip metarouting because is it is not fully implemented yet - raise SkipTest( - "Skipping doctest for metadata_routing.rst because it " - "is not fully implemented yet" - ) rst_files_requiring_matplotlib = [ "modules/partial_dependence.rst", "modules/tree.rst", - "tutorial/statistical_inference/settings.rst", - "tutorial/statistical_inference/supervised_learning.rst", ] for each in rst_files_requiring_matplotlib: if fname.endswith(each): @@ -174,3 +161,34 @@ def pytest_configure(config): matplotlib.use("agg") except ImportError: pass + + +def pytest_collection_modifyitems(config, items): + """Called after collect is completed. + + Parameters + ---------- + config : pytest config + items : list of collected items + """ + skip_doctests = False + if np_base_version >= parse_version("2"): + # Skip doctests when using numpy 2 for now. See the following discussion + # to decide what to do in the longer term: + # https://github.com/scikit-learn/scikit-learn/issues/27339 + reason = "Due to NEP 51 numpy scalar repr has changed in numpy 2" + skip_doctests = True + + # Normally doctest has the entire module's scope. Here we set globs to an empty dict + # to remove the module's scope: + # https://docs.python.org/3/library/doctest.html#what-s-the-execution-context + for item in items: + if isinstance(item, DoctestItem): + item.dtest.globs = {} + + if skip_doctests: + skip_marker = pytest.mark.skip(reason=reason) + + for item in items: + if isinstance(item, DoctestItem): + item.add_marker(skip_marker) diff --git a/doc/contents.rst b/doc/contents.rst deleted file mode 100644 index a28634621d558..0000000000000 --- a/doc/contents.rst +++ /dev/null @@ -1,24 +0,0 @@ -.. include:: includes/big_toc_css.rst -.. include:: tune_toc.rst - -.. Places global toc into the sidebar - -:globalsidebartoc: True - -================= -Table Of Contents -================= - -.. Define an order for the Table of Contents: - -.. toctree:: - :maxdepth: 2 - - preface - tutorial/index - getting_started - user_guide - glossary - auto_examples/index - modules/classes - developers/index diff --git a/doc/contributor_experience_team.rst b/doc/contributor_experience_team.rst index 00b658632302e..7d942a07e6a7d 100644 --- a/doc/contributor_experience_team.rst +++ b/doc/contributor_experience_team.rst @@ -6,10 +6,6 @@ img.avatar {border-radius: 10px;}
-
-

Arturo Amor

-
-

Juan Carlos Alfaro Jiménez

@@ -41,4 +37,8 @@

Albert Thomas

+
+
+

Maren Westermann

+
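Each contributor card on this page appears to follow roughly the markup sketched below (a rough sketch only: the profile link and avatar URL are placeholders rather than the values used in the file; the ``img.avatar`` rule above suggests the avatar images carry an ``avatar`` class)::

    <!-- one contributor card (sketch; URLs are placeholders) -->
    <div>
      <a href='https://github.com/<username>'>
        <img src='https://avatars.githubusercontent.com/u/<id>?v=4' class='avatar' />
      </a> <br />
      <p>Maren Westermann</p>
    </div>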
diff --git a/doc/css/.gitkeep b/doc/css/.gitkeep new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/doc/data_transforms.rst b/doc/data_transforms.rst index 084214cb094f5..536539ec97007 100644 --- a/doc/data_transforms.rst +++ b/doc/data_transforms.rst @@ -1,9 +1,3 @@ -.. Places parent toc into the sidebar - -:parenttoc: True - -.. include:: includes/big_toc_css.rst - .. _data-transforms: Dataset transformations diff --git a/doc/datasets.rst b/doc/datasets.rst index b9484a02ce84c..ee767e5843256 100644 --- a/doc/datasets.rst +++ b/doc/datasets.rst @@ -1,9 +1,3 @@ -.. Places parent toc into the sidebar - -:parenttoc: True - -.. include:: includes/big_toc_css.rst - .. _datasets: ========================= diff --git a/doc/datasets/loading_other_datasets.rst b/doc/datasets/loading_other_datasets.rst index a376a69f26dc3..004aa66c001e5 100644 --- a/doc/datasets/loading_other_datasets.rst +++ b/doc/datasets/loading_other_datasets.rst @@ -1,7 +1,3 @@ -.. Places parent toc into the sidebar - -:parenttoc: True - .. _loading_other_datasets: Loading other datasets @@ -37,9 +33,9 @@ and pipelines on 2D data. if you plan to use ``matplotlib.pyplpt.imshow``, don't forget to scale to the range 0 - 1 as done in the following example. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_cluster_plot_color_quantization.py` +* :ref:`sphx_glr_auto_examples_cluster_plot_color_quantization.py` .. _libsvm_loader: @@ -72,11 +68,10 @@ features:: ... "/path/to/test_dataset.txt", n_features=X_train.shape[1]) ... # doctest: +SKIP -.. topic:: Related links: - - _`Public datasets in svmlight / libsvm format`: https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets +.. rubric:: Related links - _`Faster API-compatible implementation`: https://github.com/mblondel/svmlight-loader +- `Public datasets in svmlight / libsvm format`: https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets +- `Faster API-compatible implementation`: https://github.com/mblondel/svmlight-loader .. For doctests: @@ -99,7 +94,7 @@ from the repository using the function For example, to download a dataset of gene expressions in mice brains:: >>> from sklearn.datasets import fetch_openml - >>> mice = fetch_openml(name='miceprotein', version=4, parser="auto") + >>> mice = fetch_openml(name='miceprotein', version=4) To fully specify a dataset, you need to provide a name and a version, though the version is optional, see :ref:`openml_versions` below. @@ -147,7 +142,7 @@ dataset on the openml website:: The ``data_id`` also uniquely identifies a dataset from OpenML:: - >>> mice = fetch_openml(data_id=40966, parser="auto") + >>> mice = fetch_openml(data_id=40966) >>> mice.details # doctest: +SKIP {'id': '4550', 'name': 'MiceProtein', 'version': '1', 'format': 'ARFF', 'creator': ..., @@ -171,7 +166,7 @@ which can contain entirely different datasets. If a particular version of a dataset has been found to contain significant issues, it might be deactivated. Using a name to specify a dataset will yield the earliest version of a dataset that is still active. That means that -``fetch_openml(name="miceprotein", parser="auto")`` can yield different results +``fetch_openml(name="miceprotein")`` can yield different results at different times if earlier versions become inactive. You can see that the dataset with ``data_id`` 40966 that we fetched above is the first version of the "miceprotein" dataset:: @@ -182,19 +177,19 @@ the first version of the "miceprotein" dataset:: In fact, this dataset only has one version. 
The iris dataset on the other hand has multiple versions:: - >>> iris = fetch_openml(name="iris", parser="auto") + >>> iris = fetch_openml(name="iris") >>> iris.details['version'] #doctest: +SKIP '1' >>> iris.details['id'] #doctest: +SKIP '61' - >>> iris_61 = fetch_openml(data_id=61, parser="auto") + >>> iris_61 = fetch_openml(data_id=61) >>> iris_61.details['version'] '1' >>> iris_61.details['id'] '61' - >>> iris_969 = fetch_openml(data_id=969, parser="auto") + >>> iris_969 = fetch_openml(data_id=969) >>> iris_969.details['version'] '3' >>> iris_969.details['id'] @@ -212,18 +207,18 @@ binarized version of the data:: You can also specify both the name and the version, which also uniquely identifies the dataset:: - >>> iris_version_3 = fetch_openml(name="iris", version=3, parser="auto") + >>> iris_version_3 = fetch_openml(name="iris", version=3) >>> iris_version_3.details['version'] '3' >>> iris_version_3.details['id'] '969' -.. topic:: References: +.. rubric:: References - * :arxiv:`Vanschoren, van Rijn, Bischl and Torgo. "OpenML: networked science in - machine learning" ACM SIGKDD Explorations Newsletter, 15(2), 49-60, 2014. - <1407.7722>` +* :arxiv:`Vanschoren, van Rijn, Bischl and Torgo. "OpenML: networked science in + machine learning" ACM SIGKDD Explorations Newsletter, 15(2), 49-60, 2014. + <1407.7722>` .. _openml_parser: @@ -290,9 +285,9 @@ format usable by scikit-learn: context such as .mat and .arff * `numpy/routines.io `_ for standard loading of columnar data into numpy arrays -* scikit-learn's :func:`datasets.load_svmlight_file` for the svmlight or libSVM +* scikit-learn's :func:`load_svmlight_file` for the svmlight or libSVM sparse format -* scikit-learn's :func:`datasets.load_files` for directories of text files where +* scikit-learn's :func:`load_files` for directories of text files where the name of each directory is the name of each category and each file inside of each directory corresponds to one sample from that category diff --git a/doc/datasets/real_world.rst b/doc/datasets/real_world.rst index b528a26674db9..f05d475b0db78 100644 --- a/doc/datasets/real_world.rst +++ b/doc/datasets/real_world.rst @@ -1,7 +1,3 @@ -.. Places parent toc into the sidebar - -:parenttoc: True - .. _real_world_datasets: Real world datasets @@ -25,6 +21,7 @@ They can be loaded using the following functions: fetch_rcv1 fetch_kddcup99 fetch_california_housing + fetch_species_distributions .. include:: ../../sklearn/datasets/descr/olivetti_faces.rst @@ -39,3 +36,5 @@ They can be loaded using the following functions: .. include:: ../../sklearn/datasets/descr/kddcup99.rst .. include:: ../../sklearn/datasets/descr/california_housing.rst + +.. include:: ../../sklearn/datasets/descr/species_distributions.rst diff --git a/doc/datasets/sample_generators.rst b/doc/datasets/sample_generators.rst index 7dc123f08424c..5b8264c2a22b5 100644 --- a/doc/datasets/sample_generators.rst +++ b/doc/datasets/sample_generators.rst @@ -1,7 +1,3 @@ -.. Places parent toc into the sidebar - -:parenttoc: True - .. _sample_generators: Generated datasets diff --git a/doc/datasets/toy_dataset.rst b/doc/datasets/toy_dataset.rst index 65fd20abd361d..d7edecddd3510 100644 --- a/doc/datasets/toy_dataset.rst +++ b/doc/datasets/toy_dataset.rst @@ -1,7 +1,3 @@ -.. Places parent toc into the sidebar - -:parenttoc: True - .. 
_toy_datasets: Toy datasets diff --git a/doc/developers/advanced_installation.rst b/doc/developers/advanced_installation.rst index fe573deb28b83..88521c6c51867 100644 --- a/doc/developers/advanced_installation.rst +++ b/doc/developers/advanced_installation.rst @@ -26,12 +26,12 @@ Installing a nightly build is the quickest way to: - check whether a bug you encountered has been fixed since the last release. -You can install the nightly build of scikit-learn using the `scipy-wheels-nightly` +You can install the nightly build of scikit-learn using the `scientific-python-nightly-wheels` index from the PyPI registry of `anaconda.org`: .. prompt:: bash $ - pip install --pre --extra-index https://pypi.anaconda.org/scipy-wheels-nightly/simple scikit-learn + pip install --pre --extra-index https://pypi.anaconda.org/scientific-python-nightly-wheels/simple scikit-learn Note that first uninstalling scikit-learn might be required to be able to install nightly builds of scikit-learn. @@ -58,39 +58,48 @@ feature, code or documentation improvement). If you plan on submitting a pull-request, you should clone from your fork instead. -#. Install a recent version of Python (3.9 is recommended at the time of writing) - for instance using Miniforge3_. Miniforge provides a conda-based distribution - of Python and the most popular scientific libraries. +#. Install a recent version of Python (3.9 or later at the time of writing) for + instance using Miniforge3_. Miniforge provides a conda-based distribution of + Python and the most popular scientific libraries. If you installed Python with conda, we recommend to create a dedicated `conda environment`_ with all the build dependencies of scikit-learn - (namely NumPy_, SciPy_, and Cython_): + (namely NumPy_, SciPy_, Cython_, meson-python_ and Ninja_): + + .. prompt:: bash $ + + conda create -n sklearn-env -c conda-forge python numpy scipy cython meson-python ninja + + It is not always necessary but it is safer to open a new prompt before + activating the newly created conda environment. .. prompt:: bash $ - conda create -n sklearn-env -c conda-forge python=3.9 numpy scipy cython conda activate sklearn-env -#. **Alternative to conda:** If you run Linux or similar, you can instead use - your system's Python provided it is recent enough (3.8 or higher - at the time of writing). In this case, we recommend to create a dedicated - virtualenv_ and install the scikit-learn build dependencies with pip: +#. **Alternative to conda:** You can use alternative installations of Python + provided they are recent enough (3.9 or higher at the time of writing). + Here is an example on how to create a build environment for a Linux system's + Python. Build dependencies are installed with `pip` in a dedicated virtualenv_ + to avoid disrupting other Python programs installed on the system: .. prompt:: bash $ python3 -m venv sklearn-env source sklearn-env/bin/activate - pip install wheel numpy scipy cython + pip install wheel numpy scipy cython meson-python ninja #. Install a compiler with OpenMP_ support for your platform. See instructions for :ref:`compiler_windows`, :ref:`compiler_macos`, :ref:`compiler_linux` and :ref:`compiler_freebsd`. -#. Build the project with pip in :ref:`editable_mode`: +#. Build the project with pip: .. prompt:: bash $ - pip install --verbose --no-use-pep517 --no-build-isolation --editable . + pip install --editable . \ + --verbose --no-build-isolation \ + --config-settings editable-verbose=true #. 
Check that the installed scikit-learn has a version number ending with `.dev0`: @@ -104,12 +113,14 @@ feature, code or documentation improvement). .. note:: - You will have to run the ``pip install -v --no-use-pep517 --no-build-isolation -e .`` - command every time the source code of a Cython file is updated - (ending in `.pyx` or `.pxd`). This can happen when you edit them or when you - use certain git commands such as `git pull`. Use the ``--no-build-isolation`` flag - to avoid compiling the whole project each time, only the files you have - modified. + `--config-settings editable-verbose=true` is optional but recommended + to avoid surprises when you import `sklearn`. `meson-python` implements + editable installs by rebuilding `sklearn` when executing `import sklearn`. + With the recommended setting you will see a message when this happens, + rather than potentially waiting without feed-back and wondering + what is taking so long. Bonus: this means you only have to run the `pip + install` command once, `sklearn` will automatically be rebuilt when + importing `sklearn`. Dependencies ------------ @@ -173,26 +184,6 @@ If you want to build a stable version, you can ``git checkout `` to get the code for that particular version, or download an zip archive of the version from github. -.. _editable_mode: - -Editable mode -------------- - -If you run the development version, it is cumbersome to reinstall the package -each time you update the sources. Therefore it is recommended that you install -in with the ``pip install -v --no-use-pep517 --no-build-isolation -e .`` command, -which allows you to edit the code in-place. This builds the extension in place and -creates a link to the development directory (see `the pip docs -`_). - -As the doc above explains, this is fundamentally similar to using the command -``python setup.py develop``. (see `the setuptool docs -`_). -It is however preferred to use pip. - -On Unix-like systems, you can equivalently type ``make in`` from the top-level -folder. Have a look at the ``Makefile`` for additional utilities. - .. _platform_specific_instructions: Platform-specific instructions @@ -227,10 +218,13 @@ console: For 64-bit Python, configure the build environment by running the following commands in ``cmd`` or an Anaconda Prompt (if you use Anaconda): - :: +.. sphinx-prompt 1.3.0 (used in doc-min-dependencies CI task) does not support `batch` prompt type, +.. so we work around by using a known prompt type and an explicit prompt text. +.. +.. prompt:: bash C:\> - $ SET DISTUTILS_USE_SDK=1 - $ "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Auxiliary\Build\vcvarsall.bat" x64 + SET DISTUTILS_USE_SDK=1 + "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Auxiliary\Build\vcvarsall.bat" x64 Replace ``x64`` by ``x86`` to build for 32-bit Python. @@ -238,11 +232,13 @@ Please be aware that the path above might be different from user to user. The aim is to point to the "vcvarsall.bat" file that will set the necessary environment variables in the current command prompt. -Finally, build scikit-learn from this command prompt: +Finally, build scikit-learn with this command prompt: .. prompt:: bash $ - pip install --verbose --no-use-pep517 --no-build-isolation --editable . + pip install --editable . \ + --verbose --no-build-isolation \ + --config-settings editable-verbose=true .. _compiler_macos: @@ -281,10 +277,18 @@ scikit-learn from source: .. 
prompt:: bash $ conda create -n sklearn-dev -c conda-forge python numpy scipy cython \ - joblib threadpoolctl pytest compilers llvm-openmp + joblib threadpoolctl pytest compilers llvm-openmp meson-python ninja + +It is not always necessary but it is safer to open a new prompt before +activating the newly created conda environment. + +.. prompt:: bash $ + conda activate sklearn-dev make clean - pip install --verbose --no-use-pep517 --no-build-isolation --editable . + pip install --editable . \ + --verbose --no-build-isolation \ + --config-settings editable-verbose=true .. note:: @@ -302,12 +306,6 @@ forge using the following command: which should include ``compilers`` and ``llvm-openmp``. -.. note:: - - If you installed these packages after creating and activating a new conda - environment, you will need to first deactivate and then reactivate the - environment for these changes to take effect. - The compilers meta-package will automatically set custom environment variables: @@ -364,7 +362,9 @@ Finally, build scikit-learn in verbose mode (to check for the presence of the .. prompt:: bash $ make clean - pip install --verbose --no-use-pep517 --no-build-isolation --editable . + pip install --editable . \ + --verbose --no-build-isolation \ + --config-settings editable-verbose=true .. _compiler_linux: @@ -390,7 +390,9 @@ then proceed as usual: .. prompt:: bash $ pip3 install cython - pip3 install --verbose --editable . + pip3 install --editable . \ + --verbose --no-build-isolation \ + --config-settings editable-verbose=true Cython and the pre-compiled wheels for the runtime dependencies (numpy, scipy and joblib) should automatically be installed in @@ -422,9 +424,17 @@ in the user folder using conda: .. prompt:: bash $ conda create -n sklearn-dev -c conda-forge python numpy scipy cython \ - joblib threadpoolctl pytest compilers + joblib threadpoolctl pytest compilers meson-python ninja + +It is not always necessary but it is safer to open a new prompt before +activating the newly created conda environment. + +.. prompt:: bash $ + conda activate sklearn-dev - pip install --verbose --no-use-pep517 --no-build-isolation --editable . + pip install --editable . \ + --verbose --no-build-isolation \ + --config-settings editable-verbose=true .. _compiler_freebsd: @@ -453,13 +463,17 @@ Finally, build the package using the standard command: .. prompt:: bash $ - pip install --verbose --no-use-pep517 --no-build-isolation --editable . + pip install --editable . \ + --verbose --no-build-isolation \ + --config-settings editable-verbose=true For the upcoming FreeBSD 12.1 and 11.3 versions, OpenMP will be included in the base system and these steps will not be necessary. .. _OpenMP: https://en.wikipedia.org/wiki/OpenMP .. _Cython: https://cython.org +.. _meson-python: https://mesonbuild.com/meson-python +.. _Ninja: https://ninja-build.org/ .. _NumPy: https://numpy.org .. _SciPy: https://www.scipy.org .. _Homebrew: https://brew.sh @@ -474,7 +488,9 @@ The following command will build scikit-learn using your default C/C++ compiler. .. prompt:: bash $ - pip install --verbose --editable . + pip install --editable . 
\ + --verbose --no-build-isolation \ + --config-settings editable-verbose=true If you want to build scikit-learn with another compiler handled by ``setuptools``, use the following command: @@ -505,17 +521,3 @@ When setting these environment variables, it is advised to first check their In addition, since Scikit-learn uses OpenMP, you need to include the appropriate OpenMP flag of your compiler into the ``CFLAGS`` and ``CPPFLAGS`` environment variables. - -Parallel builds -=============== - -It is possible to build scikit-learn compiled extensions in parallel by setting -and environment variable as follows before calling the ``pip install`` or -``python setup.py build_ext`` commands:: - - export SKLEARN_BUILD_PARALLEL=3 - pip install --verbose --no-use-pep517 --no-build-isolation --editable . - -On a machine with 2 CPU cores, it can be beneficial to use a parallelism level -of 3 to overlap IO bound tasks (reading and writing files on disk) with CPU -bound tasks (actually compiling). diff --git a/doc/developers/bug_triaging.rst b/doc/developers/bug_triaging.rst index 3ec628f7e5867..915ea0a9a22b7 100644 --- a/doc/developers/bug_triaging.rst +++ b/doc/developers/bug_triaging.rst @@ -19,18 +19,18 @@ A third party can give useful feedback or even add comments on the issue. The following actions are typically useful: - - documenting issues that are missing elements to reproduce the problem - such as code samples +- documenting issues that are missing elements to reproduce the problem + such as code samples - - suggesting better use of code formatting +- suggesting better use of code formatting - - suggesting to reformulate the title and description to make them more - explicit about the problem to be solved +- suggesting to reformulate the title and description to make them more + explicit about the problem to be solved - - linking to related issues or discussions while briefly describing how - they are related, for instance "See also #xyz for a similar attempt - at this" or "See also #xyz where the same thing happened in - SomeEstimator" provides context and helps the discussion. +- linking to related issues or discussions while briefly describing how + they are related, for instance "See also #xyz for a similar attempt + at this" or "See also #xyz where the same thing happened in + SomeEstimator" provides context and helps the discussion. .. topic:: Fruitful discussions diff --git a/doc/developers/contributing.rst b/doc/developers/contributing.rst index 86575dd75d0f1..3184d71426c95 100644 --- a/doc/developers/contributing.rst +++ b/doc/developers/contributing.rst @@ -7,12 +7,9 @@ Contributing .. currentmodule:: sklearn This project is a community effort, and everyone is welcome to -contribute. - -The project is hosted on https://github.com/scikit-learn/scikit-learn - +contribute. It is hosted on https://github.com/scikit-learn/scikit-learn. The decision making process and governance structure of scikit-learn is laid -out in the governance document: :ref:`governance`. +out in :ref:`governance`. Scikit-learn is somewhat :ref:`selective ` when it comes to adding new algorithms, and the best way to contribute and to help the project @@ -70,10 +67,12 @@ link to it from your website, or simply star to say "I use it": .. raw:: html - Star - +

+ + +

In case a contribution/issue involves changes to the API principles or changes to dependencies or supported versions, it must be backed by a @@ -82,28 +81,36 @@ or changes to dependencies or supported versions, it must be backed by a using the `SLEP template `_ and follows the decision-making process outlined in :ref:`governance`. -.. topic:: Contributing to related projects +.. dropdown:: Contributing to related projects + + Scikit-learn thrives in an ecosystem of several related projects, which also + may have relevant issues to work on, including smaller projects such as: - Scikit-learn thrives in an ecosystem of several related projects, which also - may have relevant issues to work on, including smaller projects such as: + * `scikit-learn-contrib `__ + * `joblib `__ + * `sphinx-gallery `__ + * `numpydoc `__ + * `liac-arff `__ - * `scikit-learn-contrib `__ - * `joblib `__ - * `sphinx-gallery `__ - * `numpydoc `__ - * `liac-arff `__ + and larger projects: - and larger projects: + * `numpy `__ + * `scipy `__ + * `matplotlib `__ + * and so on. - * `numpy `__ - * `scipy `__ - * `matplotlib `__ - * and so on. + Look for issues marked "help wanted" or similar. Helping these projects may help + scikit-learn too. See also :ref:`related_projects`. + +Automated Contributions Policy +============================== - Look for issues marked "help wanted" or similar. - Helping these projects may help Scikit-learn too. - See also :ref:`related_projects`. +Please refrain from submitting issues or pull requests generated by +fully-automated tools. Maintainers reserve the right, at their sole discretion, +to close such submissions and to block any account responsible for them. +Ideally, contributions should follow from a human-to-human discussion in the +form of an issue. Submitting a bug report or a feature request ============================================ @@ -136,17 +143,15 @@ following rules before submitting: How to make a good bug report ----------------------------- -When you submit an issue to `Github +When you submit an issue to `GitHub `__, please do your best to follow these guidelines! This will make it a lot easier to provide you with good feedback: - The ideal bug report contains a :ref:`short reproducible code snippet - `, this way - anyone can try to reproduce the bug easily (see `this - `_ for more details). If your snippet is - longer than around 50 lines, please link to a `gist - `_ or a github repo. + `, this way anyone can try to reproduce the bug easily. If your + snippet is longer than around 50 lines, please link to a `Gist + `_ or a GitHub repo. - If not feasible to include a reproducible snippet, please be specific about what **estimators and/or functions are involved and the shape of the data**. @@ -155,18 +160,18 @@ feedback: - Please include your **operating system type and version number**, as well as your **Python, scikit-learn, numpy, and scipy versions**. This information - can be found by running the following code snippet:: + can be found by running: - >>> import sklearn - >>> sklearn.show_versions() # doctest: +SKIP + .. prompt:: bash + + python -c "import sklearn; sklearn.show_versions()" - Please ensure all **code snippets and error messages are formatted in appropriate code blocks**. See `Creating and highlighting code blocks `_ for more details. -If you want to help curate issues, read :ref:`the following -`. +If you want to help curate issues, read about :ref:`bug_triaging`. Contributing code ================= @@ -186,6 +191,16 @@ Contributing code so far. 
In order to claim an issue for yourself, please comment exactly ``/take`` on it for the CI to automatically assign the issue to you. +To maintain the quality of the codebase and ease the review process, any +contribution must conform to the project's :ref:`coding guidelines +`, in particular: + +- Don't modify unrelated lines to keep the PR focused on the scope stated in its + description or issue. +- Only write inline comments that add value and avoid stating the obvious: explain + the "why" rather than the "what". +- **Most importantly**: Do not contribute code that you don't understand. + Video resources --------------- These videos are step-by-step introductions on how to contribute to @@ -242,19 +257,19 @@ how to set up your git repository: 3. Clone your fork of the scikit-learn repo from your GitHub account to your local disk: - .. prompt:: bash $ + .. prompt:: bash git clone git@github.com:YourLogin/scikit-learn.git # add --depth 1 if your connection is slow cd scikit-learn -4. Follow steps 2-7 in :ref:`install_bleeding_edge` to build scikit-learn in +4. Follow steps 2-6 in :ref:`install_bleeding_edge` to build scikit-learn in development mode and return to this document. 5. Install the development dependencies: - .. prompt:: bash $ + .. prompt:: bash - pip install pytest pytest-cov flake8 mypy numpydoc black==23.3.0 + pip install pytest pytest-cov ruff mypy numpydoc black==24.3.0 .. _upstream: @@ -262,26 +277,30 @@ how to set up your git repository: scikit-learn repository, which you can use to keep your repository synchronized with the latest changes: - .. prompt:: bash $ + .. prompt:: bash git remote add upstream git@github.com:scikit-learn/scikit-learn.git 7. Check that the `upstream` and `origin` remote aliases are configured correctly - by running `git remote -v` which should display:: + by running `git remote -v` which should display: + + .. code-block:: text origin git@github.com:YourLogin/scikit-learn.git (fetch) origin git@github.com:YourLogin/scikit-learn.git (push) upstream git@github.com:scikit-learn/scikit-learn.git (fetch) upstream git@github.com:scikit-learn/scikit-learn.git (push) -You should now have a working installation of scikit-learn, and your git -repository properly configured. The next steps now describe the process of -modifying code and submitting a PR: +You should now have a working installation of scikit-learn, and your git repository +properly configured. It could be useful to run some test to verify your installation. +Please refer to :ref:`pytest_tips` for examples. + +The next steps now describe the process of modifying code and submitting a PR: 8. Synchronize your ``main`` branch with the ``upstream/main`` branch, more details on `GitHub Docs `_: - .. prompt:: bash $ + .. prompt:: bash git checkout main git fetch upstream @@ -289,7 +308,7 @@ modifying code and submitting a PR: 9. Create a feature branch to hold your development changes: - .. prompt:: bash $ + .. prompt:: bash git checkout -b my_feature @@ -299,7 +318,7 @@ modifying code and submitting a PR: 10. (**Optional**) Install `pre-commit `_ to run code style checks before each commit: - .. prompt:: bash $ + .. prompt:: bash pip install pre-commit pre-commit install @@ -311,7 +330,7 @@ modifying code and submitting a PR: do the version control. When you're done editing, add changed files using ``git add`` and then ``git commit``: - .. prompt:: bash $ + .. 
prompt:: bash git add modified_files git commit @@ -319,7 +338,7 @@ modifying code and submitting a PR: to record your changes in Git, then push the changes to your GitHub account with: - .. prompt:: bash $ + .. prompt:: bash git push -u origin my_feature @@ -329,22 +348,10 @@ modifying code and submitting a PR: email to the committers. You may want to consider sending an email to the mailing list for more visibility. -.. note:: - - If you are modifying a Cython module, you have to re-compile after - modifications and before testing them: - - .. prompt:: bash $ - - pip install -v --no-use-pep517 --no-build-isolation -e . - - Use the ``--no-build-isolation`` flag to avoid compiling the whole project - each time, only the files you have modified. - It is often helpful to keep your local feature branch synchronized with the latest changes of the main scikit-learn repository: -.. prompt:: bash $ +.. prompt:: bash git fetch upstream git merge upstream/main @@ -354,7 +361,7 @@ Subsequently, you might need to solve the conflicts. You can refer to the line `_. -.. topic:: Learning git: +.. topic:: Learning Git The `Git documentation `_ and http://try.github.io are excellent resources to get started with git, @@ -366,19 +373,18 @@ Pull request checklist ---------------------- Before a PR can be merged, it needs to be approved by two core developers. -Please prefix the title of your pull request with ``[MRG]`` if the -contribution is complete and should be subjected to a detailed review. An -incomplete contribution -- where you expect to do more work before receiving -a full review -- should be prefixed ``[WIP]`` (to indicate a work in -progress) and changed to ``[MRG]`` when it matures. WIPs may be useful to: +An incomplete contribution -- where you expect to do more work before receiving +a full review -- should be marked as a `draft pull request +`__ +and changed to "ready for review" when it matures. Draft PRs may be useful to: indicate you are working on something to avoid duplicated work, request -broad review of functionality or API, or seek collaborators. WIPs often +broad review of functionality or API, or seek collaborators. Draft PRs often benefit from the inclusion of a `task list `_ in the PR description. In order to ease the reviewing process, we recommend that your contribution -complies with the following rules before marking a PR as ``[MRG]``. The +complies with the following rules before marking a PR as "ready for review". The **bolded** ones are especially important: 1. **Give your pull request a helpful title** that summarizes what your @@ -425,91 +431,72 @@ complies with the following rules before marking a PR as ``[MRG]``. The non-regression tests should fail for the code base in the ``main`` branch and pass for the PR code. -5. Run `black` to auto-format your code. - - .. prompt:: bash $ - - black . +5. Follow the :ref:`coding-guidelines`. - See black's - `editor integration documentation `_ - to configure your editor to run `black`. +6. When applicable, use the validation tools and scripts in the :mod:`sklearn.utils` + module. A list of utility routines available for developers can be found in the + :ref:`developers-utils` page. -6. Run `flake8` to make sure you followed the project coding conventions. - - .. prompt:: bash $ - - flake8 . - -7. Follow the :ref:`coding-guidelines`. - - -8. When applicable, use the validation tools and scripts in the - ``sklearn.utils`` submodule. 
A list of utility routines available - for developers can be found in the :ref:`developers-utils` page. - -9. Often pull requests resolve one or more other issues (or pull requests). +7. Often pull requests resolve one or more other issues (or pull requests). If merging your pull request means that some other issues/PRs should be closed, you should `use keywords to create link to them `_ (e.g., ``Fixes #1234``; multiple issues/PRs are allowed as long as each one is preceded by a keyword). Upon merging, those issues/PRs will automatically be closed by GitHub. If your pull request is simply - related to some other issues/PRs, create a link to them without using - the keywords (e.g., ``See also #1234``). - -10. PRs should often substantiate the change, through benchmarks of - performance and efficiency (see :ref:`monitoring_performances`) or through - examples of usage. Examples also illustrate the features and intricacies of - the library to users. Have a look at other examples in the `examples/ - `_ - directory for reference. Examples should demonstrate why the new - functionality is useful in practice and, if possible, compare it to other - methods available in scikit-learn. - -11. New features have some maintenance overhead. We expect PR authors - to take part in the maintenance for the code they submit, at least - initially. New features need to be illustrated with narrative - documentation in the user guide, with small code snippets. - If relevant, please also add references in the literature, with PDF links - when possible. - -12. The user guide should also include expected time and space complexity + related to some other issues/PRs, or it only partially resolves the target + issue, create a link to them without using the keywords (e.g., ``Towards #1234``). + +8. PRs should often substantiate the change, through benchmarks of + performance and efficiency (see :ref:`monitoring_performances`) or through + examples of usage. Examples also illustrate the features and intricacies of + the library to users. Have a look at other examples in the `examples/ + `_ + directory for reference. Examples should demonstrate why the new + functionality is useful in practice and, if possible, compare it to other + methods available in scikit-learn. + +9. New features have some maintenance overhead. We expect PR authors + to take part in the maintenance for the code they submit, at least + initially. New features need to be illustrated with narrative + documentation in the user guide, with small code snippets. + If relevant, please also add references in the literature, with PDF links + when possible. + +10. The user guide should also include expected time and space complexity of the algorithm and scalability, e.g. "this algorithm can scale to a large number of samples > 100000, but does not scale in dimensionality: - n_features is expected to be lower than 100". + `n_features` is expected to be lower than 100". You can also check our :ref:`code_review` to get an idea of what reviewers will expect. You can check for common programming errors with the following tools: -* Code with a good unittest coverage (at least 80%, better 100%), check - with: +* Code with a good unit test coverage (at least 80%, better 100%), check with: - .. prompt:: bash $ + .. prompt:: bash pip install pytest pytest-cov - pytest --cov sklearn path/to/tests_for_package + pytest --cov sklearn path/to/tests - see also :ref:`testing_coverage` + See also :ref:`testing_coverage`. 
- Run static analysis with `mypy`: +* Run static analysis with `mypy`: - .. prompt:: bash $ + .. prompt:: bash mypy sklearn - must not produce new errors in your pull request. Using `# type: ignore` + This must not produce new errors in your pull request. Using `# type: ignore` annotation can be a workaround for a few cases that are not supported by mypy, in particular, - - when importing C or Cython modules - - on properties with decorators + - when importing C or Cython modules, + - on properties with decorators. Bonus points for contributions that include a performance analysis with a benchmark script and profiling output (see :ref:`monitoring_performances`). - Also check out the :ref:`performance-howto` guide for more details on profiling and Cython optimizations. @@ -520,7 +507,7 @@ profiling and Cython optimizations. on all new contributions will get the overall code base quality in the right direction. -.. note:: +.. seealso:: For two very well documented and more detailed guides on development workflow, please pay a visit to the `Scipy Development Workflow @@ -534,30 +521,33 @@ Continuous Integration (CI) * Azure pipelines are used for testing scikit-learn on Linux, Mac and Windows, with different dependencies and settings. -* CircleCI is used to build the docs for viewing, for linting with flake8, and - for testing with ARM64 / aarch64 on Linux +* CircleCI is used to build the docs for viewing. +* Github Actions are used for various tasks, including building wheels and + source distributions. +* Cirrus CI is used to build on ARM. Please note that if one of the following markers appear in the latest commit message, the following actions are taken. - ====================== =================== - Commit Message Marker Action Taken by CI - ---------------------- ------------------- - [ci skip] CI is skipped completely - [cd build] CD is run (wheels and source distribution are built) - [cd build gh] CD is run only for GitHub Actions - [cd build cirrus] CD is run only for Cirrus CI - [lint skip] Azure pipeline skips linting - [scipy-dev] Build & test with our dependencies (numpy, scipy, etc.) development builds - [nogil] Build & test with the nogil experimental branches of CPython, Cython, NumPy, SciPy, ... - [pypy] Build & test with PyPy - [pyodide] Build & test with Pyodide - [azure parallel] Run Azure CI jobs in parallel - [float32] Run float32 tests by setting `SKLEARN_RUN_FLOAT32_TESTS=1`. See :ref:`environment_variable` for more details - [doc skip] Docs are not built - [doc quick] Docs built, but excludes example gallery plots - [doc build] Docs built including example gallery plots (very long) - ====================== =================== +====================== =================== +Commit Message Marker Action Taken by CI +---------------------- ------------------- +[ci skip] CI is skipped completely +[cd build] CD is run (wheels and source distribution are built) +[cd build gh] CD is run only for GitHub Actions +[cd build cirrus] CD is run only for Cirrus CI +[lint skip] Azure pipeline skips linting +[scipy-dev] Build & test with our dependencies (numpy, scipy, etc.) development builds +[nogil] Build & test with the nogil experimental branches of CPython, Cython, NumPy, SciPy, ... +[pypy] Build & test with PyPy +[pyodide] Build & test with Pyodide +[azure parallel] Run Azure CI jobs in parallel +[cirrus arm] Run Cirrus CI ARM test +[float32] Run float32 tests by setting `SKLEARN_RUN_FLOAT32_TESTS=1`. 
See :ref:`environment_variable` for more details +[doc skip] Docs are not built +[doc quick] Docs built, but excludes example gallery plots +[doc build] Docs built including example gallery plots (very long) +====================== =================== Note that, by default, the documentation is built but only the examples that are directly modified by the pull request are executed. @@ -569,9 +559,7 @@ Stalled pull requests As contributing a feature can be a lengthy process, some pull requests appear inactive but unfinished. In such a case, taking -them over is a great service for the project. - -A good etiquette to take over is: +them over is a great service for the project. A good etiquette to take over is: * **Determine if a PR is stalled** @@ -638,33 +626,32 @@ the contributor become familiar with the contribution workflow, and for the core devs to become acquainted with the contributor; besides which, we frequently underestimate how easy an issue is to solve! -.. topic:: good first issue tag +- **Good first issue tag** - A great way to start contributing to scikit-learn is to pick an item from - the list of `good first issues - `_ - in the issue tracker. Resolving these issues allow you to start contributing - to the project without much prior knowledge. If you have already contributed - to scikit-learn, you should look at Easy issues instead. + A great way to start contributing to scikit-learn is to pick an item from + the list of `good first issues + `_ + in the issue tracker. Resolving these issues allow you to start contributing + to the project without much prior knowledge. If you have already contributed + to scikit-learn, you should look at Easy issues instead. -.. topic:: Easy tag +- **Easy tag** - If you have already contributed to scikit-learn, another great way to contribute - to scikit-learn is to pick an item from the list of `Easy issues - `_ in the issue - tracker. Your assistance in this area will be greatly appreciated by the - more experienced developers as it helps free up their time to concentrate on - other issues. + If you have already contributed to scikit-learn, another great way to contribute + to scikit-learn is to pick an item from the list of `Easy issues + `_ in the issue + tracker. Your assistance in this area will be greatly appreciated by the + more experienced developers as it helps free up their time to concentrate on + other issues. -.. topic:: help wanted tag +- **Help wanted tag** - We often use the help wanted tag to mark issues regardless of difficulty. Additionally, - we use the help wanted tag to mark Pull Requests which have been abandoned - by their original contributor and are available for someone to pick up where the original - contributor left off. The list of issues with the help wanted tag can be found - `here `_. - - Note that not all issues which need contributors will have this tag. + We often use the help wanted tag to mark issues regardless of difficulty. + Additionally, we use the help wanted tag to mark Pull Requests which have been + abandoned by their original contributor and are available for someone to pick up where + the original contributor left off. The list of issues with the help wanted tag can be + found `here `_. + Note that not all issues which need contributors will have this tag. .. 
_contribute_documentation: @@ -673,60 +660,271 @@ Documentation We are glad to accept any sort of documentation: -* **function/method/class docstrings** (also known as "API documentation") - - these describe what the object does and details any parameters, attributes and - methods. Docstrings live alongside the code in - `sklearn/ `_. -* **user guide** - these provide more detailed information about the algorithms +* **Function/method/class docstrings:** Also known as "API documentation", these + describe what the object does and details any parameters, attributes and + methods. Docstrings live alongside the code in `sklearn/ + `_, and are generated + generated according to `doc/api_reference.py + `_. To + add, update, remove, or deprecate a public API that is listed in :ref:`api_ref`, this + is the place to look at. +* **User guide:** These provide more detailed information about the algorithms implemented in scikit-learn and generally live in the root `doc/ `_ directory and `doc/modules/ `_. -* **tutorials** - these introduce various statistical learning and machine learning - concepts and are located in - `doc/tutorial `_. -* **examples** - these provide full code examples that may demonstrate the use +* **Examples:** These provide full code examples that may demonstrate the use of scikit-learn modules, compare different algorithms or discuss their - interpretation etc. Examples live in - `examples/ `_ -* **other reStructuredText documents** (like this one) - provide various other - useful information (e.g., our guide to contributing) and live in + interpretation, etc. Examples live in + `examples/ `_. +* **Other reStructuredText documents:** These provide various other useful information + (e.g., the :ref:`contributing` guide) and live in `doc/ `_. + +.. dropdown:: Guidelines for writing docstrings + + * When documenting the parameters and attributes, here is a list of some + well-formatted examples + + .. code-block:: text + + n_clusters : int, default=3 + The number of clusters detected by the algorithm. + + some_param : {"hello", "goodbye"}, bool or int, default=True + The parameter description goes here, which can be either a string + literal (either `hello` or `goodbye`), a bool, or an int. The default + value is True. + + array_parameter : {array-like, sparse matrix} of shape (n_samples, n_features) \ + or (n_samples,) + This parameter accepts data in either of the mentioned forms, with one + of the mentioned shapes. The default value is `np.ones(shape=(n_samples,))`. + + list_param : list of int + + typed_ndarray : ndarray of shape (n_samples,), dtype=np.int32 + + sample_weight : array-like of shape (n_samples,), default=None + + multioutput_array : ndarray of shape (n_samples, n_classes) or list of such arrays + + In general have the following in mind: + + * Use Python basic types. (``bool`` instead of ``boolean``) + * Use parenthesis for defining shapes: ``array-like of shape (n_samples,)`` + or ``array-like of shape (n_samples, n_features)`` + * For strings with multiple options, use brackets: ``input: {'log', + 'squared', 'multinomial'}`` + * 1D or 2D data can be a subset of ``{array-like, ndarray, sparse matrix, + dataframe}``. Note that ``array-like`` can also be a ``list``, while + ``ndarray`` is explicitly only a ``numpy.ndarray``. + * Specify ``dataframe`` when "frame-like" features are being used, such as + the column names. + * When specifying the data type of a list, use ``of`` as a delimiter: ``list + of int``. 
When the parameter supports arrays giving details about the + shape and/or data type and a list of such arrays, you can use one of + ``array-like of shape (n_samples,) or list of such arrays``. + * When specifying the dtype of an ndarray, use e.g. ``dtype=np.int32`` after + defining the shape: ``ndarray of shape (n_samples,), dtype=np.int32``. You + can specify multiple dtype as a set: ``array-like of shape (n_samples,), + dtype={np.float64, np.float32}``. If one wants to mention arbitrary + precision, use `integral` and `floating` rather than the Python dtype + `int` and `float`. When both `int` and `floating` are supported, there is + no need to specify the dtype. + * When the default is ``None``, ``None`` only needs to be specified at the + end with ``default=None``. Be sure to include in the docstring, what it + means for the parameter or attribute to be ``None``. + + * Add "See Also" in docstrings for related classes/functions. + + * "See Also" in docstrings should be one line per reference, with a colon and an + explanation, for example: + + .. code-block:: text + + See Also + -------- + SelectKBest : Select features based on the k highest scores. + SelectFpr : Select features based on a false positive rate test. + + * Add one or two snippets of code in "Example" section to show how it can be used. + + +.. dropdown:: Guidelines for writing the user guide and other reStructuredText documents + + It is important to keep a good compromise between mathematical and algorithmic + details, and give intuition to the reader on what the algorithm does. + + * Begin with a concise, hand-waving explanation of what the algorithm/code does on + the data. + + * Highlight the usefulness of the feature and its recommended application. + Consider including the algorithm's complexity + (:math:`O\left(g\left(n\right)\right)`) if available, as "rules of thumb" can + be very machine-dependent. Only if those complexities are not available, then + rules of thumb may be provided instead. + + * Incorporate a relevant figure (generated from an example) to provide intuitions. + + * Include one or two short code examples to demonstrate the feature's usage. + + * Introduce any necessary mathematical equations, followed by references. By + deferring the mathematical aspects, the documentation becomes more accessible + to users primarily interested in understanding the feature's practical + implications rather than its underlying mechanics. + + * When editing reStructuredText (``.rst``) files, try to keep line length under + 88 characters when possible (exceptions include links and tables). + + * In scikit-learn reStructuredText files both single and double backticks + surrounding text will render as inline literal (often used for code, e.g., + `list`). This is due to specific configurations we have set. Single + backticks should be used nowadays. + + * Too much information makes it difficult for users to access the content they + are interested in. Use dropdowns to factorize it by using the following syntax + + .. code-block:: rst + + .. dropdown:: Dropdown title + + Dropdown content. + + The snippet above will result in the following dropdown: + + .. dropdown:: Dropdown title + + Dropdown content. + + * Information that can be hidden by default using dropdowns is: + + * low hierarchy sections such as `References`, `Properties`, etc. 
(see for + instance the subsections in :ref:`det_curve`); + + * in-depth mathematical details; + + * narrative that is use-case specific; + + * in general, narrative that may only interest users that want to go beyond + the pragmatics of a given tool. + + * Do not use dropdowns for the low level section `Examples`, as it should stay + visible to all users. Make sure that the `Examples` section comes right after + the main discussion with the least possible folded section in-between. + + * Be aware that dropdowns break cross-references. If that makes sense, hide the + reference along with the text mentioning it. Else, do not use dropdown. + + +.. dropdown:: Guidelines for writing references + + * When bibliographic references are available with `arxiv `_ + or `Digital Object Identifier `_ identification numbers, + use the sphinx directives `:arxiv:` or `:doi:`. For example, see references in + :ref:`Spectral Clustering Graphs `. + + * For the "References" section in docstrings, see + :func:`sklearn.metrics.silhouette_score` as an example. + + * To cross-reference to other pages in the scikit-learn documentation use the + reStructuredText cross-referencing syntax: + + * **Section:** to link to an arbitrary section in the documentation, use + reference labels (see `Sphinx docs + `_). + For example: + + .. code-block:: rst + + .. _my-section: + + My section + ---------- + + This is the text of the section. + + To refer to itself use :ref:`my-section`. + + You should not modify existing sphinx reference labels as this would break + existing cross references and external links pointing to specific sections + in the scikit-learn documentation. + + * **Glossary:** linking to a term in the :ref:`glossary`: + + .. code-block:: rst + + :term:`cross_validation` + + * **Function:** to link to the documentation of a function, use the full import + path to the function: + + .. code-block:: rst + + :func:`~sklearn.model_selection.cross_val_score` + + However, if there is a `.. currentmodule::` directive above you in the document, + you will only need to use the path to the function succeeding the current + module specified. For example: + + .. code-block:: rst + + .. currentmodule:: sklearn.model_selection + + :func:`cross_val_score` + + * **Class:** to link to documentation of a class, use the full import path to the + class, unless there is a `.. currentmodule::` directive in the document above + (see above): + + .. code-block:: rst + + :class:`~sklearn.preprocessing.StandardScaler` + You can edit the documentation using any text editor, and then generate the HTML output by following :ref:`building_documentation`. The resulting HTML files -will be placed in ``_build/html/stable`` and are viewable in a web browser, for -instance by opening the local ``_build/html/stable/index.html`` file. +will be placed in ``_build/html/`` and are viewable in a web browser, for instance by +opening the local ``_build/html/index.html`` file or by running a local server + +.. prompt:: bash + + python -m http.server -d _build/html + .. _building_documentation: Building the documentation -------------------------- -First, make sure you have :ref:`properly installed ` -the development version. +**Before submitting a pull request check if your modifications have introduced +new sphinx warnings by building the documentation locally and try to fix them.** + +First, make sure you have :ref:`properly installed ` the +development version. On top of that, building the documentation requires installing some +additional packages: .. 
packaging is not needed once setuptools starts shipping packaging>=17.0 -Building the documentation requires installing some additional packages: - -.. prompt:: bash $ +.. prompt:: bash pip install sphinx sphinx-gallery numpydoc matplotlib Pillow pandas \ - scikit-image packaging seaborn sphinx-prompt \ - sphinxext-opengraph plotly pooch + polars scikit-image packaging seaborn sphinx-prompt \ + sphinxext-opengraph sphinx-copybutton plotly pooch \ + pydata-sphinx-theme sphinxcontrib-sass sphinx-design \ + sphinx-remove-toctrees To build the documentation, you need to be in the ``doc`` folder: -.. prompt:: bash $ +.. prompt:: bash cd doc In the vast majority of cases, you only need to generate the full web site, without the example gallery: -.. prompt:: bash $ +.. prompt:: bash make @@ -735,204 +933,35 @@ and are viewable in a web browser, for instance by opening the local ``_build/html/stable/index.html`` file. To also generate the example gallery you can use: -.. prompt:: bash $ +.. prompt:: bash make html -This will run all the examples, which takes a while. If you only want to -generate a few examples, you can use: +This will run all the examples, which takes a while. If you only want to generate a few +examples, which is particularly useful if you are modifying only a few examples, you can +use: -.. prompt:: bash $ +.. prompt:: bash EXAMPLES_PATTERN=your_regex_goes_here make html -This is particularly useful if you are modifying a few examples. - -Set the environment variable `NO_MATHJAX=1` if you intend to view -the documentation in an offline setting. +Set the environment variable `NO_MATHJAX=1` if you intend to view the documentation in +an offline setting. To build the PDF manual, run: -To build the PDF manual, run: - -.. prompt:: bash $ +.. prompt:: bash make latexpdf -.. warning:: **Sphinx version** +.. admonition:: Sphinx version + :class: warning While we do our best to have the documentation build under as many versions of Sphinx as possible, the different versions tend to behave slightly differently. To get the best results, you should use the same version as the one we used on CircleCI. Look at this - `github search `_ + `GitHub search `_ to know the exact version. -Guidelines for writing documentation ------------------------------------- - -It is important to keep a good compromise between mathematical and algorithmic -details, and give intuition to the reader on what the algorithm does. - -Basically, to elaborate on the above, it is best to always -start with a small paragraph with a hand-waving explanation of what the -method does to the data. Then, it is very helpful to point out why the feature is -useful and when it should be used - the latter also including "big O" -(:math:`O\left(g\left(n\right)\right)`) complexities of the algorithm, as opposed -to just *rules of thumb*, as the latter can be very machine-dependent. If those -complexities are not available, then rules of thumb may be provided instead. - -Secondly, a generated figure from an example (as mentioned in the previous -paragraph) should then be included to further provide some intuition. - -Next, one or two small code examples to show its use can be added. - -Next, any math and equations, followed by references, -can be added to further the documentation. Not starting the -documentation with the maths makes it more friendly towards -users that are just interested in what the feature will do, as -opposed to how it works "under the hood". 
- -Finally, follow the formatting rules below to make it consistently good: - -* Add "See Also" in docstrings for related classes/functions. - -* "See Also" in docstrings should be one line per reference, - with a colon and an explanation, for example:: - - See Also - -------- - SelectKBest : Select features based on the k highest scores. - SelectFpr : Select features based on a false positive rate test. - -* When documenting the parameters and attributes, here is a list of some - well-formatted examples:: - - n_clusters : int, default=3 - The number of clusters detected by the algorithm. - - some_param : {'hello', 'goodbye'}, bool or int, default=True - The parameter description goes here, which can be either a string - literal (either `hello` or `goodbye`), a bool, or an int. The default - value is True. - - array_parameter : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples,) - This parameter accepts data in either of the mentioned forms, with one - of the mentioned shapes. The default value is - `np.ones(shape=(n_samples,))`. - - list_param : list of int - - typed_ndarray : ndarray of shape (n_samples,), dtype=np.int32 - - sample_weight : array-like of shape (n_samples,), default=None - - multioutput_array : ndarray of shape (n_samples, n_classes) or list of such arrays - - In general have the following in mind: - - 1. Use Python basic types. (``bool`` instead of ``boolean``) - 2. Use parenthesis for defining shapes: ``array-like of shape (n_samples,)`` - or ``array-like of shape (n_samples, n_features)`` - 3. For strings with multiple options, use brackets: - ``input: {'log', 'squared', 'multinomial'}`` - 4. 1D or 2D data can be a subset of - ``{array-like, ndarray, sparse matrix, dataframe}``. Note that ``array-like`` - can also be a ``list``, while ``ndarray`` is explicitly only a ``numpy.ndarray``. - 5. Specify ``dataframe`` when "frame-like" features are being used, such - as the column names. - 6. When specifying the data type of a list, use ``of`` as a delimiter: - ``list of int``. When the parameter supports arrays giving details - about the shape and/or data type and a list of such arrays, you can - use one of ``array-like of shape (n_samples,) or list of such arrays``. - 7. When specifying the dtype of an ndarray, use e.g. ``dtype=np.int32`` - after defining the shape: - ``ndarray of shape (n_samples,), dtype=np.int32``. You can specify - multiple dtype as a set: - ``array-like of shape (n_samples,), dtype={np.float64, np.float32}``. - If one wants to mention arbitrary precision, use `integral` and - `floating` rather than the Python dtype `int` and `float`. When both - `int` and `floating` are supported, there is no need to specify the - dtype. - 8. When the default is ``None``, ``None`` only needs to be specified at the - end with ``default=None``. Be sure to include in the docstring, what it - means for the parameter or attribute to be ``None``. - -* For unwritten formatting rules, try to follow existing good works: - - * When bibliographic references are available with `arxiv `_ - or `Digital Object Identifier `_ identification numbers, - use the sphinx directives `:arxiv:` or `:doi:`. For example, see references in - :ref:`Spectral Clustering Graphs `. - * For "References" in docstrings, see the Silhouette Coefficient - (:func:`sklearn.metrics.silhouette_score`). - -* When editing reStructuredText (``.rst``) files, try to keep line length under - 80 characters when possible (exceptions include links and tables). 
- -* In scikit-learn reStructuredText files both single and double backticks - surrounding text will render as inline literal (often used for code, e.g., - `list`). This is due to specific configurations we have set. Single - backticks should be used nowadays. - -* Before submitting your pull request check if your modifications have - introduced new sphinx warnings and try to fix them. - -Cross-referencing ------------------ - -It is often useful to cross-reference to other pages in the scikit-learn -documentation. This should be done with reStructuredText cross-referencing -syntax: - -* Section - to link to an arbitrary section in the documentation, use reference - labels (see - `Sphinx docs `_). - For example: - - .. code-block:: rst - - .. _my-section: - - My section - ---------- - - This is the text of the section. - - To refer to itself use :ref:`my-section`. - - You should not modify existing sphinx reference labels as this would break - existing cross references and external links pointing to specific sections in - the scikit-learn documentation. - -* Glossary - linking to a term in the :ref:`glossary`: - - .. code-block:: rst - - :term:`cross_validation` - -* Function - to link to the documentation of a function, use the full - import path to the function: - - .. code-block:: rst - - :func:`~sklearn.model_selection.cross_val_score` - - However, if there is a 'currentmodule' directive above you in the document, - you will only need to use the path to the function succeeding the current - module specified. For example: - - .. code-block:: rst - - .. currentmodule:: sklearn.model_selection - - :func:`cross_val_score` - -* Class - to link to documentation of a class, use the full import path to the - class, unless there is a 'currentmodule' directive in the document above - (see above): - - .. code-block:: rst - - :class:`~sklearn.preprocessing.StandardScaler` .. _generated_doc_CI: @@ -965,40 +994,36 @@ subpackages. For a more detailed `pytest` workflow, please refer to the We expect code coverage of new features to be at least around 90%. +.. dropdown:: Writing matplotlib-related tests -Writing matplotlib related tests --------------------------------- - -Test fixtures ensure that a set of tests will be executing with the appropriate -initialization and cleanup. The scikit-learn test suite implements a fixture -which can be used with ``matplotlib``. + Test fixtures ensure that a set of tests will be executing with the appropriate + initialization and cleanup. The scikit-learn test suite implements a ``pyplot`` + fixture which can be used with ``matplotlib``. -``pyplot`` - The ``pyplot`` fixture should be used when a test function is dealing with - ``matplotlib``. ``matplotlib`` is a soft dependency and is not required. - This fixture is in charge of skipping the tests if ``matplotlib`` is not - installed. In addition, figures created during the tests will be - automatically closed once the test function has been executed. + The ``pyplot`` fixture should be used when a test function is dealing with + ``matplotlib``. ``matplotlib`` is a soft dependency and is not required. + This fixture is in charge of skipping the tests if ``matplotlib`` is not + installed. In addition, figures created during the tests will be + automatically closed once the test function has been executed. 
-To use this fixture in a test function, one needs to pass it as an -argument:: + To use this fixture in a test function, one needs to pass it as an + argument:: - def test_requiring_mpl_fixture(pyplot): - # you can now safely use matplotlib + def test_requiring_mpl_fixture(pyplot): + # you can now safely use matplotlib -Workflow to improve test coverage ---------------------------------- +.. dropdown:: Workflow to improve test coverage -To test code coverage, you need to install the `coverage -`_ package in addition to pytest. + To test code coverage, you need to install the `coverage + `_ package in addition to `pytest`. -1. Run 'make test-coverage'. The output lists for each file the line - numbers that are not tested. + 1. Run `make test-coverage`. The output lists for each file the line + numbers that are not tested. -2. Find a low hanging fruit, looking at which lines are not tested, - write or adapt a test specifically for these lines. + 2. Find a low hanging fruit, looking at which lines are not tested, + write or adapt a test specifically for these lines. -3. Loop. + 3. Loop. .. _monitoring_performances: @@ -1012,8 +1037,9 @@ When proposing changes to the existing code base, it's important to make sure that they don't introduce performance regressions. Scikit-learn uses `asv benchmarks `_ to monitor the performance of a selection of common estimators and functions. You can view -these benchmarks on the `scikit-learn benchmark page `_. -The corresponding benchmark suite can be found in the `scikit-learn/asv_benchmarks` directory. +these benchmarks on the `scikit-learn benchmark page +`_. +The corresponding benchmark suite can be found in the `asv_benchmarks/` directory. To use all features of asv, you will need either `conda` or `virtualenv`. For more details please check the `asv installation webpage @@ -1021,20 +1047,20 @@ more details please check the `asv installation webpage First of all you need to install the development version of asv: -.. prompt:: bash $ +.. prompt:: bash pip install git+https://github.com/airspeed-velocity/asv and change your directory to `asv_benchmarks/`: -.. prompt:: bash $ +.. prompt:: bash - cd asv_benchmarks/ + cd asv_benchmarks The benchmark suite is configured to run against your local clone of scikit-learn. Make sure it is up to date: -.. prompt:: bash $ +.. prompt:: bash git fetch upstream @@ -1042,20 +1068,20 @@ In the benchmark suite, the benchmarks are organized following the same structure as scikit-learn. For example, you can compare the performance of a specific estimator between ``upstream/main`` and the branch you are working on: -.. prompt:: bash $ +.. prompt:: bash asv continuous -b LogisticRegression upstream/main HEAD The command uses conda by default for creating the benchmark environments. If you want to use virtualenv instead, use the `-E` flag: -.. prompt:: bash $ +.. prompt:: bash asv continuous -E virtualenv -b LogisticRegression upstream/main HEAD You can also specify a whole module to benchmark: -.. prompt:: bash $ +.. prompt:: bash asv continuous -b linear_model upstream/main HEAD @@ -1065,7 +1091,7 @@ the `-f` flag. To run the full benchmark suite, simply remove the `-b` flag : -.. prompt:: bash $ +.. prompt:: bash asv continuous upstream/main HEAD @@ -1075,14 +1101,14 @@ expression for a more complex subset of benchmarks to run. To run the benchmarks without comparing to another branch, use the `run` command: -.. prompt:: bash $ +.. prompt:: bash asv run -b linear_model HEAD^! 
You can also run the benchmark suite using the version of scikit-learn already installed in your current Python environment: -.. prompt:: bash $ +.. prompt:: bash asv run --python=same @@ -1091,20 +1117,20 @@ avoid creating a new environment each time you run the benchmarks. By default the results are not saved when using an existing installation. To save the results you must specify a commit hash: -.. prompt:: bash $ +.. prompt:: bash asv run --python=same --set-commit-hash= Benchmarks are saved and organized by machine, environment and commit. To see the list of all saved benchmarks: -.. prompt:: bash $ +.. prompt:: bash asv show and to see the report of a specific run: -.. prompt:: bash $ +.. prompt:: bash asv show @@ -1127,11 +1153,11 @@ All issues and pull requests on the `GitHub issue tracker `_ should have (at least) one of the following tags: -:Bug / Crash: +:Bug: Something is happening that clearly shouldn't happen. Wrong results as well as unexpected errors from estimators go here. -:Cleanup / Enhancement: +:Enhancement: Improving performance, usability, consistency. :Documentation: @@ -1142,7 +1168,7 @@ should have (at least) one of the following tags: There are four other tags to help new contributors: -:good first issue: +:Good first issue: This issue is ideal for a first contribution to scikit-learn. Ask for help if the formulation is unclear. If you have already contributed to scikit-learn, look at Easy issues instead. @@ -1154,7 +1180,7 @@ There are four other tags to help new contributors: Might need some knowledge of machine learning or the package, but is still approachable for someone new to the project. -:help wanted: +:Help wanted: This tag marks an issue which currently lacks a contributor or a PR that needs another contributor to take over the work. These issues can range in difficulty, and may not be approachable @@ -1171,12 +1197,15 @@ Maintaining backwards compatibility Deprecation ----------- -If any publicly accessible method, function, attribute or parameter -is renamed, we still support the old one for two releases and issue -a deprecation warning when it is called/passed/accessed. -E.g., if the function ``zero_one`` is renamed to ``zero_one_loss``, -we add the decorator ``deprecated`` (from ``sklearn.utils``) -to ``zero_one`` and call ``zero_one_loss`` from that function:: +If any publicly accessible class, function, method, attribute or parameter is renamed, +we still support the old one for two releases and issue a deprecation warning when it is +called, passed, or accessed. + +.. rubric:: Deprecating a class or a function + +Suppose the function ``zero_one`` is renamed to ``zero_one_loss``, we add the decorator +:class:`utils.deprecated` to ``zero_one`` and call ``zero_one_loss`` from that +function:: from ..utils import deprecated @@ -1184,36 +1213,47 @@ to ``zero_one`` and call ``zero_one_loss`` from that function:: # actual implementation pass - @deprecated("Function 'zero_one' was renamed to 'zero_one_loss' " - "in version 0.13 and will be removed in release 0.15. " - "Default behavior is changed from 'normalize=False' to " - "'normalize=True'") + @deprecated( + "Function `zero_one` was renamed to `zero_one_loss` in 0.13 and will be " + "removed in 0.15. Default behavior is changed from `normalize=False` to " + "`normalize=True`" + ) def zero_one(y_true, y_pred, normalize=False): return zero_one_loss(y_true, y_pred, normalize) -If an attribute is to be deprecated, -use the decorator ``deprecated`` on a property. 
Please note that the -``property`` decorator should be placed before the ``deprecated`` -decorator for the docstrings to be rendered properly. -E.g., renaming an attribute ``labels_`` to ``classes_`` can be done as:: +One also needs to move ``zero_one`` from ``API_REFERENCE`` to +``DEPRECATED_API_REFERENCE`` and add ``zero_one_loss`` to ``API_REFERENCE`` in the +``doc/api_reference.py`` file to reflect the changes in :ref:`api_ref`. + +.. rubric:: Deprecating an attribute or a method + +If an attribute or a method is to be deprecated, use the decorator +:class:`~utils.deprecated` on the property. Please note that the +:class:`~utils.deprecated` decorator should be placed before the ``property`` decorator +if there is one, so that the docstrings can be rendered properly. For instance, renaming +an attribute ``labels_`` to ``classes_`` can be done as:: - @deprecated("Attribute `labels_` was deprecated in version 0.13 and " - "will be removed in 0.15. Use `classes_` instead") + @deprecated( + "Attribute `labels_` was deprecated in 0.13 and will be removed in 0.15. Use " + "`classes_` instead" + ) @property def labels_(self): return self.classes_ -If a parameter has to be deprecated, a ``FutureWarning`` warning -must be raised too. -In the following example, k is deprecated and renamed to n_clusters:: +.. rubric:: Deprecating a parameter + +If a parameter has to be deprecated, a ``FutureWarning`` warning must be raised +manually. In the following example, ``k`` is deprecated and renamed to n_clusters:: import warnings - def example_function(n_clusters=8, k='deprecated'): - if k != 'deprecated': - warnings.warn("'k' was renamed to n_clusters in version 0.13 and " - "will be removed in 0.15.", - FutureWarning) + def example_function(n_clusters=8, k="deprecated"): + if k != "deprecated": + warnings.warn( + "`k` was renamed to `n_clusters` in 0.13 and will be removed in 0.15", + FutureWarning, + ) n_clusters = k When the change is in a class, we validate and raise warning in ``fit``:: @@ -1226,10 +1266,11 @@ When the change is in a class, we validate and raise warning in ``fit``:: self.k = k def fit(self, X, y): - if self.k != 'deprecated': - warnings.warn("'k' was renamed to n_clusters in version 0.13 and " - "will be removed in 0.15.", - FutureWarning) + if self.k != "deprecated": + warnings.warn( + "`k` was renamed to `n_clusters` in 0.13 and will be removed in 0.15.", + FutureWarning, + ) self._n_clusters = self.k else: self._n_clusters = self.n_clusters @@ -1243,9 +1284,14 @@ adapt their code to the new behaviour. For example, if the deprecation happened in version 0.18-dev, the message should say it happened in version 0.18 and the old behavior will be removed in version 0.20. +The warning message should also include a brief explanation of the change and point +users to an alternative. + In addition, a deprecation note should be added in the docstring, recalling the same information as the deprecation warning as explained above. Use the -``.. deprecated::`` directive:: +``.. deprecated::`` directive: + +.. code-block:: rst .. deprecated:: 0.13 ``k`` was renamed to ``n_clusters`` in version 0.13 and will be removed @@ -1261,7 +1307,7 @@ Change the default value of a parameter --------------------------------------- If the default value of a parameter needs to be changed, please replace the -default value with a specific value (e.g., ``warn``) and raise +default value with a specific value (e.g., ``"warn"``) and raise ``FutureWarning`` when users are using the default value. 
The following example assumes that the current version is 0.20 and that we change the default value of ``n_clusters`` from 5 (old default for 0.20) to 10 @@ -1269,10 +1315,12 @@ default value of ``n_clusters`` from 5 (old default for 0.20) to 10 import warnings - def example_function(n_clusters='warn'): - if n_clusters == 'warn': - warnings.warn("The default value of n_clusters will change from " - "5 to 10 in 0.22.", FutureWarning) + def example_function(n_clusters="warn"): + if n_clusters == "warn": + warnings.warn( + "The default value of `n_clusters` will change from 5 to 10 in 0.22.", + FutureWarning, + ) n_clusters = 5 When the change is in a class, we validate and raise warning in ``fit``:: @@ -1280,22 +1328,26 @@ When the change is in a class, we validate and raise warning in ``fit``:: import warnings class ExampleEstimator: - def __init__(self, n_clusters='warn'): + def __init__(self, n_clusters="warn"): self.n_clusters = n_clusters def fit(self, X, y): - if self.n_clusters == 'warn': - warnings.warn("The default value of n_clusters will change from " - "5 to 10 in 0.22.", FutureWarning) - self._n_clusters = 5 + if self.n_clusters == "warn": + warnings.warn( + "The default value of `n_clusters` will change from 5 to 10 in 0.22.", + FutureWarning, + ) + self._n_clusters = 5 Similar to deprecations, the warning message should always give both the version in which the change happened and the version in which the old behavior will be removed. The parameter description in the docstring needs to be updated accordingly by adding -a `versionchanged` directive with the old and new default value, pointing to the -version when the change will be effective:: +a ``versionchanged`` directive with the old and new default value, pointing to the +version when the change will be effective: + +.. code-block:: rst .. versionchanged:: 0.22 The default value for `n_clusters` will change from 5 to 10 in version 0.22. @@ -1305,12 +1357,11 @@ not in other cases. The warning should be caught in all other tests (using e.g., ``@pytest.mark.filterwarnings``), and there should be no warning in the examples. -.. currentmodule:: sklearn - .. _code_review: Code Review Guidelines ====================== + Reviewing code contributed to the project as PRs is a crucial component of scikit-learn development. We encourage anyone to start reviewing code of other developers. The code review process is often highly educational for everybody @@ -1328,86 +1379,87 @@ up this process by providing your feedback. retraction. Regarding docs: typos, grammar issues and disambiguations are better addressed immediately. -Here are a few important aspects that need to be covered in any code review, -from high-level questions to a more detailed check-list. +.. dropdown:: Important aspects to be covered in any code review + + Here are a few important aspects that need to be covered in any code review, + from high-level questions to a more detailed check-list. -- Do we want this in the library? Is it likely to be used? Do you, as - a scikit-learn user, like the change and intend to use it? Is it in - the scope of scikit-learn? Will the cost of maintaining a new - feature be worth its benefits? + - Do we want this in the library? Is it likely to be used? Do you, as + a scikit-learn user, like the change and intend to use it? Is it in + the scope of scikit-learn? Will the cost of maintaining a new + feature be worth its benefits? -- Is the code consistent with the API of scikit-learn? 
Are public - functions/classes/parameters well named and intuitively designed? + - Is the code consistent with the API of scikit-learn? Are public + functions/classes/parameters well named and intuitively designed? -- Are all public functions/classes and their parameters, return types, and - stored attributes named according to scikit-learn conventions and documented clearly? + - Are all public functions/classes and their parameters, return types, and + stored attributes named according to scikit-learn conventions and documented clearly? -- Is any new functionality described in the user-guide and illustrated with examples? + - Is any new functionality described in the user-guide and illustrated with examples? -- Is every public function/class tested? Are a reasonable set of - parameters, their values, value types, and combinations tested? Do - the tests validate that the code is correct, i.e. doing what the - documentation says it does? If the change is a bug-fix, is a - non-regression test included? Look at `this - `__ - to get started with testing in Python. + - Is every public function/class tested? Are a reasonable set of + parameters, their values, value types, and combinations tested? Do + the tests validate that the code is correct, i.e. doing what the + documentation says it does? If the change is a bug-fix, is a + non-regression test included? Look at `this + `__ + to get started with testing in Python. -- Do the tests pass in the continuous integration build? If - appropriate, help the contributor understand why tests failed. + - Do the tests pass in the continuous integration build? If + appropriate, help the contributor understand why tests failed. -- Do the tests cover every line of code (see the coverage report in the build - log)? If not, are the lines missing coverage good exceptions? + - Do the tests cover every line of code (see the coverage report in the build + log)? If not, are the lines missing coverage good exceptions? -- Is the code easy to read and low on redundancy? Should variable names be - improved for clarity or consistency? Should comments be added? Should comments - be removed as unhelpful or extraneous? + - Is the code easy to read and low on redundancy? Should variable names be + improved for clarity or consistency? Should comments be added? Should comments + be removed as unhelpful or extraneous? -- Could the code easily be rewritten to run much more efficiently for - relevant settings? + - Could the code easily be rewritten to run much more efficiently for + relevant settings? -- Is the code backwards compatible with previous versions? (or is a - deprecation cycle necessary?) + - Is the code backwards compatible with previous versions? (or is a + deprecation cycle necessary?) -- Will the new code add any dependencies on other libraries? (this is - unlikely to be accepted) + - Will the new code add any dependencies on other libraries? (this is + unlikely to be accepted) -- Does the documentation render properly (see the - :ref:`contribute_documentation` section for more details), and are the plots - instructive? + - Does the documentation render properly (see the + :ref:`contribute_documentation` section for more details), and are the plots + instructive? -:ref:`saved_replies` includes some frequent comments that reviewers may make. + :ref:`saved_replies` includes some frequent comments that reviewers may make. .. _communication: -Communication Guidelines ------------------------- +.. 
dropdown:: Communication Guidelines -Reviewing open pull requests (PRs) helps move the project forward. It is a -great way to get familiar with the codebase and should motivate the -contributor to keep involved in the project. [1]_ + Reviewing open pull requests (PRs) helps move the project forward. It is a + great way to get familiar with the codebase and should motivate the + contributor to keep involved in the project. [1]_ -- Every PR, good or bad, is an act of generosity. Opening with a positive - comment will help the author feel rewarded, and your subsequent remarks may - be heard more clearly. You may feel good also. -- Begin if possible with the large issues, so the author knows they've been - understood. Resist the temptation to immediately go line by line, or to open - with small pervasive issues. -- Do not let perfect be the enemy of the good. If you find yourself making - many small suggestions that don't fall into the :ref:`code_review`, consider - the following approaches: + - Every PR, good or bad, is an act of generosity. Opening with a positive + comment will help the author feel rewarded, and your subsequent remarks may + be heard more clearly. You may feel good also. + - Begin if possible with the large issues, so the author knows they've been + understood. Resist the temptation to immediately go line by line, or to open + with small pervasive issues. + - Do not let perfect be the enemy of the good. If you find yourself making + many small suggestions that don't fall into the :ref:`code_review`, consider + the following approaches: - - refrain from submitting these; - - prefix them as "Nit" so that the contributor knows it's OK not to address; - - follow up in a subsequent PR, out of courtesy, you may want to let the - original contributor know. + - refrain from submitting these; + - prefix them as "Nit" so that the contributor knows it's OK not to address; + - follow up in a subsequent PR, out of courtesy, you may want to let the + original contributor know. -- Do not rush, take the time to make your comments clear and justify your - suggestions. -- You are the face of the project. Bad days occur to everyone, in that - occasion you deserve a break: try to take your time and stay offline. + - Do not rush, take the time to make your comments clear and justify your + suggestions. + - You are the face of the project. Bad days occur to everyone, in that + occasion you deserve a break: try to take your time and stay offline. -.. [1] Adapted from the numpy `communication guidelines - `_. + .. [1] Adapted from the numpy `communication guidelines + `_. Reading the existing code base ============================== @@ -1428,9 +1480,9 @@ make this task easier and faster (in no particular order). relevant, and which are not. In scikit-learn **a lot** of input checking is performed, especially at the beginning of the :term:`fit` methods. Sometimes, only a very small portion of the code is doing the actual job. - For example looking at the ``fit()`` method of + For example looking at the :meth:`~linear_model.LinearRegression.fit` method of :class:`~linear_model.LinearRegression`, what you're looking for - might just be the call the ``scipy.linalg.lstsq``, but it is buried into + might just be the call the :func:`scipy.linalg.lstsq`, but it is buried into multiple lines of input checking and the handling of different kinds of parameters. - Due to the use of `Inheritance @@ -1460,7 +1512,7 @@ make this task easier and faster (in no particular order). 
IDE goes a long way towards digesting the code base. Being able to quickly jump (or *peek*) to a function/class/attribute definition helps a lot. So does being able to quickly see where a given name is used in a file. - - `git `_ also has some built-in killer + - `Git `_ also has some built-in killer features. It is often useful to understand how a file changed over time, using e.g. ``git blame`` (`manual `_). This can also be done directly @@ -1472,7 +1524,7 @@ make this task easier and faster (in no particular order). - Configure `git blame` to ignore the commit that migrated the code style to `black`. - .. prompt:: bash $ + .. prompt:: bash git config blame.ignoreRevsFile .git-blame-ignore-revs diff --git a/doc/developers/cython.rst b/doc/developers/cython.rst index 0c319eda4a08d..82022ddcbcc56 100644 --- a/doc/developers/cython.rst +++ b/doc/developers/cython.rst @@ -58,13 +58,13 @@ Tips to ease development * You might find this alias to compile individual Cython extension handy: - .. code-block:: + .. code-block:: - # You might want to add this alias to your shell script config. - alias cythonX="cython -X language_level=3 -X boundscheck=False -X wraparound=False -X initializedcheck=False -X nonecheck=False -X cdivision=True" + # You might want to add this alias to your shell script config. + alias cythonX="cython -X language_level=3 -X boundscheck=False -X wraparound=False -X initializedcheck=False -X nonecheck=False -X cdivision=True" - # This generates `source.c` as as if you had recompiled scikit-learn entirely. - cythonX --annotate source.pyx + # This generates `source.c` as if you had recompiled scikit-learn entirely. + cythonX --annotate source.pyx * Using the ``--annotate`` option with this flag allows generating a HTML report of code annotation. This report indicates interactions with the CPython interpreter on a line-by-line basis. @@ -72,10 +72,10 @@ Tips to ease development the computationally intensive sections of the algorithms. For more information, please refer to `this section of Cython's tutorial `_ - .. code-block:: + .. code-block:: - # This generates a HTML report (`source.html`) for `source.c`. - cythonX --annotate source.pyx + # This generates a HTML report (`source.html`) for `source.c`. + cythonX --annotate source.pyx Tips for performance ^^^^^^^^^^^^^^^^^^^^ @@ -107,16 +107,16 @@ Tips for performance the GIL when entering them. You have to do that yourself either by passing ``nogil=True`` to ``cython.parallel.prange`` explicitly, or by using an explicit context manager: - .. code-block:: cython + .. code-block:: cython - cdef inline void my_func(self) nogil: + cdef inline void my_func(self) nogil: - # Some logic interacting with CPython, e.g. allocating arrays via NumPy. + # Some logic interacting with CPython, e.g. allocating arrays via NumPy. - with nogil: - # The code here is run as is it were written in C. + with nogil: + # The code here is run as is it were written in C. - return 0 + return 0 This item is based on `this comment from StÊfan's Benhel `_ @@ -141,3 +141,16 @@ must be ``cimported`` from this module and not from the OpenMP library directly: The parallel loop, `prange`, is already protected by cython and can be used directly from `cython.parallel`. + +Types +~~~~~ + +Cython code requires to use explicit types. This is one of the reasons you get a +performance boost. In order to avoid code duplication, we have a central place +for the most used types in +`sklearn/utils/_typedefs.pyd `_. 
+Ideally you start by having a look there and `cimport` types you need, for example + +.. code-block:: cython + + from sklearn.utils._typedefs cimport float32, float64 diff --git a/doc/developers/develop.rst b/doc/developers/develop.rst index f4fd4898865ea..97cb156da5812 100644 --- a/doc/developers/develop.rst +++ b/doc/developers/develop.rst @@ -54,8 +54,8 @@ multiple interfaces): :Transformer: - For filtering or modifying the data, in a supervised or unsupervised - way, implements:: + For modifying the data in a supervised or unsupervised way (e.g. by adding, changing, + or removing columns, but not by adding or removing rows). Implements:: new_data = transformer.transform(data) @@ -282,12 +282,16 @@ the correct interface more easily. in the scikit-learn-contrib `project template `__. + It is particularly important to notice that mixins should be "on the left" while + the ``BaseEstimator`` should be "on the right" in the inheritance list for proper + MRO. + >>> import numpy as np >>> from sklearn.base import BaseEstimator, ClassifierMixin >>> from sklearn.utils.validation import check_X_y, check_array, check_is_fitted >>> from sklearn.utils.multiclass import unique_labels >>> from sklearn.metrics import euclidean_distances - >>> class TemplateClassifier(BaseEstimator, ClassifierMixin): + >>> class TemplateClassifier(ClassifierMixin, BaseEstimator): ... ... def __init__(self, demo_param='demo'): ... self.demo_param = demo_param @@ -349,7 +353,7 @@ The parameter `deep` will control whether or not the parameters of the subestimator__intercept_scaling -> 1 subestimator__l1_ratio -> None subestimator__max_iter -> 100 - subestimator__multi_class -> auto + subestimator__multi_class -> deprecated subestimator__n_jobs -> None subestimator__penalty -> l2 subestimator__random_state -> None @@ -414,7 +418,7 @@ trailing ``_`` is used to check if the estimator has been fitted. Cloning ------- -For use with the :mod:`model_selection` module, +For use with the :mod:`~sklearn.model_selection` module, an estimator must support the ``base.clone`` function to replicate an estimator. This can be done by providing a ``get_params`` method. If ``get_params`` is present, then ``clone(estimator)`` will be an instance of @@ -508,7 +512,7 @@ independent term is stored in ``intercept_``. ``sklearn.linear_model._base`` contains a few base classes and mixins that implement common linear model patterns. -The :mod:`sklearn.utils.multiclass` module contains useful functions +The :mod:`~sklearn.utils.multiclass` module contains useful functions for working with multiclass and multilabel problems. .. _estimator_tags: @@ -568,7 +572,7 @@ pairwise (default=False) or a cross validation procedure that extracts a sub-sample of data intended for a pairwise estimator, where the data needs to be indexed on both axes. Specifically, this tag is used by - :func:`~sklearn.utils.metaestimators._safe_split` to slice rows and + `sklearn.utils.metaestimators._safe_split` to slice rows and columns. preserves_dtype (default=``[np.float64]``) @@ -709,6 +713,54 @@ only wrap the first array and not alter the other arrays. See :ref:`sphx_glr_auto_examples_miscellaneous_plot_set_output.py` for an example on how to use the API. +.. _developer_api_check_is_fitted: + +Developer API for `check_is_fitted` +=================================== + +By default :func:`~sklearn.utils.validation.check_is_fitted` checks if there +are any attributes in the instance with a trailing underscore, e.g. `coef_`. 
+An estimator can change the behavior by implementing a `__sklearn_is_fitted__` +method taking no input and returning a boolean. If this method exists, +:func:`~sklearn.utils.validation.check_is_fitted` simply returns its output. + +See :ref:`sphx_glr_auto_examples_developing_estimators_sklearn_is_fitted.py` +for an example on how to use the API. + +Developer API for HTML representation +===================================== + +.. warning:: + + The HTML representation API is experimental and the API is subject to change. + +Estimators inheriting from :class:`~sklearn.base.BaseEstimator` display +a HTML representation of themselves in interactive programming +environments such as Jupyter notebooks. For instance, we can display this HTML +diagram:: + + from sklearn.base import BaseEstimator + + BaseEstimator() + +The raw HTML representation is obtained by invoking the function +:func:`~sklearn.utils.estimator_html_repr` on an estimator instance. + +To customize the URL linking to an estimator's documentation (i.e. when clicking on the +"?" icon), override the `_doc_link_module` and `_doc_link_template` attributes. In +addition, you can provide a `_doc_link_url_param_generator` method. Set +`_doc_link_module` to the name of the (top level) module that contains your estimator. +If the value does not match the top level module name, the HTML representation will not +contain a link to the documentation. For scikit-learn estimators this is set to +`"sklearn"`. + +The `_doc_link_template` is used to construct the final URL. By default, it can contain +two variables: `estimator_module` (the full name of the module containing the estimator) +and `estimator_name` (the class name of the estimator). If you need more variables you +should implement the `_doc_link_url_param_generator` method which should return a +dictionary of the variables and their values. This dictionary will be used to render the +`_doc_link_template`. + .. _coding-guidelines: Coding guidelines @@ -855,7 +907,7 @@ Numerical assertions in tests ----------------------------- When asserting the quasi-equality of arrays of continuous values, -do use :func:`sklearn.utils._testing.assert_allclose`. +do use `sklearn.utils._testing.assert_allclose`. The relative tolerance is automatically inferred from the provided arrays dtypes (for float32 and float64 dtypes in particular) but you can override @@ -865,4 +917,4 @@ When comparing arrays of zero-elements, please do provide a non-zero value for the absolute tolerance via ``atol``. For more information, please refer to the docstring of -:func:`sklearn.utils._testing.assert_allclose`. +`sklearn.utils._testing.assert_allclose`. diff --git a/doc/developers/index.rst b/doc/developers/index.rst index c2cc35928cbf9..cca77b6a015c9 100644 --- a/doc/developers/index.rst +++ b/doc/developers/index.rst @@ -1,16 +1,9 @@ -.. Places parent toc into the sidebar - -:parenttoc: True - .. _developers_guide: ================= Developer's Guide ================= -.. include:: ../includes/big_toc_css.rst -.. include:: ../tune_toc.rst - .. toctree:: contributing diff --git a/doc/developers/maintainer.rst b/doc/developers/maintainer.rst index 6b49103774d9c..ffc9b73156fa8 100644 --- a/doc/developers/maintainer.rst +++ b/doc/developers/maintainer.rst @@ -1,6 +1,5 @@ -Maintainer / core-developer information -======================================== - +Maintainer/Core-Developer Information +====================================== Releasing --------- @@ -81,16 +80,16 @@ tag under that branch. 
This is done only once, as the major and minor releases happen on the same branch: - .. prompt:: bash $ +.. prompt:: bash $ - # Assuming upstream is an alias for the main scikit-learn repo: - git fetch upstream main - git checkout upstream/main - git checkout -b 0.99.X - git push --set-upstream upstream 0.99.X + # Assuming upstream is an alias for the main scikit-learn repo: + git fetch upstream main + git checkout upstream/main + git checkout -b 0.99.X + git push --set-upstream upstream 0.99.X - Again, `X` is literal here, and `99` is replaced by the release number. - The branches are called ``0.19.X``, ``0.20.X``, etc. +Again, `X` is literal here, and `99` is replaced by the release number. +The branches are called ``0.19.X``, ``0.20.X``, etc. In terms of including changes, the first RC ideally counts as a *feature freeze*. Each coming release candidate and the final release afterwards will @@ -105,14 +104,13 @@ in the description of the Pull Request to track progress. This PR will be used to push commits related to the release as explained in :ref:`making_a_release`. -You can also create a second PR from main and targeting main to increment -the ``__version__`` variable in `sklearn/__init__.py` to increment the dev -version. This means while we're in the release candidate period, the latest -stable is two versions behind the main branch, instead of one. In this PR -targeting main you should also include a new file for the matching version -under the ``doc/whats_new/`` folder so PRs that target the next version can -contribute their changelog entries to this file in parallel to the release -process. +You can also create a second PR from main and targeting main to increment the +``__version__`` variable in `sklearn/__init__.py` and in `pyproject.toml` to increment +the dev version. This means while we're in the release candidate period, the latest +stable is two versions behind the main branch, instead of one. In this PR targeting +main you should also include a new file for the matching version under the +``doc/whats_new/`` folder so PRs that target the next version can contribute their +changelog entries to this file in parallel to the release process. Minor version release (also known as bug-fix release) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -121,67 +119,67 @@ The minor releases should include bug fixes and some relevant documentation changes only. Any PR resulting in a behavior change which is not a bug fix should be excluded. As an example, instructions are given for the `1.2.2` release. - - Create a branch, **on your own fork** (here referred to as `fork`) for the release - from `upstream/main`. +- Create a branch, **on your own fork** (here referred to as `fork`) for the release + from `upstream/main`. - .. prompt:: bash $ + .. prompt:: bash $ - git fetch upstream/main - git checkout -b release-1.2.2 upstream/main - git push -u fork release-1.2.2:release-1.2.2 + git fetch upstream/main + git checkout -b release-1.2.2 upstream/main + git push -u fork release-1.2.2:release-1.2.2 - - Create a **draft** PR to the `upstream/1.2.X` branch (not to `upstream/main`) - with all the desired changes. +- Create a **draft** PR to the `upstream/1.2.X` branch (not to `upstream/main`) + with all the desired changes. - - Do not push anything on that branch yet. +- Do not push anything on that branch yet. - - Locally rebase `release-1.2.2` from the `upstream/1.2.X` branch using: +- Locally rebase `release-1.2.2` from the `upstream/1.2.X` branch using: - .. prompt:: bash $ + .. 
prompt:: bash $ - git rebase -i upstream/1.2.X + git rebase -i upstream/1.2.X - This will open an interactive rebase with the `git-rebase-todo` containing all - the latest commit on `main`. At this stage, you have to perform - this interactive rebase with at least someone else (being three people rebasing - is better not to forget something and to avoid any doubt). + This will open an interactive rebase with the `git-rebase-todo` containing all + the latest commit on `main`. At this stage, you have to perform + this interactive rebase with at least someone else (being three people rebasing + is better not to forget something and to avoid any doubt). - - **Do not remove lines but drop commit by replace** ``pick`` **with** ``drop`` + - **Do not remove lines but drop commit by replace** ``pick`` **with** ``drop`` - - Commits to pick for bug-fix release *generally* are prefixed with: `FIX`, `CI`, - `DOC`. They should at least include all the commits of the merged PRs - that were milestoned for this release on GitHub and/or documented as such in - the changelog. It's likely that some bugfixes were documented in the - changelog of the main major release instead of the next bugfix release, - in which case, the matching changelog entries will need to be moved, - first in the `main` branch then backported in the release PR. + - Commits to pick for bug-fix release *generally* are prefixed with: `FIX`, `CI`, + `DOC`. They should at least include all the commits of the merged PRs + that were milestoned for this release on GitHub and/or documented as such in + the changelog. It's likely that some bugfixes were documented in the + changelog of the main major release instead of the next bugfix release, + in which case, the matching changelog entries will need to be moved, + first in the `main` branch then backported in the release PR. - - Commits to drop for bug-fix release *generally* are prefixed with: `FEAT`, - `MAINT`, `ENH`, `API`. Reasons for not including them is to prevent change of - behavior (which only must feature in breaking or major releases). + - Commits to drop for bug-fix release *generally* are prefixed with: `FEAT`, + `MAINT`, `ENH`, `API`. Reasons for not including them is to prevent change of + behavior (which only must feature in breaking or major releases). - - After having dropped or picked commit, **do no exit** but paste the content - of the `git-rebase-todo` message in the PR. - This file is located at `.git/rebase-merge/git-rebase-todo`. + - After having dropped or picked commit, **do no exit** but paste the content + of the `git-rebase-todo` message in the PR. + This file is located at `.git/rebase-merge/git-rebase-todo`. - - Save and exit, starting the interactive rebase. + - Save and exit, starting the interactive rebase. - - Resolve merge conflicts when they happen. + - Resolve merge conflicts when they happen. - - Force push the result of the rebase and the extra release commits to the release PR: +- Force push the result of the rebase and the extra release commits to the release PR: - .. prompt:: bash $ + .. prompt:: bash $ - git push -f fork release-1.2.2:release-1.2.2 + git push -f fork release-1.2.2:release-1.2.2 - - Copy the :ref:`release_checklist` template and paste it in the description of the - Pull Request to track progress. +- Copy the :ref:`release_checklist` template and paste it in the description of the + Pull Request to track progress. - - Review all the commits included in the release to make sure that they do not - introduce any new feature. 
We should not blindly trust the commit message prefixes. +- Review all the commits included in the release to make sure that they do not + introduce any new feature. We should not blindly trust the commit message prefixes. - - Remove the draft status of the release PR and invite other maintainers to review the - list of included commits. +- Remove the draft status of the release PR and invite other maintainers to review the + list of included commits. .. _making_a_release: @@ -208,10 +206,12 @@ Making a release - Update the release date in ``whats_new.rst`` - Edit the ``doc/templates/index.html`` to change the 'News' entry of the - front page (with the release month as well). + front page (with the release month as well). Do not forget to remove + the old entries (two years or three releases are typically good + enough) and to update the on-going development entry. -2. On the branch for releasing, update the version number in - ``sklearn/__init__.py``, the ``__version__``. +2. On the branch for releasing, update the version number in ``sklearn/__init__.py``, + the ``__version__`` variable, and in `pyproject.toml`. For major releases, please add a 0 at the end: `0.99.0` instead of `0.99`. diff --git a/doc/developers/minimal_reproducer.rst b/doc/developers/minimal_reproducer.rst index 2cc82d083aaf1..b100bccbaa6b4 100644 --- a/doc/developers/minimal_reproducer.rst +++ b/doc/developers/minimal_reproducer.rst @@ -88,9 +88,9 @@ The following code, while **still not minimal**, is already **much better** because it can be copy-pasted in a Python terminal to reproduce the problem in one step. In particular: - - it contains **all necessary imports statements**; - - it can fetch the public dataset without having to manually download a - file and put it in the expected location on the disk. +- it contains **all necessary imports statements**; +- it can fetch the public dataset without having to manually download a + file and put it in the expected location on the disk. **Improved example** @@ -199,21 +199,21 @@ As already mentioned, the key to communication is the readability of the code and good formatting can really be a plus. Notice that in the previous snippet we: - - try to limit all lines to a maximum of 79 characters to avoid horizontal - scrollbars in the code snippets blocks rendered on the GitHub issue; - - use blank lines to separate groups of related functions; - - place all the imports in their own group at the beginning. +- try to limit all lines to a maximum of 79 characters to avoid horizontal + scrollbars in the code snippets blocks rendered on the GitHub issue; +- use blank lines to separate groups of related functions; +- place all the imports in their own group at the beginning. The simplification steps presented in this guide can be implemented in a different order than the progression we have shown here. The important points are: - - a minimal reproducer should be runnable by a simple copy-and-paste in a - python terminal; - - it should be simplified as much as possible by removing any code steps - that are not strictly needed to reproducing the original problem; - - it should ideally only rely on a minimal dataset generated on-the-fly by - running the code instead of relying on external data, if possible. 
+- a minimal reproducer should be runnable by a simple copy-and-paste in a + python terminal; +- it should be simplified as much as possible by removing any code steps + that are not strictly needed to reproducing the original problem; +- it should ideally only rely on a minimal dataset generated on-the-fly by + running the code instead of relying on external data, if possible. Use markdown formatting @@ -305,50 +305,50 @@ can be used to create dummy numeric data. - regression - Regressions take continuous numeric data as features and target. + Regressions take continuous numeric data as features and target. - .. code-block:: python + .. code-block:: python - import numpy as np + import numpy as np - rng = np.random.RandomState(0) - n_samples, n_features = 5, 5 - X = rng.randn(n_samples, n_features) - y = rng.randn(n_samples) + rng = np.random.RandomState(0) + n_samples, n_features = 5, 5 + X = rng.randn(n_samples, n_features) + y = rng.randn(n_samples) A similar snippet can be used as synthetic data when testing scaling tools such as :class:`sklearn.preprocessing.StandardScaler`. - classification - If the bug is not raised during when encoding a categorical variable, you can - feed numeric data to a classifier. Just remember to ensure that the target - is indeed an integer. + If the bug is not raised during when encoding a categorical variable, you can + feed numeric data to a classifier. Just remember to ensure that the target + is indeed an integer. - .. code-block:: python + .. code-block:: python - import numpy as np + import numpy as np - rng = np.random.RandomState(0) - n_samples, n_features = 5, 5 - X = rng.randn(n_samples, n_features) - y = rng.randint(0, 2, n_samples) # binary target with values in {0, 1} + rng = np.random.RandomState(0) + n_samples, n_features = 5, 5 + X = rng.randn(n_samples, n_features) + y = rng.randint(0, 2, n_samples) # binary target with values in {0, 1} - If the bug only happens with non-numeric class labels, you might want to - generate a random target with `numpy.random.choice - `_. + If the bug only happens with non-numeric class labels, you might want to + generate a random target with `numpy.random.choice + `_. - .. code-block:: python + .. code-block:: python - import numpy as np + import numpy as np - rng = np.random.RandomState(0) - n_samples, n_features = 50, 5 - X = rng.randn(n_samples, n_features) - y = np.random.choice( - ["male", "female", "other"], size=n_samples, p=[0.49, 0.49, 0.02] - ) + rng = np.random.RandomState(0) + n_samples, n_features = 50, 5 + X = rng.randn(n_samples, n_features) + y = np.random.choice( + ["male", "female", "other"], size=n_samples, p=[0.49, 0.49, 0.02] + ) Pandas ------ diff --git a/doc/developers/performance.rst b/doc/developers/performance.rst index 287262255535f..42687945a2bba 100644 --- a/doc/developers/performance.rst +++ b/doc/developers/performance.rst @@ -46,31 +46,31 @@ Sometimes however an algorithm cannot be expressed efficiently in simple vectorized Numpy code. In this case, the recommended strategy is the following: - 1. **Profile** the Python implementation to find the main bottleneck and - isolate it in a **dedicated module level function**. This function - will be reimplemented as a compiled extension module. - - 2. 
If there exists a well maintained BSD or MIT **C/C++** implementation - of the same algorithm that is not too big, you can write a - **Cython wrapper** for it and include a copy of the source code - of the library in the scikit-learn source tree: this strategy is - used for the classes :class:`svm.LinearSVC`, :class:`svm.SVC` and - :class:`linear_model.LogisticRegression` (wrappers for liblinear - and libsvm). - - 3. Otherwise, write an optimized version of your Python function using - **Cython** directly. This strategy is used - for the :class:`linear_model.ElasticNet` and - :class:`linear_model.SGDClassifier` classes for instance. - - 4. **Move the Python version of the function in the tests** and use - it to check that the results of the compiled extension are consistent - with the gold standard, easy to debug Python version. - - 5. Once the code is optimized (not simple bottleneck spottable by - profiling), check whether it is possible to have **coarse grained - parallelism** that is amenable to **multi-processing** by using the - ``joblib.Parallel`` class. +1. **Profile** the Python implementation to find the main bottleneck and + isolate it in a **dedicated module level function**. This function + will be reimplemented as a compiled extension module. + +2. If there exists a well maintained BSD or MIT **C/C++** implementation + of the same algorithm that is not too big, you can write a + **Cython wrapper** for it and include a copy of the source code + of the library in the scikit-learn source tree: this strategy is + used for the classes :class:`svm.LinearSVC`, :class:`svm.SVC` and + :class:`linear_model.LogisticRegression` (wrappers for liblinear + and libsvm). + +3. Otherwise, write an optimized version of your Python function using + **Cython** directly. This strategy is used + for the :class:`linear_model.ElasticNet` and + :class:`linear_model.SGDClassifier` classes for instance. + +4. **Move the Python version of the function in the tests** and use + it to check that the results of the compiled extension are consistent + with the gold standard, easy to debug Python version. + +5. Once the code is optimized (not simple bottleneck spottable by + profiling), check whether it is possible to have **coarse grained + parallelism** that is amenable to **multi-processing** by using the + ``joblib.Parallel`` class. When using Cython, use either @@ -187,7 +187,7 @@ us install ``line_profiler`` and wire it to IPython: pip install line_profiler -- **Under IPython 0.13+**, first create a configuration profile: +**Under IPython 0.13+**, first create a configuration profile: .. prompt:: bash $ @@ -265,7 +265,7 @@ install the latest version: Then, setup the magics in a manner similar to ``line_profiler``. -- **Under IPython 0.11+**, first create a configuration profile: +**Under IPython 0.11+**, first create a configuration profile: .. prompt:: bash $ diff --git a/doc/developers/plotting.rst b/doc/developers/plotting.rst index b0e8b3b43ee45..9acc3ef4a5061 100644 --- a/doc/developers/plotting.rst +++ b/doc/developers/plotting.rst @@ -8,7 +8,7 @@ Scikit-learn defines a simple API for creating visualizations for machine learning. The key features of this API is to run calculations once and to have the flexibility to adjust the visualizations after the fact. This section is intended for developers who wish to develop or maintain plotting tools. For -usage, users should refer to the :ref`User Guide `. +usage, users should refer to the :ref:`User Guide `. 
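To make the "run calculations once, adjust the visualization afterwards" idea concrete, here is a small usage sketch with an existing display object (:class:`~sklearn.metrics.RocCurveDisplay`; the dataset and estimator are arbitrary placeholders, not part of this section of the docs):

.. code-block:: python

    import matplotlib.pyplot as plt

    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import RocCurveDisplay
    from sklearn.model_selection import train_test_split

    X, y = make_classification(random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = LogisticRegression().fit(X_train, y_train)

    # The expensive computation (predictions, ROC points) happens once here;
    # the returned display object stores the results as attributes.
    disp = RocCurveDisplay.from_estimator(clf, X_test, y_test)

    # The stored results can be re-plotted and styled after the fact,
    # e.g. on a user-provided axes, without recomputing anything.
    fig, ax = plt.subplots(figsize=(6, 6))
    disp.plot(ax=ax)
    ax.set_title("ROC curve on held-out data")
    plt.show()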
Plotting API Overview --------------------- @@ -87,7 +87,7 @@ be placed. In this case, we suggest using matplotlib's By default, the `ax` keyword in `plot` is `None`. In this case, the single axes is created and the gridspec api is used to create the regions to plot in. -See for example, :func:`~sklearn.inspection.PartialDependenceDisplay.from_estimator +See for example, :meth:`~sklearn.inspection.PartialDependenceDisplay.from_estimator` which plots multiple lines and contours using this API. The axes defining the bounding box is saved in a `bounding_ax_` attribute. The individual axes created are stored in an `axes_` ndarray, corresponding to the axes position on diff --git a/doc/developers/tips.rst b/doc/developers/tips.rst index aad7cc94eb768..3dbc35cec68d0 100644 --- a/doc/developers/tips.rst +++ b/doc/developers/tips.rst @@ -73,27 +73,25 @@ will run all :term:`common tests` for the ``LogisticRegression`` estimator. When a unit test fails, the following tricks can make debugging easier: - 1. The command line argument ``pytest -l`` instructs pytest to print the local - variables when a failure occurs. +1. The command line argument ``pytest -l`` instructs pytest to print the local + variables when a failure occurs. - 2. The argument ``pytest --pdb`` drops into the Python debugger on failure. To - instead drop into the rich IPython debugger ``ipdb``, you may set up a - shell alias to: +2. The argument ``pytest --pdb`` drops into the Python debugger on failure. To + instead drop into the rich IPython debugger ``ipdb``, you may set up a + shell alias to: -.. prompt:: bash $ + .. prompt:: bash $ - pytest --pdbcls=IPython.terminal.debugger:TerminalPdb --capture no + pytest --pdbcls=IPython.terminal.debugger:TerminalPdb --capture no Other `pytest` options that may become useful include: - - ``-x`` which exits on the first failed test - - ``--lf`` to rerun the tests that failed on the previous run - - ``--ff`` to rerun all previous tests, running the ones that failed first - - ``-s`` so that pytest does not capture the output of ``print()`` - statements - - ``--tb=short`` or ``--tb=line`` to control the length of the logs - - ``--runxfail`` also run tests marked as a known failure (XFAIL) and report - errors. +- ``-x`` which exits on the first failed test, +- ``--lf`` to rerun the tests that failed on the previous run, +- ``--ff`` to rerun all previous tests, running the ones that failed first, +- ``-s`` so that pytest does not capture the output of ``print()`` statements, +- ``--tb=short`` or ``--tb=line`` to control the length of the logs, +- ``--runxfail`` also run tests marked as a known failure (XFAIL) and report errors. Since our continuous integration tests will error if ``FutureWarning`` isn't properly caught, @@ -114,113 +112,135 @@ replies `_ for reviewing: Note that putting this content on a single line in a literal is the easiest way to make it copyable and wrapped on screen. Issue: Usage questions - :: - You are asking a usage question. The issue tracker is for bugs and new features. For usage questions, it is recommended to try [Stack Overflow](https://stackoverflow.com/questions/tagged/scikit-learn) or [the Mailing List](https://mail.python.org/mailman/listinfo/scikit-learn). +:: + + You are asking a usage question. The issue tracker is for bugs and new features. For usage questions, it is recommended to try [Stack Overflow](https://stackoverflow.com/questions/tagged/scikit-learn) or [the Mailing List](https://mail.python.org/mailman/listinfo/scikit-learn). 
- Unfortunately, we need to close this issue as this issue tracker is a communication tool used for the development of scikit-learn. The additional activity created by usage questions crowds it too much and impedes this development. The conversation can continue here, however there is no guarantee that is will receive attention from core developers. + Unfortunately, we need to close this issue as this issue tracker is a communication tool used for the development of scikit-learn. The additional activity created by usage questions crowds it too much and impedes this development. The conversation can continue here, however there is no guarantee that it will receive attention from core developers. Issue: You're welcome to update the docs - :: - Please feel free to offer a pull request updating the documentation if you feel it could be improved. +:: + + Please feel free to offer a pull request updating the documentation if you feel it could be improved. Issue: Self-contained example for bug - :: - Please provide [self-contained example code](https://stackoverflow.com/help/mcve), including imports and data (if possible), so that other contributors can just run it and reproduce your issue. Ideally your example code should be minimal. +:: + + Please provide [self-contained example code](https://scikit-learn.org/dev/developers/minimal_reproducer.html), including imports and data (if possible), so that other contributors can just run it and reproduce your issue. Ideally your example code should be minimal. Issue: Software versions - :: - To help diagnose your issue, please paste the output of: - ```py - import sklearn; sklearn.show_versions() - ``` - Thanks. +:: + + To help diagnose your issue, please paste the output of: + ```py + import sklearn; sklearn.show_versions() + ``` + Thanks. Issue: Code blocks - :: - Readability can be greatly improved if you [format](https://help.github.com/articles/creating-and-highlighting-code-blocks/) your code snippets and complete error messages appropriately. For example: +:: + + Readability can be greatly improved if you [format](https://help.github.com/articles/creating-and-highlighting-code-blocks/) your code snippets and complete error messages appropriately. For example: - ```python - print(something) - ``` - generates: ```python print(something) ``` - And: - - ```pytb - Traceback (most recent call last): - File "", line 1, in - ImportError: No module named 'hello' - ``` - generates: + + generates: + + ```python + print(something) + ``` + + And: + ```pytb Traceback (most recent call last): - File "", line 1, in + File "", line 1, in ImportError: No module named 'hello' ``` - You can edit your issue descriptions and comments at any time to improve readability. This helps maintainers a lot. Thanks! + + generates: + + ```pytb + Traceback (most recent call last): + File "", line 1, in + ImportError: No module named 'hello' + ``` + + You can edit your issue descriptions and comments at any time to improve readability. This helps maintainers a lot. Thanks! Issue/Comment: Linking to code - :: - Friendly advice: for clarity's sake, you can link to code like [this](https://help.github.com/articles/creating-a-permanent-link-to-a-code-snippet/). +:: + + Friendly advice: for clarity's sake, you can link to code like [this](https://help.github.com/articles/creating-a-permanent-link-to-a-code-snippet/). 
Issue/Comment: Linking to comments - :: - Please use links to comments, which make it a lot easier to see what you are referring to, rather than just linking to the issue. See [this](https://stackoverflow.com/questions/25163598/how-do-i-reference-a-specific-issue-comment-on-github) for more details. +:: + + Please use links to comments, which make it a lot easier to see what you are referring to, rather than just linking to the issue. See [this](https://stackoverflow.com/questions/25163598/how-do-i-reference-a-specific-issue-comment-on-github) for more details. PR-NEW: Better description and title - :: - Thanks for the pull request! Please make the title of the PR more descriptive. The title will become the commit message when this is merged. You should state what issue (or PR) it fixes/resolves in the description using the syntax described [here](https://scikit-learn.org/dev/developers/contributing.html#contributing-pull-requests). +:: + + Thanks for the pull request! Please make the title of the PR more descriptive. The title will become the commit message when this is merged. You should state what issue (or PR) it fixes/resolves in the description using the syntax described [here](https://scikit-learn.org/dev/developers/contributing.html#contributing-pull-requests). PR-NEW: Fix # - :: - Please use "Fix #issueNumber" in your PR description (and you can do it more than once). This way the associated issue gets closed automatically when the PR is merged. For more details, look at [this](https://github.com/blog/1506-closing-issues-via-pull-requests). +:: + + Please use "Fix #issueNumber" in your PR description (and you can do it more than once). This way the associated issue gets closed automatically when the PR is merged. For more details, look at [this](https://github.com/blog/1506-closing-issues-via-pull-requests). PR-NEW or Issue: Maintenance cost - :: - Every feature we include has a [maintenance cost](https://scikit-learn.org/dev/faq.html#why-are-you-so-selective-on-what-algorithms-you-include-in-scikit-learn). Our maintainers are mostly volunteers. For a new feature to be included, we need evidence that it is often useful and, ideally, [well-established](https://scikit-learn.org/dev/faq.html#what-are-the-inclusion-criteria-for-new-algorithms) in the literature or in practice. Also, we expect PR authors to take part in the maintenance for the code they submit, at least initially. That doesn't stop you implementing it for yourself and publishing it in a separate repository, or even [scikit-learn-contrib](https://scikit-learn-contrib.github.io). +:: + + Every feature we include has a [maintenance cost](https://scikit-learn.org/dev/faq.html#why-are-you-so-selective-on-what-algorithms-you-include-in-scikit-learn). Our maintainers are mostly volunteers. For a new feature to be included, we need evidence that it is often useful and, ideally, [well-established](https://scikit-learn.org/dev/faq.html#what-are-the-inclusion-criteria-for-new-algorithms) in the literature or in practice. Also, we expect PR authors to take part in the maintenance for the code they submit, at least initially. That doesn't stop you implementing it for yourself and publishing it in a separate repository, or even [scikit-learn-contrib](https://scikit-learn-contrib.github.io). PR-WIP: What's needed before merge? - :: - Please clarify (perhaps as a TODO list in the PR description) what work you believe still needs to be done before it can be reviewed for merge. When it is ready, please prefix the PR title with `[MRG]`. 
+:: + + Please clarify (perhaps as a TODO list in the PR description) what work you believe still needs to be done before it can be reviewed for merge. When it is ready, please prefix the PR title with `[MRG]`. PR-WIP: Regression test needed - :: - Please add a [non-regression test](https://en.wikipedia.org/wiki/Non-regression_testing) that would fail at main but pass in this PR. +:: + + Please add a [non-regression test](https://en.wikipedia.org/wiki/Non-regression_testing) that would fail at main but pass in this PR. PR-WIP: PEP8 - :: - You have some [PEP8](https://www.python.org/dev/peps/pep-0008/) violations, whose details you can see in the Circle CI `lint` job. It might be worth configuring your code editor to check for such errors on the fly, so you can catch them before committing. +:: + + You have some [PEP8](https://www.python.org/dev/peps/pep-0008/) violations, whose details you can see in the Circle CI `lint` job. It might be worth configuring your code editor to check for such errors on the fly, so you can catch them before committing. PR-MRG: Patience - :: - Before merging, we generally require two core developers to agree that your pull request is desirable and ready. [Please be patient](https://scikit-learn.org/dev/faq.html#why-is-my-pull-request-not-getting-any-attention), as we mostly rely on volunteered time from busy core developers. (You are also welcome to help us out with [reviewing other PRs](https://scikit-learn.org/dev/developers/contributing.html#code-review-guidelines).) +:: + + Before merging, we generally require two core developers to agree that your pull request is desirable and ready. [Please be patient](https://scikit-learn.org/dev/faq.html#why-is-my-pull-request-not-getting-any-attention), as we mostly rely on volunteered time from busy core developers. (You are also welcome to help us out with [reviewing other PRs](https://scikit-learn.org/dev/developers/contributing.html#code-review-guidelines).) PR-MRG: Add to what's new - :: - Please add an entry to the change log at `doc/whats_new/v*.rst`. Like the other entries there, please reference this pull request with `:pr:` and credit yourself (and other contributors if applicable) with `:user:`. +:: + + Please add an entry to the change log at `doc/whats_new/v*.rst`. Like the other entries there, please reference this pull request with `:pr:` and credit yourself (and other contributors if applicable) with `:user:`. PR: Don't change unrelated - :: - Please do not change unrelated lines. It makes your contribution harder to review and may introduce merge conflicts to other pull requests. +:: + + Please do not change unrelated lines. It makes your contribution harder to review and may introduce merge conflicts to other pull requests. .. highlight:: default @@ -244,19 +264,19 @@ valgrind_. Valgrind is a command-line tool that can trace memory errors in a variety of code. Follow these steps: - 1. Install `valgrind`_ on your system. +1. Install `valgrind`_ on your system. - 2. Download the python valgrind suppression file: `valgrind-python.supp`_. +2. Download the python valgrind suppression file: `valgrind-python.supp`_. - 3. Follow the directions in the `README.valgrind`_ file to customize your - python suppressions. If you don't, you will have spurious output coming - related to the python interpreter instead of your own code. +3. Follow the directions in the `README.valgrind`_ file to customize your + python suppressions. 
If you don't, you will have spurious output coming + related to the python interpreter instead of your own code. - 4. Run valgrind as follows: +4. Run valgrind as follows: -.. prompt:: bash $ + .. prompt:: bash $ - valgrind -v --suppressions=valgrind-python.supp python my_test_script.py + valgrind -v --suppressions=valgrind-python.supp python my_test_script.py .. _valgrind: https://valgrind.org .. _`README.valgrind`: https://github.com/python/cpython/blob/master/Misc/README.valgrind @@ -335,3 +355,19 @@ point. Then use pytest to run only the tests of the module you are interested in debugging. + +.. _meson_build_backend: + +The Meson Build Backend +======================= + +Since scikit-learn 1.5.0 we use meson-python as the build tool. Meson is +a new tool for scikit-learn and the PyData ecosystem. It is used by several +other packages that have written good guides about what it is and how it works. + +- `pandas setup doc + `_: + pandas has a similar setup as ours (no spin or dev.py) +- `scipy Meson doc + `_ gives + more background about how Meson works behind the scenes diff --git a/doc/developers/utilities.rst b/doc/developers/utilities.rst index 8b3612afda82a..2525b2b1365ed 100644 --- a/doc/developers/utilities.rst +++ b/doc/developers/utilities.rst @@ -97,7 +97,7 @@ Efficient Linear Algebra & Array Operations fast on large matrices on which you wish to extract only a small number of components. -- :func:`arrayfuncs.cholesky_delete`: +- `arrayfuncs.cholesky_delete`: (used in :func:`~sklearn.linear_model.lars_path`) Remove an item from a cholesky factorization. diff --git a/doc/dispatching.rst b/doc/dispatching.rst index d42fdcc86f9e8..101e493ee96b7 100644 --- a/doc/dispatching.rst +++ b/doc/dispatching.rst @@ -1,9 +1,3 @@ -.. Places parent toc into the sidebar - -:parenttoc: True - -.. include:: includes/big_toc_css.rst - =========== Dispatching =========== diff --git a/doc/documentation_team.rst b/doc/documentation_team.rst new file mode 100644 index 0000000000000..e7f13e5fe218f --- /dev/null +++ b/doc/documentation_team.rst @@ -0,0 +1,20 @@ +.. raw :: html + + +
+  <!-- profile cards for the documentation team: Arturo Amor, Lucy Liu, Yao Xiao -->
diff --git a/doc/faq.rst b/doc/faq.rst index dab775de819e7..81f03b49bc7c9 100644 --- a/doc/faq.rst +++ b/doc/faq.rst @@ -1,16 +1,46 @@ +.. raw:: html + + + .. _faq: -=========================== +========================== Frequently Asked Questions -=========================== +========================== .. currentmodule:: sklearn Here we try to give some answers to questions that regularly pop up on the mailing list. .. contents:: Table of Contents - :local: - :depth: 2 + :local: + :depth: 2 + About the project ----------------- @@ -40,21 +70,31 @@ Note however that this support is still considered experimental and specific components might behave slightly differently. Please refer to the test suite of the specific module of interest for more details. +How can I obtain permission to use the images in scikit-learn for my work? +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The images contained in the `scikit-learn repository +`_ and the images generated within +the `scikit-learn documentation `_ +can be used via the `BSD 3-Clause License +`_ for +your work. Citations of scikit-learn are highly encouraged and appreciated. See +:ref:`citing scikit-learn `. Implementation decisions ------------------------ -Why is there no support for deep or reinforcement learning / Will there be support for deep or reinforcement learning in scikit-learn? -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Why is there no support for deep or reinforcement learning? Will there be such support in the future? +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Deep learning and reinforcement learning both require a rich vocabulary to define an architecture, with deep learning additionally requiring GPUs for efficient computing. However, neither of these fit within -the design constraints of scikit-learn; as a result, deep learning +the design constraints of scikit-learn. As a result, deep learning and reinforcement learning are currently out of scope for what scikit-learn seeks to achieve. -You can find more information about addition of gpu support at +You can find more information about the addition of GPU support at `Will you add GPU support?`_. Note that scikit-learn currently implements a simple multilayer perceptron @@ -62,7 +102,7 @@ in :mod:`sklearn.neural_network`. We will only accept bug fixes for this module. If you want to implement more complex deep learning models, please turn to popular deep learning frameworks such as `tensorflow `_, -`keras `_ +`keras `_, and `pytorch `_. .. _adding_graphical_models: @@ -85,12 +125,12 @@ do structured prediction: * `pystruct `_ handles general structured learning (focuses on SSVMs on arbitrary graph structures with approximate inference; defines the notion of sample as an instance of - the graph structure) + the graph structure). * `seqlearn `_ handles sequences only (focuses on exact inference; has HMMs, but mostly for the sake of completeness; treats a feature vector as a sample and uses an offset encoding - for the dependencies between feature vectors) + for the dependencies between feature vectors). Why did you remove HMMs from scikit-learn? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -100,26 +140,52 @@ See :ref:`adding_graphical_models`. Will you add GPU support? ^^^^^^^^^^^^^^^^^^^^^^^^^ -No, or at least not in the near future. 
The main reason is that GPU support -will introduce many software dependencies and introduce platform specific -issues. scikit-learn is designed to be easy to install on a wide variety of -platforms. Outside of neural networks, GPUs don't play a large role in machine -learning today, and much larger gains in speed can often be achieved by a -careful choice of algorithms. +Adding GPU support by default would introduce heavy harware-specific software +dependencies and existing algorithms would need to be reimplemented. This would +make it both harder for the average user to install scikit-learn and harder for +the developers to maintain the code. + +However, since 2023, a limited but growing :ref:`list of scikit-learn +estimators ` can already run on GPUs if the input data is +provided as a PyTorch or CuPy array and if scikit-learn has been configured to +accept such inputs as explained in :ref:`array_api`. This Array API support +allows scikit-learn to run on GPUs without introducing heavy and +hardware-specific software dependencies to the main package. + +Most estimators that rely on NumPy for their computationally intensive operations +can be considered for Array API support and therefore GPU support. + +However, not all scikit-learn estimators are amenable to efficiently running +on GPUs via the Array API for fundamental algorithmic reasons. For instance, +tree-based models currently implemented with Cython in scikit-learn are +fundamentally not array-based algorithms. Other algorithms such as k-means or +k-nearest neighbors rely on array-based algorithms but are also implemented in +Cython. Cython is used to manually interleave consecutive array operations to +avoid introducing performance killing memory access to large intermediate +arrays: this low-level algorithmic rewrite is called "kernel fusion" and cannot +be expressed via the Array API for the foreseeable future. + +Adding efficient GPU support to estimators that cannot be efficiently +implemented with the Array API would require designing and adopting a more +flexible extension system for scikit-learn. This possibility is being +considered in the following GitHub issue (under discussion): + +- https://github.com/scikit-learn/scikit-learn/issues/22438 + Why do categorical variables need preprocessing in scikit-learn, compared to other tools? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Most of scikit-learn assumes data is in NumPy arrays or SciPy sparse matrices of a single numeric dtype. These do not explicitly represent categorical -variables at present. Thus, unlike R's data.frames or pandas.DataFrame, we -require explicit conversion of categorical features to numeric values, as +variables at present. Thus, unlike R's ``data.frames`` or :class:`pandas.DataFrame`, +we require explicit conversion of categorical features to numeric values, as discussed in :ref:`preprocessing_categorical_features`. See also :ref:`sphx_glr_auto_examples_compose_plot_column_transformer_mixed_types.py` for an example of working with heterogeneous (e.g. categorical and numeric) data. -Why does Scikit-learn not directly work with, for example, pandas.DataFrame? -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Why does scikit-learn not directly work with, for example, :class:`pandas.DataFrame`? 
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The homogeneous NumPy and SciPy data objects currently expected are most efficient to process for most operations. Extensive work would also be needed @@ -130,7 +196,6 @@ data structures. Note however that :class:`~sklearn.compose.ColumnTransformer` makes it convenient to handle heterogeneous pandas dataframes by mapping homogeneous subsets of dataframe columns selected by name or dtype to dedicated scikit-learn transformers. - Therefore :class:`~sklearn.compose.ColumnTransformer` are often used in the first step of scikit-learn pipelines when dealing with heterogeneous dataframes (see :ref:`pipeline` for more details). @@ -138,25 +203,22 @@ with heterogeneous dataframes (see :ref:`pipeline` for more details). See also :ref:`sphx_glr_auto_examples_compose_plot_column_transformer_mixed_types.py` for an example of working with heterogeneous (e.g. categorical and numeric) data. -Do you plan to implement transform for target y in a pipeline? -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Currently transform only works for features X in a pipeline. -There's a long-standing discussion about -not being able to transform y in a pipeline. -Follow on github issue -`#4143 `_. -Meanwhile check out +Do you plan to implement transform for target ``y`` in a pipeline? +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Currently transform only works for features ``X`` in a pipeline. There's a +long-standing discussion about not being able to transform ``y`` in a pipeline. +Follow on GitHub issue :issue:`4143`. Meanwhile, you can check out :class:`~compose.TransformedTargetRegressor`, `pipegraph `_, -`imbalanced-learn `_. -Note that Scikit-learn solved for the case where y +and `imbalanced-learn `_. +Note that scikit-learn solved for the case where ``y`` has an invertible transformation applied before training -and inverted after prediction. Scikit-learn intends to solve for -use cases where y should be transformed at training time -and not at test time, for resampling and similar uses, -like at `imbalanced-learn`. +and inverted after prediction. scikit-learn intends to solve for +use cases where ``y`` should be transformed at training time +and not at test time, for resampling and similar uses, like at +`imbalanced-learn `_. In general, these use cases can be solved -with a custom meta estimator rather than a Pipeline +with a custom meta estimator rather than a :class:`~pipeline.Pipeline`. Why are there so many different estimators for linear models? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -174,16 +236,17 @@ each other. Let us have a look at - :class:`~linear_model.Ridge`, L2 penalty - :class:`~linear_model.Lasso`, L1 penalty (sparse models) - :class:`~linear_model.ElasticNet`, L1 + L2 penalty (less sparse models) -- :class:`~linear_model.SGDRegressor` with `loss='squared_loss'` +- :class:`~linear_model.SGDRegressor` with `loss="squared_loss"` **Maintainer perspective:** They all do in principle the same and are different only by the penalty they impose. This, however, has a large impact on the way the underlying optimization problem is solved. In the end, this amounts to usage of different -methods and tricks from linear algebra. A special case is `SGDRegressor` which +methods and tricks from linear algebra. A special case is +:class:`~linear_model.SGDRegressor` which comprises all 4 previous models and is different by the optimization procedure. 
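As a rough sketch of the maintainer's point above (toy data and arbitrary regularization strengths, for illustration only), the four estimators differ essentially in the penalty added to the least-squares objective and in how that objective is optimized:

.. code-block:: python

    import numpy as np

    from sklearn.linear_model import ElasticNet, Lasso, Ridge, SGDRegressor

    rng = np.random.RandomState(0)
    X = rng.randn(100, 5)
    y = X @ rng.randn(5) + 0.1 * rng.randn(100)

    models = {
        "ridge (L2 penalty)": Ridge(alpha=1.0),
        "lasso (L1 penalty)": Lasso(alpha=0.1),
        "elastic net (L1 + L2)": ElasticNet(alpha=0.1, l1_ratio=0.5),
        # Same family of penalized least-squares objectives, but optimized
        # with stochastic gradient descent instead of a dedicated solver.
        "sgd (elasticnet penalty)": SGDRegressor(
            penalty="elasticnet", alpha=0.1, l1_ratio=0.5, random_state=0
        ),
    }
    for name, model in models.items():
        model.fit(X, y)
        print(name, np.round(model.coef_, 2))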
A further side effect is that the different estimators favor different data -layouts (`X` c-contiguous or f-contiguous, sparse csr or csc). This complexity +layouts (`X` C-contiguous or F-contiguous, sparse csr or csc). This complexity of the seemingly simple linear models is the reason for having different estimator classes for different penalties. @@ -230,8 +293,8 @@ this reason. .. _new_algorithms_inclusion_criteria: -What are the inclusion criteria for new algorithms ? -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +What are the inclusion criteria for new algorithms? +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ We only consider well-established algorithms for inclusion. A rule of thumb is at least 3 years since publication, 200+ citations, and wide use and @@ -256,8 +319,8 @@ Inclusion of a new algorithm speeding up an existing model is easier if: - it does not introduce new hyper-parameters (as it makes the library more future-proof), - it is easy to document clearly when the contribution improves the speed - and when it does not, for instance "when n_features >> - n_samples", + and when it does not, for instance, "when ``n_features >> + n_samples``", - benchmarks clearly show a speed up. Also, note that your implementation need not be in scikit-learn to be used @@ -282,7 +345,7 @@ at which point the original author might long have lost interest. See also :ref:`new_algorithms_inclusion_criteria`. For a great read about long-term maintenance issues in open-source software, look at `the Executive Summary of Roads and Bridges -`_ +`_. Using scikit-learn @@ -290,25 +353,25 @@ Using scikit-learn What's the best way to get help on scikit-learn usage? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -**For general machine learning questions**, please use -`Cross Validated `_ with the ``[machine-learning]`` tag. -**For scikit-learn usage questions**, please use `Stack Overflow `_ -with the ``[scikit-learn]`` and ``[python]`` tags. You can alternatively use the `mailing list -`_. +* General machine learning questions: use `Cross Validated + `_ with the ``[machine-learning]`` tag. + +* scikit-learn usage questions: use `Stack Overflow + `_ with the + ``[scikit-learn]`` and ``[python]`` tags. You can alternatively use the `mailing list + `_. Please make sure to include a minimal reproduction code snippet (ideally shorter than 10 lines) that highlights your problem on a toy dataset (for instance from -``sklearn.datasets`` or randomly generated with functions of ``numpy.random`` with +:mod:`sklearn.datasets` or randomly generated with functions of ``numpy.random`` with a fixed random seed). Please remove any line of code that is not necessary to reproduce your problem. The problem should be reproducible by simply copy-pasting your code snippet in a Python shell with scikit-learn installed. Do not forget to include the import statements. - More guidance to write good reproduction code snippets can be found at: - -https://stackoverflow.com/help/mcve +https://stackoverflow.com/help/mcve. If your problem raises an exception that you do not understand (even after googling it), please make sure to include the full traceback that you obtain when running the @@ -317,12 +380,9 @@ reproduction script. For bug reports or feature requests, please make use of the `issue tracker on GitHub `_. -There is also a `scikit-learn Gitter channel -`_ where some users and developers -might be found. 
- -**Please do not email any authors directly to ask for assistance, report bugs, -or for any other issue related to scikit-learn.** +.. warning:: + Please do not email any authors directly to ask for assistance, report bugs, + or for any other issue related to scikit-learn. How should I save, export or deploy estimators for production? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -336,15 +396,15 @@ Bunch objects are sometimes used as an output for functions and methods. They extend dictionaries by enabling values to be accessed by key, `bunch["value_key"]`, or by an attribute, `bunch.value_key`. -They should not be used as an input; therefore you almost never need to create -a ``Bunch`` object, unless you are extending the scikit-learn's API. +They should not be used as an input. Therefore you almost never need to create +a :class:`~utils.Bunch` object, unless you are extending scikit-learn's API. How can I load my own datasets into a format usable by scikit-learn? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Generally, scikit-learn works on any numeric data stored as numpy arrays or scipy sparse matrices. Other types that are convertible to numeric -arrays such as pandas DataFrame are also acceptable. +arrays such as :class:`pandas.DataFrame` are also acceptable. For more information on loading your data files into these usable data structures, please refer to :ref:`loading external datasets `. @@ -363,7 +423,7 @@ For more general feature extraction from any kind of data, see Another common case is when you have non-numerical data and a custom distance (or similarity) metric on these data. Examples include strings with edit -distance (aka. Levenshtein distance; e.g., DNA or RNA sequences). These can be +distance (aka. Levenshtein distance), for instance, DNA or RNA sequences. These can be encoded as numbers, but doing so is painful and error-prone. Working with distance metrics on arbitrary data can be done in two ways. @@ -371,15 +431,15 @@ Firstly, many estimators take precomputed distance/similarity matrices, so if the dataset is not too large, you can compute distances for all pairs of inputs. If the dataset is large, you can use feature vectors with only one "feature", which is an index into a separate data structure, and supply a custom metric -function that looks up the actual data in this data structure. E.g., to use -DBSCAN with Levenshtein distances:: +function that looks up the actual data in this data structure. For instance, to use +:class:`~cluster.dbscan` with Levenshtein distances:: - >>> from leven import levenshtein # doctest: +SKIP >>> import numpy as np + >>> from leven import levenshtein # doctest: +SKIP >>> from sklearn.cluster import dbscan >>> data = ["ACCTCCTAGAAG", "ACCTACTAGAAGTT", "GAATATTAGGCCGA"] >>> def lev_metric(x, y): - ... i, j = int(x[0]), int(y[0]) # extract indices + ... i, j = int(x[0]), int(y[0]) # extract indices ... return levenshtein(data[i], data[j]) ... >>> X = np.arange(len(data)).reshape(-1, 1) @@ -389,25 +449,24 @@ DBSCAN with Levenshtein distances:: [2]]) >>> # We need to specify algorithm='brute' as the default assumes >>> # a continuous feature space. - >>> dbscan(X, metric=lev_metric, eps=5, min_samples=2, algorithm='brute') - ... # doctest: +SKIP - ([0, 1], array([ 0, 0, -1])) + >>> dbscan(X, metric=lev_metric, eps=5, min_samples=2, algorithm='brute') # doctest: +SKIP + (array([0, 1]), array([ 0, 0, -1])) -(This uses the third-party edit distance package ``leven``.) 
+Note that the example above uses the third-party edit distance package +`leven `_. Similar tricks can be used, +with some care, for tree kernels, graph kernels, etc. -Similar tricks can be used, with some care, for tree kernels, graph kernels, -etc. +Why do I sometimes get a crash/freeze with ``n_jobs > 1`` under OSX or Linux? +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Why do I sometime get a crash/freeze with n_jobs > 1 under OSX or Linux? -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Several scikit-learn tools such as ``GridSearchCV`` and ``cross_val_score`` -rely internally on Python's `multiprocessing` module to parallelize execution +Several scikit-learn tools such as :class:`~model_selection.GridSearchCV` and +:class:`~model_selection.cross_val_score` rely internally on Python's +:mod:`multiprocessing` module to parallelize execution onto several Python processes by passing ``n_jobs > 1`` as an argument. -The problem is that Python ``multiprocessing`` does a ``fork`` system call +The problem is that Python :mod:`multiprocessing` does a ``fork`` system call without following it with an ``exec`` system call for performance reasons. Many -libraries like (some versions of) Accelerate / vecLib under OSX, (some versions +libraries like (some versions of) Accelerate or vecLib under OSX, (some versions of) MKL, the OpenMP runtime of GCC, nvidia's Cuda (and probably many others), manage their own internal thread pool. Upon a call to `fork`, the thread pool state in the child process is corrupted: the thread pool believes it has many @@ -418,30 +477,30 @@ main since 0.2.10) and we contributed a `patch `_ to GCC's OpenMP runtime (not yet reviewed). -But in the end the real culprit is Python's ``multiprocessing`` that does +But in the end the real culprit is Python's :mod:`multiprocessing` that does ``fork`` without ``exec`` to reduce the overhead of starting and using new Python processes for parallel computing. Unfortunately this is a violation of the POSIX standard and therefore some software editors like Apple refuse to -consider the lack of fork-safety in Accelerate / vecLib as a bug. +consider the lack of fork-safety in Accelerate and vecLib as a bug. -In Python 3.4+ it is now possible to configure ``multiprocessing`` to -use the 'forkserver' or 'spawn' start methods (instead of the default -'fork') to manage the process pools. To work around this issue when +In Python 3.4+ it is now possible to configure :mod:`multiprocessing` to +use the ``"forkserver"`` or ``"spawn"`` start methods (instead of the default +``"fork"``) to manage the process pools. To work around this issue when using scikit-learn, you can set the ``JOBLIB_START_METHOD`` environment -variable to 'forkserver'. However the user should be aware that using -the 'forkserver' method prevents joblib.Parallel to call function +variable to ``"forkserver"``. However the user should be aware that using +the ``"forkserver"`` method prevents :class:`joblib.Parallel` to call function interactively defined in a shell session. -If you have custom code that uses ``multiprocessing`` directly instead of using -it via joblib you can enable the 'forkserver' mode globally for your -program: Insert the following instructions in your main script:: +If you have custom code that uses :mod:`multiprocessing` directly instead of using +it via :mod:`joblib` you can enable the ``"forkserver"`` mode globally for your +program. 
Insert the following instructions in your main script:: import multiprocessing # other imports, custom code, load data, define model... - if __name__ == '__main__': - multiprocessing.set_start_method('forkserver') + if __name__ == "__main__": + multiprocessing.set_start_method("forkserver") # call scikit-learn utils with n_jobs > 1 here @@ -450,20 +509,20 @@ documentation `. +For more details, please refer to our :ref:`notes on parallelism `. How do I set a ``random_state`` for an entire execution? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/doc/getting_started.rst b/doc/getting_started.rst index cd4d953db1b8a..14e0178f0826b 100644 --- a/doc/getting_started.rst +++ b/doc/getting_started.rst @@ -53,6 +53,8 @@ new data. You don't need to re-train the estimator:: >>> clf.predict([[4, 5, 6], [14, 15, 16]]) # predict classes of new data array([0, 1]) +You can check :ref:`ml_map` on how to choose the right model for your use case. + Transformers and pre-processors ------------------------------- @@ -227,6 +229,3 @@ provide. You can also find an exhaustive list of the public API in the You can also look at our numerous :ref:`examples ` that illustrate the use of ``scikit-learn`` in many different contexts. - -The :ref:`tutorials ` also contain additional learning -resources. diff --git a/doc/glossary.rst b/doc/glossary.rst index 0a249cf94ad22..84a628b0f716d 100644 --- a/doc/glossary.rst +++ b/doc/glossary.rst @@ -66,6 +66,7 @@ General Concepts It excludes: * a :term:`sparse matrix` + * a sparse array * an iterator * a generator @@ -205,6 +206,29 @@ General Concepts exceptional behaviours on the estimator using semantic :term:`estimator tags`. + cross-fitting + cross fitting + A resampling method that iteratively partitions data into mutually + exclusive subsets to fit two stages. During the first stage, the + mutually exclusive subsets enable predictions or transformations to be + computed on data not seen during training. The computed data is then + used in the second stage. The objective is to avoid having any + overfitting in the first stage introduce bias into the input data + distribution of the second stage. + For examples of its use, see: :class:`~preprocessing.TargetEncoder`, + :class:`~ensemble.StackingClassifier`, + :class:`~ensemble.StackingRegressor` and + :class:`~calibration.CalibratedClassifierCV`. + + cross-validation + cross validation + A resampling method that iteratively partitions data into mutually + exclusive 'train' and 'test' subsets so model performance can be + evaluated on unseen data. This conserves data as avoids the need to hold + out a 'validation' dataset and accounts for variability as multiple + rounds of cross validation are generally performed. + See :ref:`User Guide ` for more details. + deprecation We use deprecation to slowly violate our :term:`backwards compatibility` assurances, usually to: @@ -262,7 +286,26 @@ General Concepts Note that in this case, the precision can be platform dependent. The `numeric` dtype refers to accepting both `integer` and `floating`. - TODO: Mention efficiency and precision issues; casting policy. + When it comes to choosing between 64-bit dtype (i.e. `np.float64` and + `np.int64`) and 32-bit dtype (i.e. `np.float32` and `np.int32`), it + boils down to a trade-off between efficiency and precision. The 64-bit + types offer more accurate results due to their lower floating-point + error, but demand more computational resources, resulting in slower + operations and increased memory usage. 
In contrast, 32-bit types + promise enhanced operation speed and reduced memory consumption, but + introduce a larger floating-point error. The efficiency improvement are + dependent on lower level optimization such as like vectorization, + single instruction multiple dispatch (SIMD), or cache optimization but + crucially on the compatibility of the algorithm in use. + + Specifically, the choice of precision should account for whether the + employed algorithm can effectively leverage `np.float32`. Some + algorithms, especially certain minimization methods, are exclusively + coded for `np.float64`, meaning that even if `np.float32` is passed, it + triggers an automatic conversion back to `np.float64`. This not only + negates the intended computational savings but also introduces + additional overhead, making operations with `np.float32` unexpectedly + slower and more memory-intensive due to this extra conversion step. duck typing We try to apply `duck typing @@ -344,8 +387,8 @@ General Concepts evaluation metric evaluation metrics Evaluation metrics give a measure of how well a model performs. We may - use this term specifically to refer to the functions in :mod:`metrics` - (disregarding :mod:`metrics.pairwise`), as distinct from the + use this term specifically to refer to the functions in :mod:`~sklearn.metrics` + (disregarding :mod:`~sklearn.metrics.pairwise`), as distinct from the :term:`score` method and the :term:`scoring` API used in cross validation. See :ref:`model_evaluation`. @@ -360,7 +403,7 @@ General Concepts the scoring API. Note that some estimators can calculate metrics that are not included - in :mod:`metrics` and are estimator-specific, notably model + in :mod:`~sklearn.metrics` and are estimator-specific, notably model likelihoods. estimator tags @@ -494,8 +537,8 @@ General Concepts applying a :term:`transformer` to the entirety of a dataset rather than each training portion in a cross validation split. - We aim to provide interfaces (such as :mod:`pipeline` and - :mod:`model_selection`) that shield the user from data leakage. + We aim to provide interfaces (such as :mod:`~sklearn.pipeline` and + :mod:`~sklearn.model_selection`) that shield the user from data leakage. memmapping memory map @@ -575,7 +618,7 @@ General Concepts params We mostly use *parameter* to refer to the aspects of an estimator that can be specified in its construction. For example, ``max_depth`` and - ``random_state`` are parameters of :class:`RandomForestClassifier`. + ``random_state`` are parameters of :class:`~ensemble.RandomForestClassifier`. Parameters to an estimator's constructor are stored unmodified as attributes on the estimator instance, and conventionally start with an alphabetic character and end with an alphanumeric character. Each @@ -620,7 +663,7 @@ General Concepts implementations of distance metrics (as well as improper metrics like Cosine Distance) through :func:`metrics.pairwise_distances`, and of kernel functions (a constrained class of similarity functions) in - :func:`metrics.pairwise_kernels`. These can compute pairwise distance + :func:`metrics.pairwise.pairwise_kernels`. These can compute pairwise distance matrices that are symmetric and hence store data redundantly. See also :term:`precomputed` and :term:`metric`. @@ -1026,6 +1069,38 @@ Further examples: * :class:`gaussian_process.kernels.Kernel` * ``tree.Criterion`` +.. _glossary_metadata_routing: + +Metadata Routing +================ + +.. glossary:: + + consumer + An object which consumes :term:`metadata`. 
This object is usually an + :term:`estimator`, a :term:`scorer`, or a :term:`CV splitter`. Consuming + metadata means using it in calculations, e.g. using + :term:`sample_weight` to calculate a certain type of score. Being a + consumer doesn't mean that the object always receives a certain + metadata, rather it means it can use it if it is provided. + + metadata + Data which is related to the given :term:`X` and :term:`y` data, but + is not directly a part of the data, e.g. :term:`sample_weight` or + :term:`groups`, and is passed along to different objects and methods, + e.g. to a :term:`scorer` or a :term:`CV splitter`. + + router + An object which routes metadata to :term:`consumers `. This + object is usually a :term:`meta-estimator`, e.g. + :class:`~pipeline.Pipeline` or :class:`~model_selection.GridSearchCV`. + Some routers can also be a consumer. This happens for example when a + meta-estimator uses the given :term:`groups`, and it also passes it + along to some of its sub-objects, such as a :term:`CV splitter`. + +Please refer to :ref:`Metadata Routing User Guide ` for more +information. + .. _glossary_target_types: Target Types @@ -1122,7 +1197,7 @@ Target Types XXX: For simplicity, we may not always support string class labels for multiclass multioutput, and integer class labels should be used. - :mod:`multioutput` provides estimators which estimate multi-output + :mod:`~sklearn.multioutput` provides estimators which estimate multi-output problems using multiple single-output estimators. This may not fully account for dependencies among the different outputs, which methods natively handling the multioutput case (e.g. decision trees, nearest @@ -1474,7 +1549,7 @@ functions or non-estimator constructors. 1: 1}, {0: 1, 1: 1}]`` instead of ``[{1:1}, {2:5}, {3:1}, {4:1}]``. The ``class_weight`` parameter is validated and interpreted with - :func:`utils.compute_class_weight`. + :func:`utils.class_weight.compute_class_weight`. ``cv`` Determines a cross validation splitting strategy, as used in @@ -1500,16 +1575,17 @@ functions or non-estimator constructors. With some exceptions (especially where not using cross validation at all is an option), the default is 5-fold. - ``cv`` values are validated and interpreted with :func:`utils.check_cv`. + ``cv`` values are validated and interpreted with + :func:`model_selection.check_cv`. ``kernel`` Specifies the kernel function to be used by Kernel Method algorithms. - For example, the estimators :class:`SVC` and - :class:`GaussianProcessClassifier` both have a ``kernel`` parameter - that takes the name of the kernel to use as string or a callable - kernel function used to compute the kernel matrix. For more reference, - see the :ref:`kernel_approximation` and the :ref:`gaussian_process` - user guides. + For example, the estimators :class:`svm.SVC` and + :class:`gaussian_process.GaussianProcessClassifier` both have a + ``kernel`` parameter that takes the name of the kernel to use as string + or a callable kernel function used to compute the kernel matrix. For + more reference, see the :ref:`kernel_approximation` and the + :ref:`gaussian_process` user guides. ``max_iter`` For estimators involving iterative optimization, this determines the @@ -1670,12 +1746,12 @@ functions or non-estimator constructors. is an interaction between ``warm_start`` and the parameter controlling the number of iterations of the estimator. 
- For estimators imported from :mod:`ensemble`, + For estimators imported from :mod:`~sklearn.ensemble`, ``warm_start`` will interact with ``n_estimators`` or ``max_iter``. For these models, the number of iterations, reported via ``len(estimators_)`` or ``n_iter_``, corresponds the total number of estimators/iterations learnt since the initialization of the model. - Thus, if a model was already initialized with `N`` estimators, and `fit` + Thus, if a model was already initialized with `N` estimators, and `fit` is called with ``n_estimators`` or ``max_iter`` set to `M`, the model will train `M - N` new estimators. diff --git a/doc/governance.rst b/doc/governance.rst index 5b153aed7a0ce..d6b07afe4eeb4 100644 --- a/doc/governance.rst +++ b/doc/governance.rst @@ -58,45 +58,47 @@ members and recant their rights until they become active again. The list of members, active and emeritus (with dates at which they became active) is public on the scikit-learn website. -The following teams form the core contributors group. - - -Contributor Experience Team -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The contributor experience team improves the experience of contributors by -helping with the triage of issues and pull requests, as well as noticing any -repeating patterns where people might struggle, and to help with improving -those aspects of the project. - -To this end, they have the required permissions on github to label and close -issues. :ref:`Their work ` is crucial to improve the -communication in the project and limit the crowding of the issue tracker. - -.. _communication_team: - -Communication team -~~~~~~~~~~~~~~~~~~ - -Members of the communication team help with outreach and communication -for scikit-learn. The goal of the team is to develop public awareness of -scikit-learn, of its features and usage, as well as branding. - -For this, they can operate the scikit-learn accounts on various social networks -and produce materials. They also have the required rights to our blog -repository and other relevant accounts and platforms. - -Maintainers -~~~~~~~~~~~ - -Maintainers are community members who have shown that they are dedicated to the -continued development of the project through ongoing engagement with the -community. They have shown they can be trusted to maintain scikit-learn with -care. Being a maintainer allows contributors to more easily carry on with their -project related activities by giving them direct access to the project's -repository. Maintainers are expected to review code contributions, merge -approved pull requests, cast votes for and against merging a pull-request, -and to be involved in deciding major changes to the API. +The following teams form the core contributors group: + +* **Contributor Experience Team** + The contributor experience team improves the experience of contributors by + helping with the triage of issues and pull requests, as well as noticing any + repeating patterns where people might struggle, and to help with improving + those aspects of the project. + + To this end, they have the required permissions on github to label and close + issues. :ref:`Their work ` is crucial to improve the + communication in the project and limit the crowding of the issue tracker. + + .. _communication_team: + +* **Communication Team** + Members of the communication team help with outreach and communication + for scikit-learn. The goal of the team is to develop public awareness of + scikit-learn, of its features and usage, as well as branding. 
+ + For this, they can operate the scikit-learn accounts on various social networks + and produce materials. They also have the required rights to our blog + repository and other relevant accounts and platforms. + +* **Documentation Team** + Members of the documentation team engage with the documentation of the project + among other things. They might also be involved in other aspects of the + project, but their reviews on documentation contributions are considered + authoritative, and can merge such contributions. + + To this end, they have permissions to merge pull requests in scikit-learn's + repository. + +* **Maintainers Team** + Maintainers are community members who have shown that they are dedicated to the + continued development of the project through ongoing engagement with the + community. They have shown they can be trusted to maintain scikit-learn with + care. Being a maintainer allows contributors to more easily carry on with their + project related activities by giving them direct access to the project's + repository. Maintainers are expected to review code contributions, merge + approved pull requests, cast votes for and against merging a pull-request, + and to be involved in deciding major changes to the API. Technical Committee ------------------- @@ -158,8 +160,8 @@ are made according to the following rules: versions** happen via a :ref:`slep` and follows the decision-making process outlined above. -* **Changes to the governance model** follow the process outlined in [ - SLEP020](https://scikit-learn-enhancement-proposals.readthedocs.io/en/latest/slep020/proposal.html). +* **Changes to the governance model** follow the process outlined in `SLEP020 + `__. If a veto -1 vote is cast on a lazy consensus, the proposer can appeal to the community and maintainers and the change can be approved or rejected using diff --git a/doc/images/Tidelift-logo-on-light.svg b/doc/images/Tidelift-logo-on-light.svg new file mode 100644 index 0000000000000..af12d68417235 --- /dev/null +++ b/doc/images/Tidelift-logo-on-light.svg @@ -0,0 +1,33 @@ + + + + + + + + + + + + + + + + + + + diff --git a/doc/images/bcg-small.png b/doc/images/bcg-small.png deleted file mode 100644 index 8ff377969003a..0000000000000 Binary files a/doc/images/bcg-small.png and /dev/null differ diff --git a/doc/images/chanel-small.png b/doc/images/chanel-small.png new file mode 100644 index 0000000000000..b1965b714a42f Binary files /dev/null and b/doc/images/chanel-small.png differ diff --git a/doc/images/chanel.png b/doc/images/chanel.png new file mode 100644 index 0000000000000..1b2d39fd4facf Binary files /dev/null and b/doc/images/chanel.png differ diff --git a/doc/images/fujitsu-small.png b/doc/images/fujitsu-small.png deleted file mode 100644 index b77447117497d..0000000000000 Binary files a/doc/images/fujitsu-small.png and /dev/null differ diff --git a/doc/images/ml_map.README.rst b/doc/images/ml_map.README.rst new file mode 100644 index 0000000000000..8d82c175dad58 --- /dev/null +++ b/doc/images/ml_map.README.rst @@ -0,0 +1,20 @@ +The scikit-learn machine learning cheat sheet was originally created by Andreas Mueller: +https://peekaboo-vision.blogspot.de/2013/01/machine-learning-cheat-sheet-for-scikit.html + +The current version of the chart is located at `doc/images/ml_map.svg` in SVG+XML +format, created using [draw.io](https://draw.io/). To edit the chart, open the file in +draw.io, make changes, and save. This should update the chart in-place. 
Another option +would be to re-export the chart as SVG and replace the existing file. The options used +for exporting the chart are: + +- Zoom: 100% +- Border width: 15 +- Size: Diagram +- Transparent Background: False +- Appearance: Light + +Each node in the chart that contains an estimator should have a link, where the root +directory is at `../../`. Note that after updating or re-exporting the SVG, the links +may be prefixed with e.g. `https://app.diagrams.net/`. Remember to check and remove +them, for instance by replacing all occurrences of `https://app.diagrams.net/../../` +with `../../`. diff --git a/doc/images/ml_map.png b/doc/images/ml_map.png deleted file mode 100644 index 73ebd9c05fcc4..0000000000000 Binary files a/doc/images/ml_map.png and /dev/null differ diff --git a/doc/images/ml_map.svg b/doc/images/ml_map.svg new file mode 100644 index 0000000000000..7c587cef011b9 --- /dev/null +++ b/doc/images/ml_map.svg @@ -0,0 +1,4 @@ + + + +
[ml_map.svg text labels elided. The flowchart starts at "START" and asks whether there are >50 samples ("get more data" if not), then branches by task:
- classification: Linear SVC; Naive Bayes for text data; KNeighbors Classifier; SVC / Ensemble Classifiers; SGD Classifier; Kernel Approximation
- clustering: KMeans / MiniBatch KMeans; Spectral Clustering / GMM; MeanShift / VBGMM
- regression: SGD Regressor; Lasso / ElasticNet; RidgeRegression / SVR(kernel="linear"); SVR(kernel="rbf") / Ensemble Regressors
- dimensionality reduction: Randomized PCA; Kernel Approximation; IsoMap / Spectral Embedding; LLE
Caption: "scikit-learn algorithm cheat sheet".]
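The README above asks contributors to strip the ``https://app.diagrams.net/`` prefix that draw.io may prepend to the estimator links when the chart is re-exported. A minimal sketch of that clean-up step (an editorial illustration, not part of the diff; it assumes the chart lives at ``doc/images/ml_map.svg`` and that the stray prefix is exactly the one quoted in the README)::

    from pathlib import Path

    svg_path = Path("doc/images/ml_map.svg")
    text = svg_path.read_text(encoding="utf-8")
    # Turn the absolute draw.io links back into the relative links the docs expect.
    cleaned = text.replace("https://app.diagrams.net/../../", "../../")
    svg_path.write_text(cleaned, encoding="utf-8")

After running it, the links in the chart should again resolve relative to the rendered documentation root.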
diff --git a/doc/images/permuted_non_predictive_feature.png b/doc/images/permuted_non_predictive_feature.png new file mode 100644 index 0000000000000..3ba908cbfbe83 Binary files /dev/null and b/doc/images/permuted_non_predictive_feature.png differ diff --git a/doc/images/permuted_predictive_feature.png b/doc/images/permuted_predictive_feature.png new file mode 100644 index 0000000000000..702c698425618 Binary files /dev/null and b/doc/images/permuted_predictive_feature.png differ diff --git a/doc/images/probabl.png b/doc/images/probabl.png new file mode 100644 index 0000000000000..aab532ba62d95 Binary files /dev/null and b/doc/images/probabl.png differ diff --git a/doc/includes/big_toc_css.rst b/doc/includes/big_toc_css.rst deleted file mode 100644 index a8ba83e99c5b8..0000000000000 --- a/doc/includes/big_toc_css.rst +++ /dev/null @@ -1,40 +0,0 @@ -.. - File to ..include in a document with a big table of content, to give - it 'style' - -.. raw:: html - - - - - diff --git a/doc/includes/bigger_toc_css.rst b/doc/includes/bigger_toc_css.rst deleted file mode 100644 index d866bd145d883..0000000000000 --- a/doc/includes/bigger_toc_css.rst +++ /dev/null @@ -1,60 +0,0 @@ -.. - File to ..include in a document with a very big table of content, to - give it 'style' - -.. raw:: html - - - - - diff --git a/doc/index.rst.template b/doc/index.rst.template new file mode 100644 index 0000000000000..f1f1f49836515 --- /dev/null +++ b/doc/index.rst.template @@ -0,0 +1,24 @@ +.. title:: Index + +.. Define the overall structure, that affects the prev-next buttons and the order + of the sections in the top navbar. + +.. toctree:: + :hidden: + :maxdepth: 2 + + Install + user_guide + API + auto_examples/index + Community + getting_started + whats_new + Glossary + Development <{{ development_link }}> + FAQ + support + related_projects + roadmap + Governance + about diff --git a/doc/inspection.rst b/doc/inspection.rst index 57c1cfc3275e8..95d121ec10d7d 100644 --- a/doc/inspection.rst +++ b/doc/inspection.rst @@ -1,9 +1,3 @@ -.. Places parent toc into the sidebar - -:parenttoc: True - -.. include:: includes/big_toc_css.rst - .. _inspection: Inspection @@ -21,9 +15,9 @@ predictions from a model and what affects them. This can be used to evaluate assumptions and biases of a model, design a better model, or to diagnose issues with model performance. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_inspection_plot_linear_model_coefficient_interpretation.py` +* :ref:`sphx_glr_auto_examples_inspection_plot_linear_model_coefficient_interpretation.py` .. toctree:: diff --git a/doc/install.rst b/doc/install.rst index bf2832bf72f24..be924b012ce65 100644 --- a/doc/install.rst +++ b/doc/install.rst @@ -6,21 +6,21 @@ Installing scikit-learn There are different ways to install scikit-learn: - * :ref:`Install the latest official release `. This - is the best approach for most users. It will provide a stable version - and pre-built packages are available for most platforms. +* :ref:`Install the latest official release `. This + is the best approach for most users. It will provide a stable version + and pre-built packages are available for most platforms. - * Install the version of scikit-learn provided by your - :ref:`operating system or Python distribution `. - This is a quick option for those who have operating systems or Python - distributions that distribute scikit-learn. - It might not provide the latest release version. 
+* Install the version of scikit-learn provided by your + :ref:`operating system or Python distribution `. + This is a quick option for those who have operating systems or Python + distributions that distribute scikit-learn. + It might not provide the latest release version. - * :ref:`Building the package from source - `. This is best for users who want the - latest-and-greatest features and aren't afraid of running - brand-new code. This is also needed for users who wish to contribute to the - project. +* :ref:`Building the package from source + `. This is best for users who want the + latest-and-greatest features and aren't afraid of running + brand-new code. This is also needed for users who wish to contribute to the + project. .. _install_official_release: @@ -28,94 +28,132 @@ There are different ways to install scikit-learn: Installing the latest release ============================= -.. This quickstart installation is a hack of the awesome - https://spacy.io/usage/#quickstart page. - See the original javascript implementation - https://github.com/ines/quickstart - - -.. raw:: html - -
[removed raw HTML quickstart widget, markup elided: an operating-system / packager selector, short instructions for installing Python 3 (python.org, homebrew, distribution package manager, or Anaconda/miniconda/miniforge), and code boxes with the corresponding venv creation and activation commands, `pip install -U scikit-learn` / `conda create -n sklearn-env -c conda-forge scikit-learn`, and the installation-check commands (`pip show` / `pip freeze` / `python -c "import sklearn; sklearn.show_versions()"` / `conda list`).]
- -Note that in order to avoid potential conflicts with other packages it is -strongly recommended to use a `virtual environment (venv) -`_ or a `conda environment -`_. - -Using such an isolated environment makes it possible to install a specific -version of scikit-learn with pip or conda and its dependencies independently of -any previously installed Python packages. In particular under Linux is it -discouraged to install pip packages alongside the packages managed by the +.. `scss/install.scss` overrides some default sphinx-design styling for the tabs + +.. div:: install-instructions + + .. tab-set:: + + .. tab-item:: pip + :class-label: tab-6 + :sync: packager-pip + + .. tab-set:: + + .. tab-item:: Windows + :class-label: tab-4 + :sync: os-windows + + Install the 64-bit version of Python 3, for instance from the + `official website `__. + + Now create a `virtual environment (venv) + `_ and install scikit-learn. + Note that the virtual environment is optional but strongly recommended, in + order to avoid potential conflicts with other packages. + + .. prompt:: powershell + + python -m venv sklearn-env + sklearn-env\Scripts\activate # activate + pip install -U scikit-learn + + In order to check your installation, you can use: + + .. prompt:: powershell + + python -m pip show scikit-learn # show scikit-learn version and location + python -m pip freeze # show all installed packages in the environment + python -c "import sklearn; sklearn.show_versions()" + + .. tab-item:: macOS + :class-label: tab-4 + :sync: os-macos + + Install Python 3 using `homebrew `_ (`brew install python`) + or by manually installing the package from the `official website + `__. + + Now create a `virtual environment (venv) + `_ and install scikit-learn. + Note that the virtual environment is optional but strongly recommended, in + order to avoid potential conflicts with other packges. + + .. prompt:: bash + + python -m venv sklearn-env + source sklearn-env/bin/activate # activate + pip install -U scikit-learn + + In order to check your installation, you can use: + + .. prompt:: bash + + python -m pip show scikit-learn # show scikit-learn version and location + python -m pip freeze # show all installed packages in the environment + python -c "import sklearn; sklearn.show_versions()" + + .. tab-item:: Linux + :class-label: tab-4 + :sync: os-linux + + Python 3 is usually installed by default on most Linux distributions. To + check if you have it installed, try: + + .. prompt:: bash + + python3 --version + pip3 --version + + If you don't have Python 3 installed, please install `python3` and + `python3-pip` from your distribution's package manager. + + Now create a `virtual environment (venv) + `_ and install scikit-learn. + Note that the virtual environment is optional but strongly recommended, in + order to avoid potential conflicts with other packages. + + .. prompt:: bash + + python3 -m venv sklearn-env + source sklearn-env/bin/activate # activate + pip3 install -U scikit-learn + + In order to check your installation, you can use: + + .. prompt:: bash + + python3 -m pip show scikit-learn # show scikit-learn version and location + python3 -m pip freeze # show all installed packages in the environment + python3 -c "import sklearn; sklearn.show_versions()" + + .. tab-item:: conda + :class-label: tab-6 + :sync: packager-conda + + Install conda using the `Anaconda or miniconda installers + `__ + or the `miniforge installers + `__ (no administrator + permission required for any of those). Then run: + + .. 
prompt:: bash + + conda create -n sklearn-env -c conda-forge scikit-learn + conda activate sklearn-env + + In order to check your installation, you can use: + + .. prompt:: bash + + conda list scikit-learn # show scikit-learn version and location + conda list # show all installed packages in the environment + python -c "import sklearn; sklearn.show_versions()" + +Using an isolated environment such as pip venv or conda makes it possible to +install a specific version of scikit-learn with pip or conda and its dependencies +independently of any previously installed Python packages. In particular under Linux +it is discouraged to install pip packages alongside the packages managed by the package manager of the distribution (apt, dnf, pacman...). Note that you should always remember to activate the environment of your choice @@ -127,11 +165,10 @@ and NumPy and SciPy are not recompiled from source, which can happen when using particular configurations of operating system and hardware (such as Linux on a Raspberry Pi). - -Scikit-learn plotting capabilities (i.e., functions start with "plot\_" -and classes end with "Display") require Matplotlib. The examples require +Scikit-learn plotting capabilities (i.e., functions starting with `plot\_` +and classes ending with `Display`) require Matplotlib. The examples require Matplotlib and some examples require scikit-image, pandas, or seaborn. The -minimum version of Scikit-learn dependencies are listed below along with its +minimum version of scikit-learn dependencies are listed below along with its purpose. .. include:: min_dependency_table.rst @@ -141,32 +178,10 @@ purpose. Scikit-learn 0.20 was the last version to support Python 2.7 and Python 3.4. Scikit-learn 0.21 supported Python 3.5-3.7. Scikit-learn 0.22 supported Python 3.5-3.8. - Scikit-learn 0.23 - 0.24 require Python 3.6 or newer. + Scikit-learn 0.23-0.24 required Python 3.6 or newer. Scikit-learn 1.0 supported Python 3.7-3.10. - Scikit-learn 1.1 and later requires Python 3.8 or newer. - - -.. _install_on_apple_silicon_m1: - -Installing on Apple Silicon M1 hardware -======================================= - -The recently introduced `macos/arm64` platform (sometimes also known as -`macos/aarch64`) requires the open source community to upgrade the build -configuration and automation to properly support it. - -At the time of writing (January 2021), the only way to get a working -installation of scikit-learn on this hardware is to install scikit-learn and its -dependencies from the conda-forge distribution, for instance using the miniforge -installers: - -https://github.com/conda-forge/miniforge - -The following issue tracks progress on making it possible to install -scikit-learn from PyPI with pip: - -https://github.com/scikit-learn/scikit-learn/issues/19137 - + Scikit-learn 1.1, 1.2 and 1.3 support Python 3.8-3.12 + Scikit-learn 1.4 requires Python 3.9 or newer. .. _install_by_distribution: @@ -191,7 +206,7 @@ Alpine Linux's package is provided through the `official repositories ``py3-scikit-learn`` for Python. It can be installed by typing the following command: -.. prompt:: bash $ +.. prompt:: bash sudo apk add py3-scikit-learn @@ -204,7 +219,7 @@ Arch Linux's package is provided through the `official repositories ``python-scikit-learn`` for Python. It can be installed by typing the following command: -.. prompt:: bash $ +.. 
prompt:: bash sudo pacman -S python-scikit-learn @@ -215,11 +230,11 @@ Debian/Ubuntu The Debian/Ubuntu package is split in three different packages called ``python3-sklearn`` (python modules), ``python3-sklearn-lib`` (low-level implementations and bindings), ``python3-sklearn-doc`` (documentation). -Only the Python 3 version is available in the Debian Buster (the more recent -Debian distribution). +Note that scikit-learn requires Python 3, hence the need to use the `python3-` +suffixed package names. Packages can be installed using ``apt-get``: -.. prompt:: bash $ +.. prompt:: bash sudo apt-get install python3-sklearn python3-sklearn-lib python3-sklearn-doc @@ -228,10 +243,10 @@ Fedora ------ The Fedora package is called ``python3-scikit-learn`` for the python 3 version, -the only one available in Fedora30. +the only one available in Fedora. It can be installed using ``dnf``: -.. prompt:: bash $ +.. prompt:: bash sudo dnf install python3-scikit-learn @@ -239,10 +254,8 @@ It can be installed using ``dnf``: NetBSD ------ -scikit-learn is available via `pkgsrc-wip -`_: - - https://pkgsrc.se/math/py-scikit-learn +scikit-learn is available via `pkgsrc-wip `_: +https://pkgsrc.se/math/py-scikit-learn MacPorts for Mac OSX @@ -253,7 +266,7 @@ where ``XY`` denotes the Python version. It can be installed by typing the following command: -.. prompt:: bash $ +.. prompt:: bash sudo port install py39-scikit-learn @@ -275,21 +288,21 @@ Intel Extension for Scikit-learn Intel maintains an optimized x86_64 package, available in PyPI (via `pip`), and in the `main`, `conda-forge` and `intel` conda channels: -.. prompt:: bash $ +.. prompt:: bash conda install scikit-learn-intelex -This package has an Intel optimized version of many estimators. Whenever -an alternative implementation doesn't exist, scikit-learn implementation -is used as a fallback. Those optimized solvers come from the oneDAL -C++ library and are optimized for the x86_64 architecture, and are +This package has an Intel optimized version of many estimators. Whenever +an alternative implementation doesn't exist, scikit-learn implementation +is used as a fallback. Those optimized solvers come from the oneDAL +C++ library and are optimized for the x86_64 architecture, and are optimized for multi-core Intel CPUs. Note that those solvers are not enabled by default, please refer to the -`scikit-learn-intelex `_ +`scikit-learn-intelex `_ documentation for more details on usage scenarios. Direct export example: -.. prompt:: bash $ +.. prompt:: python >>> from sklearnex.neighbors import NearestNeighbors @@ -301,7 +314,7 @@ with `scikit-learn-intelex`, please report the issue on their WinPython for Windows ------------------------ +--------------------- The `WinPython `_ project distributes scikit-learn as an additional plugin. @@ -310,6 +323,10 @@ scikit-learn as an additional plugin. Troubleshooting =============== +If you encounter unexpected failures when installing scikit-learn, you may submit +an issue to the `issue tracker `_. +Before that, please also make sure to check the following common issues. + .. _windows_longpath: Error caused by file path length limit on Windows @@ -339,6 +356,6 @@ using the ``regedit`` tool: #. Reinstall scikit-learn (ignoring the previous broken installation): -.. prompt:: python $ + .. 
prompt:: powershell - pip install --exists-action=i scikit-learn + pip install --exists-action=i scikit-learn diff --git a/doc/js/scripts/api-search.js b/doc/js/scripts/api-search.js new file mode 100644 index 0000000000000..2148e0c429aaa --- /dev/null +++ b/doc/js/scripts/api-search.js @@ -0,0 +1,12 @@ +/** + * This script is for initializing the search table on the API index page. See + * DataTables documentation for more information: https://datatables.net/ + */ + +document.addEventListener("DOMContentLoaded", function () { + new DataTable("table.apisearch-table", { + order: [], // Keep original order + lengthMenu: [10, 25, 50, 100, { label: "All", value: -1 }], + pageLength: -1, // Show all entries by default + }); +}); diff --git a/doc/js/scripts/dropdown.js b/doc/js/scripts/dropdown.js new file mode 100644 index 0000000000000..ec2e6d9419a28 --- /dev/null +++ b/doc/js/scripts/dropdown.js @@ -0,0 +1,61 @@ +/** + * This script is used to add the functionality of collapsing/expanding all dropdowns + * on the page to the sphinx-design dropdowns. This is because some browsers cannot + * search into collapsed
(such as Firefox). + * + * The reason why the buttons are added to the page with JS (dynamic) instead of with + * sphinx (static) is that the button will not work without JS activated, so we do not + * want them to show up in that case. + */ + +function addToggleAllButtons() { + // Get all sphinx-design dropdowns + const allDropdowns = document.querySelectorAll("details.sd-dropdown"); + + function collapseAll() { + // Function to collapse all dropdowns on the page + console.log("[SK] Collapsing all dropdowns..."); + allDropdowns.forEach((dropdown) => { + dropdown.removeAttribute("open"); + }); + } + + function expandAll() { + // Function to expand all dropdowns on the page + console.log("[SK] Expanding all dropdowns..."); + allDropdowns.forEach((dropdown) => { + dropdown.setAttribute("open", ""); + }); + } + + const buttonConfigs = new Map([ + ["up", { desc: "Collapse", action: collapseAll }], + ["down", { desc: "Expand", action: expandAll }], + ]); + + allDropdowns.forEach((dropdown) => { + // Get the summary element of the dropdown, where we will place the buttons + const summaryTitle = dropdown.querySelector("summary.sd-summary-title"); + for (const [direction, config] of buttonConfigs) { + // Button with icon inside + var newButton = document.createElement("button"); + var newIcon = document.createElement("i"); + newIcon.classList.add("fa-solid", `fa-angles-${direction}`); + newButton.appendChild(newIcon); + // Class for styling; `sd-summary-up/down` is implemented by sphinx-design; + // `sk-toggle-all` is implemented by us + newButton.classList.add(`sd-summary-${direction}`, `sk-toggle-all`); + // Bootstrap tooltip configurations + newButton.setAttribute("data-bs-toggle", "tooltip"); + newButton.setAttribute("data-bs-placement", "top"); + newButton.setAttribute("data-bs-offset", "0,10"); + newButton.setAttribute("data-bs-title", `${config.desc} all dropdowns`); + // Assign the collapse/expand action to the button + newButton.onclick = config.action; + // Append the button to the summary element + summaryTitle.appendChild(newButton); + } + }); +} + +document.addEventListener("DOMContentLoaded", addToggleAllButtons); diff --git a/doc/js/scripts/vendor/svg-pan-zoom.min.js b/doc/js/scripts/vendor/svg-pan-zoom.min.js new file mode 100644 index 0000000000000..bde44a689bfe1 --- /dev/null +++ b/doc/js/scripts/vendor/svg-pan-zoom.min.js @@ -0,0 +1,31 @@ +/** + * svg-pan-zoom v3.6.2 + * + * https://github.com/bumbu/svg-pan-zoom + * + * Copyright 2009-2010 Andrea Leofreddi + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, this + * list of conditions and the following disclaimer in the documentation and/or + * other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +!function s(r,a,l){function u(e,t){if(!a[e]){if(!r[e]){var o="function"==typeof require&&require;if(!t&&o)return o(e,!0);if(h)return h(e,!0);var n=new Error("Cannot find module '"+e+"'");throw n.code="MODULE_NOT_FOUND",n}var i=a[e]={exports:{}};r[e][0].call(i.exports,function(t){return u(r[e][1][t]||t)},i,i.exports,s,r,a,l)}return a[e].exports}for(var h="function"==typeof require&&require,t=0;tthis.options.maxZoom*n.zoom&&(t=this.options.maxZoom*n.zoom/this.getZoom());var i=this.viewport.getCTM(),s=e.matrixTransform(i.inverse()),r=this.svg.createSVGMatrix().translate(s.x,s.y).scale(t).translate(-s.x,-s.y),a=i.multiply(r);a.a!==i.a&&this.viewport.setCTM(a)},i.prototype.zoom=function(t,e){this.zoomAtPoint(t,a.getSvgCenterPoint(this.svg,this.width,this.height),e)},i.prototype.publicZoom=function(t,e){e&&(t=this.computeFromRelativeZoom(t)),this.zoom(t,e)},i.prototype.publicZoomAtPoint=function(t,e,o){if(o&&(t=this.computeFromRelativeZoom(t)),"SVGPoint"!==r.getType(e)){if(!("x"in e&&"y"in e))throw new Error("Given point is invalid");e=a.createSVGPoint(this.svg,e.x,e.y)}this.zoomAtPoint(t,e,o)},i.prototype.getZoom=function(){return this.viewport.getZoom()},i.prototype.getRelativeZoom=function(){return this.viewport.getRelativeZoom()},i.prototype.computeFromRelativeZoom=function(t){return t*this.viewport.getOriginalState().zoom},i.prototype.resetZoom=function(){var t=this.viewport.getOriginalState();this.zoom(t.zoom,!0)},i.prototype.resetPan=function(){this.pan(this.viewport.getOriginalState())},i.prototype.reset=function(){this.resetZoom(),this.resetPan()},i.prototype.handleDblClick=function(t){var e;if((this.options.preventMouseEventsDefault&&(t.preventDefault?t.preventDefault():t.returnValue=!1),this.options.controlIconsEnabled)&&-1<(t.target.getAttribute("class")||"").indexOf("svg-pan-zoom-control"))return!1;e=t.shiftKey?1/(2*(1+this.options.zoomScaleSensitivity)):2*(1+this.options.zoomScaleSensitivity);var o=a.getEventPoint(t,this.svg).matrixTransform(this.svg.getScreenCTM().inverse());this.zoomAtPoint(e,o)},i.prototype.handleMouseDown=function(t,e){this.options.preventMouseEventsDefault&&(t.preventDefault?t.preventDefault():t.returnValue=!1),r.mouseAndTouchNormalize(t,this.svg),this.options.dblClickZoomEnabled&&r.isDblClick(t,e)?this.handleDblClick(t):(this.state="pan",this.firstEventCTM=this.viewport.getCTM(),this.stateOrigin=a.getEventPoint(t,this.svg).matrixTransform(this.firstEventCTM.inverse()))},i.prototype.handleMouseMove=function(t){if(this.options.preventMouseEventsDefault&&(t.preventDefault?t.preventDefault():t.returnValue=!1),"pan"===this.state&&this.options.panEnabled){var 
e=a.getEventPoint(t,this.svg).matrixTransform(this.firstEventCTM.inverse()),o=this.firstEventCTM.translate(e.x-this.stateOrigin.x,e.y-this.stateOrigin.y);this.viewport.setCTM(o)}},i.prototype.handleMouseUp=function(t){this.options.preventMouseEventsDefault&&(t.preventDefault?t.preventDefault():t.returnValue=!1),"pan"===this.state&&(this.state="none")},i.prototype.fit=function(){var t=this.viewport.getViewBox(),e=Math.min(this.width/t.width,this.height/t.height);this.zoom(e,!0)},i.prototype.contain=function(){var t=this.viewport.getViewBox(),e=Math.max(this.width/t.width,this.height/t.height);this.zoom(e,!0)},i.prototype.center=function(){var t=this.viewport.getViewBox(),e=.5*(this.width-(t.width+2*t.x)*this.getZoom()),o=.5*(this.height-(t.height+2*t.y)*this.getZoom());this.getPublicInstance().pan({x:e,y:o})},i.prototype.updateBBox=function(){this.viewport.simpleViewBoxCache()},i.prototype.pan=function(t){var e=this.viewport.getCTM();e.e=t.x,e.f=t.y,this.viewport.setCTM(e)},i.prototype.panBy=function(t){var e=this.viewport.getCTM();e.e+=t.x,e.f+=t.y,this.viewport.setCTM(e)},i.prototype.getPan=function(){var t=this.viewport.getState();return{x:t.x,y:t.y}},i.prototype.resize=function(){var t=a.getBoundingClientRectNormalized(this.svg);this.width=t.width,this.height=t.height;var e=this.viewport;e.options.width=this.width,e.options.height=this.height,e.processCTM(),this.options.controlIconsEnabled&&(this.getPublicInstance().disableControlIcons(),this.getPublicInstance().enableControlIcons())},i.prototype.destroy=function(){var e=this;for(var t in this.beforeZoom=null,this.onZoom=null,this.beforePan=null,this.onPan=null,(this.onUpdatedCTM=null)!=this.options.customEventsHandler&&this.options.customEventsHandler.destroy({svgElement:this.svg,eventsListenerElement:this.options.eventsListenerElement,instance:this.getPublicInstance()}),this.eventListeners)(this.options.eventsListenerElement||this.svg).removeEventListener(t,this.eventListeners[t],!this.options.preventMouseEventsDefault&&h);this.disableMouseWheelZoom(),this.getPublicInstance().disableControlIcons(),this.reset(),c=c.filter(function(t){return t.svg!==e.svg}),delete this.options,delete this.viewport,delete this.publicInstance,delete this.pi,this.getPublicInstance=function(){return null}},i.prototype.getPublicInstance=function(){var o=this;return this.publicInstance||(this.publicInstance=this.pi={enablePan:function(){return o.options.panEnabled=!0,o.pi},disablePan:function(){return o.options.panEnabled=!1,o.pi},isPanEnabled:function(){return!!o.options.panEnabled},pan:function(t){return o.pan(t),o.pi},panBy:function(t){return o.panBy(t),o.pi},getPan:function(){return o.getPan()},setBeforePan:function(t){return o.options.beforePan=null===t?null:r.proxy(t,o.publicInstance),o.pi},setOnPan:function(t){return o.options.onPan=null===t?null:r.proxy(t,o.publicInstance),o.pi},enableZoom:function(){return o.options.zoomEnabled=!0,o.pi},disableZoom:function(){return o.options.zoomEnabled=!1,o.pi},isZoomEnabled:function(){return!!o.options.zoomEnabled},enableControlIcons:function(){return o.options.controlIconsEnabled||(o.options.controlIconsEnabled=!0,s.enable(o)),o.pi},disableControlIcons:function(){return o.options.controlIconsEnabled&&(o.options.controlIconsEnabled=!1,s.disable(o)),o.pi},isControlIconsEnabled:function(){return!!o.options.controlIconsEnabled},enableDblClickZoom:function(){return o.options.dblClickZoomEnabled=!0,o.pi},disableDblClickZoom:function(){return 
o.options.dblClickZoomEnabled=!1,o.pi},isDblClickZoomEnabled:function(){return!!o.options.dblClickZoomEnabled},enableMouseWheelZoom:function(){return o.enableMouseWheelZoom(),o.pi},disableMouseWheelZoom:function(){return o.disableMouseWheelZoom(),o.pi},isMouseWheelZoomEnabled:function(){return!!o.options.mouseWheelZoomEnabled},setZoomScaleSensitivity:function(t){return o.options.zoomScaleSensitivity=t,o.pi},setMinZoom:function(t){return o.options.minZoom=t,o.pi},setMaxZoom:function(t){return o.options.maxZoom=t,o.pi},setBeforeZoom:function(t){return o.options.beforeZoom=null===t?null:r.proxy(t,o.publicInstance),o.pi},setOnZoom:function(t){return o.options.onZoom=null===t?null:r.proxy(t,o.publicInstance),o.pi},zoom:function(t){return o.publicZoom(t,!0),o.pi},zoomBy:function(t){return o.publicZoom(t,!1),o.pi},zoomAtPoint:function(t,e){return o.publicZoomAtPoint(t,e,!0),o.pi},zoomAtPointBy:function(t,e){return o.publicZoomAtPoint(t,e,!1),o.pi},zoomIn:function(){return this.zoomBy(1+o.options.zoomScaleSensitivity),o.pi},zoomOut:function(){return this.zoomBy(1/(1+o.options.zoomScaleSensitivity)),o.pi},getZoom:function(){return o.getRelativeZoom()},setOnUpdatedCTM:function(t){return o.options.onUpdatedCTM=null===t?null:r.proxy(t,o.publicInstance),o.pi},resetZoom:function(){return o.resetZoom(),o.pi},resetPan:function(){return o.resetPan(),o.pi},reset:function(){return o.reset(),o.pi},fit:function(){return o.fit(),o.pi},contain:function(){return o.contain(),o.pi},center:function(){return o.center(),o.pi},updateBBox:function(){return o.updateBBox(),o.pi},resize:function(){return o.resize(),o.pi},getSizes:function(){return{width:o.width,height:o.height,realZoom:o.getZoom(),viewBox:o.viewport.getViewBox()}},destroy:function(){return o.destroy(),o.pi}}),this.publicInstance};var c=[];e.exports=function(t,e){var o=r.getSvg(t);if(null===o)return null;for(var n=c.length-1;0<=n;n--)if(c[n].svg===o)return c[n].instance.getPublicInstance();return c.push({svg:o,instance:new i(o,e)}),c[c.length-1].instance.getPublicInstance()}},{"./control-icons":1,"./shadow-viewport":2,"./svg-utilities":5,"./uniwheel":6,"./utilities":7}],5:[function(t,e,o){var l=t("./utilities"),s="unknown";document.documentMode&&(s="ie"),e.exports={svgNS:"http://www.w3.org/2000/svg",xmlNS:"http://www.w3.org/XML/1998/namespace",xmlnsNS:"http://www.w3.org/2000/xmlns/",xlinkNS:"http://www.w3.org/1999/xlink",evNS:"http://www.w3.org/2001/xml-events",getBoundingClientRectNormalized:function(t){if(t.clientWidth&&t.clientHeight)return{width:t.clientWidth,height:t.clientHeight};if(t.getBoundingClientRect())return t.getBoundingClientRect();throw new Error("Cannot get BoundingClientRect for SVG.")},getOrCreateViewport:function(t,e){var o=null;if(!(o=l.isElement(e)?e:t.querySelector(e))){var n=Array.prototype.slice.call(t.childNodes||t.children).filter(function(t){return"defs"!==t.nodeName&&"#text"!==t.nodeName});1===n.length&&"g"===n[0].nodeName&&null===n[0].getAttribute("transform")&&(o=n[0])}if(!o){var i="viewport-"+(new Date).toISOString().replace(/\D/g,"");(o=document.createElementNS(this.svgNS,"g")).setAttribute("id",i);var s=t.childNodes||t.children;if(s&&0`. + +.. raw:: html + + + + + + +
+ +.. raw:: html + :file: images/ml_map.svg + +.. raw:: html + +
diff --git a/doc/authors.rst b/doc/maintainers.rst similarity index 92% rename from doc/authors.rst rename to doc/maintainers.rst index e2d027fa40506..0ba69d8afa60d 100644 --- a/doc/authors.rst +++ b/doc/maintainers.rst @@ -78,6 +78,10 @@

[maintainers grid entries, raw HTML markup elided: existing entries for Hanmin Qin, Bertrand Thirion, Nelle Varoquaux and Roman Yurchak appear as context; the diff adds new entries for Omar Salman and Yao Xiao.]

diff --git a/doc/authors_emeritus.rst b/doc/maintainers_emeritus.rst similarity index 97% rename from doc/authors_emeritus.rst rename to doc/maintainers_emeritus.rst index a56e2bc408ff4..b979b77bba974 100644 --- a/doc/authors_emeritus.rst +++ b/doc/maintainers_emeritus.rst @@ -20,7 +20,6 @@ - Wei Li - Paolo Losi - Gilles Louppe -- Chiara Marmo - Vincent Michel - Jarrod Millman - Alexandre Passos diff --git a/doc/make.bat b/doc/make.bat index b7e269a6a7836..2a32bcb678f62 100644 --- a/doc/make.bat +++ b/doc/make.bat @@ -29,8 +29,30 @@ if "%1" == "help" ( ) if "%1" == "clean" ( - for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i - del /q /s %BUILDDIR%\* + if exist %BUILDDIR%\ ( + for /d %%i in (%BUILDDIR%\*) do rmdir /q /s "%%i" + del /q /s %BUILDDIR%\* + echo. Removed %BUILDDIR%\* + ) + if exist auto_examples\ ( + rmdir /q /s auto_examples + echo. Removed auto_examples\ + ) + if exist generated\ ( + for /d %%i in (generated\*) do rmdir /q /s "%%i" + del /q /s generated\* + echo. Removed generated\* + ) + if exist modules\generated\ ( + rmdir /q /s modules\generated + echo. Removed modules\generated\ + ) + if exist css\styles\ ( + rmdir /q /s css\styles + echo. Removed css\styles\ + ) + for %%i in (api\*.rst) do del /q "%%i" + echo. Removed api\*.rst goto end ) @@ -46,6 +68,7 @@ if "%1" == "html-noplot" ( %SPHINXBUILD% -D plot_gallery=0 -b html %ALLSPHINXOPTS% %BUILDDIR%/html echo. echo.Build finished. The HTML pages are in %BUILDDIR%/html + goto end ) if "%1" == "dirhtml" ( diff --git a/doc/metadata_routing.rst b/doc/metadata_routing.rst index a3a443995cfc7..d319b311dddd7 100644 --- a/doc/metadata_routing.rst +++ b/doc/metadata_routing.rst @@ -1,48 +1,68 @@ - -.. _metadata_routing: - .. currentmodule:: sklearn .. TODO: update doc/conftest.py once document is updated and examples run. +.. _metadata_routing: + Metadata Routing ================ .. note:: - The Metadata Routing API is experimental, and is not implemented yet for many - estimators. It may change without the usual deprecation cycle. By default - this feature is not enabled. You can enable this feature by setting the - ``enable_metadata_routing`` flag to ``True``: + The Metadata Routing API is experimental, and is not yet implemented for all + estimators. Please refer to the :ref:`list of supported and unsupported + models ` for more information. It may change without + the usual deprecation cycle. By default this feature is not enabled. You can + enable it by setting the ``enable_metadata_routing`` flag to + ``True``:: >>> import sklearn >>> sklearn.set_config(enable_metadata_routing=True) -This guide demonstrates how metadata such as ``sample_weight`` can be routed -and passed along to estimators, scorers, and CV splitters through -meta-estimators such as :class:`~pipeline.Pipeline` and -:class:`~model_selection.GridSearchCV`. In order to pass metadata to a method -such as ``fit`` or ``score``, the object consuming the metadata, must *request* -it. For estimators and splitters, this is done via ``set_*_request`` methods, -e.g. ``set_fit_request(...)``, and for scorers this is done via the -``set_score_request`` method. For grouped splitters such as -:class:`~model_selection.GroupKFold`, a ``groups`` parameter is requested by -default. This is best demonstrated by the following examples. - -If you are developing a scikit-learn compatible estimator or meta-estimator, -you can check our related developer guide: -:ref:`sphx_glr_auto_examples_miscellaneous_plot_metadata_routing.py`. - -.. 
note:: Note that the methods and requirements introduced in this document are only - relevant if you want to pass metadata (e.g. ``sample_weight``) to a method. + relevant if you want to pass :term:`metadata` (e.g. ``sample_weight``) to a method. If you're only passing ``X`` and ``y`` and no other parameter / metadata to - methods such as ``fit``, ``transform``, etc, then you don't need to set + methods such as :term:`fit`, :term:`transform`, etc., then you don't need to set anything. +This guide demonstrates how :term:`metadata` can be routed and passed between objects in +scikit-learn. If you are developing a scikit-learn compatible estimator or +meta-estimator, you can check our related developer guide: +:ref:`sphx_glr_auto_examples_miscellaneous_plot_metadata_routing.py`. + +Metadata is data that an estimator, scorer, or CV splitter takes into account if the +user explicitly passes it as a parameter. For instance, :class:`~cluster.KMeans` accepts +`sample_weight` in its `fit()` method and considers it to calculate its centroids. +`classes` are consumed by some classifiers and `groups` are used in some splitters, but +any data that is passed into an object's methods apart from X and y can be considered as +metadata. Prior to scikit-learn version 1.3, there was no single API for passing +metadata like that if these objects were used in conjunction with other objects, e.g. a +scorer accepting `sample_weight` inside a :class:`~model_selection.GridSearchCV`. + +With the Metadata Routing API, we can transfer metadata to estimators, scorers, and CV +splitters using :term:`meta-estimators` (such as :class:`~pipeline.Pipeline` or +:class:`~model_selection.GridSearchCV`) or functions such as +:func:`~model_selection.cross_validate` which route data to other objects. In order to +pass metadata to a method like ``fit`` or ``score``, the object consuming the metadata, +must *request* it. This is done via `set_{method}_request()` methods, where `{method}` +is substituted by the name of the method that requests the metadata. For instance, +estimators that use the metadata in their `fit()` method would use `set_fit_request()`, +and scorers would use `set_score_request()`. These methods allow us to specify which +metadata to request, for instance `set_fit_request(sample_weight=True)`. + +For grouped splitters such as :class:`~model_selection.GroupKFold`, a +``groups`` parameter is requested by default. This is best demonstrated by the +following examples. + Usage Examples ************** -Here we present a few examples to show different common use-cases. The examples -in this section require the following imports and data:: +Here we present a few examples to show some common use-cases. Our goal is to pass +`sample_weight` and `groups` through :func:`~model_selection.cross_validate`, which +routes the metadata to :class:`~linear_model.LogisticRegressionCV` and to a custom scorer +made with :func:`~metrics.make_scorer`, both of which *can* use the metadata in their +methods. In these examples we want to individually set whether to use the metadata +within the different :term:`consumers `. + +The examples in this section require the following imports and data:: >>> import numpy as np >>> from sklearn.metrics import make_scorer, accuracy_score @@ -61,47 +81,50 @@ in this section require the following imports and data:: Weighted scoring and fitting ---------------------------- -Here :class:`~model_selection.GroupKFold` requests ``groups`` by default. 
However, we -need to explicitly request weights for our scorer and the internal cross validation of -:class:`~linear_model.LogisticRegressionCV`. Both of these *consumers* know how to use -metadata called ``sample_weight``:: +The splitter used internally in :class:`~linear_model.LogisticRegressionCV`, +:class:`~model_selection.GroupKFold`, requests ``groups`` by default. However, we need +to explicitly request `sample_weight` for it and for our custom scorer by specifying +`sample_weight=True` in :class:`~linear_model.LogisticRegressionCV`s `set_fit_request()` +method and in :func:`~metrics.make_scorer`s `set_score_request()` method. Both +:term:`consumers ` know how to use ``sample_weight`` in their `fit()` or +`score()` methods. We can then pass the metadata in +:func:`~model_selection.cross_validate` which will route it to any active consumers:: - >>> weighted_acc = make_scorer(accuracy_score).set_score_request( - ... sample_weight=True - ... ) + >>> weighted_acc = make_scorer(accuracy_score).set_score_request(sample_weight=True) >>> lr = LogisticRegressionCV( - ... cv=GroupKFold(), scoring=weighted_acc, + ... cv=GroupKFold(), + ... scoring=weighted_acc ... ).set_fit_request(sample_weight=True) >>> cv_results = cross_validate( ... lr, ... X, ... y, - ... props={"sample_weight": my_weights, "groups": my_groups}, + ... params={"sample_weight": my_weights, "groups": my_groups}, ... cv=GroupKFold(), ... scoring=weighted_acc, ... ) -Note that in this example, ``my_weights`` is passed to both the scorer and -:class:`~linear_model.LogisticRegressionCV`. +Note that in this example, :func:`~model_selection.cross_validate` routes ``my_weights`` +to both the scorer and :class:`~linear_model.LogisticRegressionCV`. -Error handling: if ``props={"sample_weigh": my_weights, ...}`` were passed -(note the typo), :func:`~model_selection.cross_validate` would raise an error, -since ``sample_weigh`` was not requested by any of its underlying objects. +If we would pass `sample_weight` in the params of +:func:`~model_selection.cross_validate`, but not set any object to request it, +`UnsetMetadataPassedError` would be raised, hinting to us that we need to explicitly set +where to route it. The same applies if ``params={"sample_weights": my_weights, ...}`` +were passed (note the typo, i.e. ``weights`` instead of ``weight``), since +``sample_weights`` was not requested by any of its underlying objects. Weighted scoring and unweighted fitting --------------------------------------- -When passing metadata such as ``sample_weight`` around, all scikit-learn -estimators require weights to be either explicitly requested or not requested -(i.e. ``True`` or ``False``) when used in another router such as a -:class:`~pipeline.Pipeline` or a ``*GridSearchCV``. To perform an unweighted -fit, we need to configure :class:`~linear_model.LogisticRegressionCV` to not -request sample weights, so that :func:`~model_selection.cross_validate` does -not pass the weights along:: +When passing metadata such as ``sample_weight`` into a :term:`router` +(:term:`meta-estimators` or routing function), all ``sample_weight`` :term:`consumers +` require weights to be either explicitly requested or explicitly not +requested (i.e. ``True`` or ``False``). Thus, to perform an unweighted fit, we need to +configure :class:`~linear_model.LogisticRegressionCV` to not request sample weights, so +that :func:`~model_selection.cross_validate` does not pass the weights along:: - >>> weighted_acc = make_scorer(accuracy_score).set_score_request( - ... 
sample_weight=True - ... ) + >>> weighted_acc = make_scorer(accuracy_score).set_score_request(sample_weight=True) >>> lr = LogisticRegressionCV( ... cv=GroupKFold(), scoring=weighted_acc, ... ).set_fit_request(sample_weight=False) @@ -110,28 +133,29 @@ not pass the weights along:: ... X, ... y, ... cv=GroupKFold(), - ... props={"sample_weight": my_weights, "groups": my_groups}, + ... params={"sample_weight": my_weights, "groups": my_groups}, ... scoring=weighted_acc, ... ) -If :meth:`linear_model.LogisticRegressionCV.set_fit_request` has not -been called, :func:`~model_selection.cross_validate` will raise an -error because ``sample_weight`` is passed in but -:class:`~linear_model.LogisticRegressionCV` would not be explicitly configured -to recognize the weights. +If :meth:`linear_model.LogisticRegressionCV.set_fit_request` had not been called, +:func:`~model_selection.cross_validate` would raise an error because ``sample_weight`` +is passed but :class:`~linear_model.LogisticRegressionCV` would not be explicitly +configured to recognize the weights. Unweighted feature selection ---------------------------- -Setting request values for metadata are only required if the object, e.g. estimator, -scorer, etc., is a consumer of that metadata Unlike -:class:`~linear_model.LogisticRegressionCV`, :class:`~feature_selection.SelectKBest` -doesn't consume weights and therefore no request value for ``sample_weight`` on its -instance is set and ``sample_weight`` is not routed to it:: +Routing metadata is only possible if the object's method knows how to use the metadata, +which in most cases means they have it as an explicit parameter. Only then we can set +request values for metadata using `set_fit_request(sample_weight=True)`, for instance. +This makes the object a :term:`consumer `. - >>> weighted_acc = make_scorer(accuracy_score).set_score_request( - ... sample_weight=True - ... ) +Unlike :class:`~linear_model.LogisticRegressionCV`, +:class:`~feature_selection.SelectKBest` can't consume weights and therefore no request +value for ``sample_weight`` on its instance is set and ``sample_weight`` is not routed +to it:: + + >>> weighted_acc = make_scorer(accuracy_score).set_score_request(sample_weight=True) >>> lr = LogisticRegressionCV( ... cv=GroupKFold(), scoring=weighted_acc, ... ).set_fit_request(sample_weight=True) @@ -142,12 +166,12 @@ instance is set and ``sample_weight`` is not routed to it:: ... X, ... y, ... cv=GroupKFold(), - ... props={"sample_weight": my_weights, "groups": my_groups}, + ... params={"sample_weight": my_weights, "groups": my_groups}, ... scoring=weighted_acc, ... ) -Advanced: Different scoring and fitting weights ------------------------------------------------ +Different scoring and fitting weights +------------------------------------- Despite :func:`~metrics.make_scorer` and :class:`~linear_model.LogisticRegressionCV` both expecting the key @@ -166,7 +190,7 @@ consumers. In this example, we pass ``scoring_weight`` to the scorer, and ... X, ... y, ... cv=GroupKFold(), - ... props={ + ... params={ ... "scoring_weight": my_weights, ... "fitting_weight": my_other_weights, ... "groups": my_groups, @@ -177,41 +201,41 @@ consumers. In this example, we pass ``scoring_weight`` to the scorer, and API Interface ************* -A *consumer* is an object (estimator, meta-estimator, scorer, splitter) which -accepts and uses some metadata in at least one of its methods (``fit``, -``predict``, ``inverse_transform``, ``transform``, ``score``, ``split``). 
-Meta-estimators which only forward the metadata to other objects (the child -estimator, scorers, or splitters) and don't use the metadata themselves are not -consumers. (Meta-)Estimators which route metadata to other objects are -*routers*. A(n) (meta-)estimator can be a consumer and a router at the same time. -(Meta-)Estimators and splitters expose a ``set_*_request`` method for each -method which accepts at least one metadata. For instance, if an estimator -supports ``sample_weight`` in ``fit`` and ``score``, it exposes +A :term:`consumer` is an object (estimator, meta-estimator, scorer, splitter) which +accepts and uses some :term:`metadata` in at least one of its methods (for instance +``fit``, ``predict``, ``inverse_transform``, ``transform``, ``score``, ``split``). +Meta-estimators which only forward the metadata to other objects (child estimators, +scorers, or splitters) and don't use the metadata themselves are not consumers. +(Meta-)Estimators which route metadata to other objects are :term:`routers `. +A(n) (meta-)estimator can be a :term:`consumer` and a :term:`router` at the same time. +(Meta-)Estimators and splitters expose a `set_{method}_request` method for each method +which accepts at least one metadata. For instance, if an estimator supports +``sample_weight`` in ``fit`` and ``score``, it exposes ``estimator.set_fit_request(sample_weight=value)`` and ``estimator.set_score_request(sample_weight=value)``. Here ``value`` can be: -- ``True``: method requests a ``sample_weight``. This means if the metadata is - provided, it will be used, otherwise no error is raised. +- ``True``: method requests a ``sample_weight``. This means if the metadata is provided, + it will be used, otherwise no error is raised. - ``False``: method does not request a ``sample_weight``. -- ``None``: router will raise an error if ``sample_weight`` is passed. This is - in almost all cases the default value when an object is instantiated and - ensures the user sets the metadata requests explicitly when a metadata is - passed. The only exception are ``Group*Fold`` splitters. -- ``"param_name"``: if this estimator is used in a meta-estimator, the - meta-estimator should forward ``"param_name"`` as ``sample_weight`` to this - estimator. This means the mapping between the metadata required by the - object, e.g. ``sample_weight`` and what is provided by the user, e.g. - ``my_weights`` is done at the router level, and not by the object, e.g. - estimator, itself. +- ``None``: router will raise an error if ``sample_weight`` is passed. This is in almost + all cases the default value when an object is instantiated and ensures the user sets + the metadata requests explicitly when a metadata is passed. The only exception are + ``Group*Fold`` splitters. +- ``"param_name"``: alias for ``sample_weight`` if we want to pass different weights to + different consumers. If aliasing is used the meta-estimator should not forward + ``"param_name"`` to the consumer, but ``sample_weight`` instead, because the consumer + will expect a param called ``sample_weight``. This means the mapping between the + metadata required by the object, e.g. ``sample_weight`` and the variable name provided + by the user, e.g. ``my_weights`` is done at the router level, and not by the consuming + object itself. Metadata are requested in the same way for scorers using ``set_score_request``. -If a metadata, e.g. 
``sample_weight``, is passed by the user, the metadata -request for all objects which potentially can consume ``sample_weight`` should -be set by the user, otherwise an error is raised by the router object. For -example, the following code raises an error, since it hasn't been explicitly -specified whether ``sample_weight`` should be passed to the estimator's scorer -or not:: +If a metadata, e.g. ``sample_weight``, is passed by the user, the metadata request for +all objects which potentially can consume ``sample_weight`` should be set by the user, +otherwise an error is raised by the router object. For example, the following code +raises an error, since it hasn't been explicitly specified whether ``sample_weight`` +should be passed to the estimator's scorer or not:: >>> param_grid = {"C": [0.1, 1]} >>> lr = LogisticRegression().set_fit_request(sample_weight=True) @@ -221,11 +245,85 @@ or not:: ... ).fit(X, y, sample_weight=my_weights) ... except ValueError as e: ... print(e) - [sample_weight] are passed but are not explicitly set as requested or not for - LogisticRegression.score + [sample_weight] are passed but are not explicitly set as requested or not + requested for LogisticRegression.score, which is used within GridSearchCV.fit. + Call `LogisticRegression.set_score_request({metadata}=True/False)` for each metadata + you want to request/ignore. The issue can be fixed by explicitly setting the request value:: >>> lr = LogisticRegression().set_fit_request( ... sample_weight=True ... ).set_score_request(sample_weight=False) + +At the end of the **Usage Examples** section, we disable the configuration flag for +metadata routing:: + + >>> sklearn.set_config(enable_metadata_routing=False) + +.. _metadata_routing_models: + +Metadata Routing Support Status +******************************* +All consumers (i.e. simple estimators which only consume metadata and don't +route them) support metadata routing, meaning they can be used inside +meta-estimators which support metadata routing. However, development of support +for metadata routing for meta-estimators is in progress, and here is a list of +meta-estimators and tools which support and don't yet support metadata routing. 
+ + +Meta-estimators and functions supporting metadata routing: + +- :class:`sklearn.calibration.CalibratedClassifierCV` +- :class:`sklearn.compose.ColumnTransformer` +- :class:`sklearn.covariance.GraphicalLassoCV` +- :class:`sklearn.ensemble.VotingClassifier` +- :class:`sklearn.ensemble.VotingRegressor` +- :class:`sklearn.ensemble.BaggingClassifier` +- :class:`sklearn.ensemble.BaggingRegressor` +- :class:`sklearn.feature_selection.SelectFromModel` +- :class:`sklearn.impute.IterativeImputer` +- :class:`sklearn.linear_model.ElasticNetCV` +- :class:`sklearn.linear_model.LarsCV` +- :class:`sklearn.linear_model.LassoCV` +- :class:`sklearn.linear_model.LassoLarsCV` +- :class:`sklearn.linear_model.LogisticRegressionCV` +- :class:`sklearn.linear_model.MultiTaskElasticNetCV` +- :class:`sklearn.linear_model.MultiTaskLassoCV` +- :class:`sklearn.linear_model.RANSACRegressor` +- :class:`sklearn.linear_model.RidgeClassifierCV` +- :class:`sklearn.linear_model.RidgeCV` +- :class:`sklearn.model_selection.GridSearchCV` +- :class:`sklearn.model_selection.HalvingGridSearchCV` +- :class:`sklearn.model_selection.HalvingRandomSearchCV` +- :class:`sklearn.model_selection.RandomizedSearchCV` +- :func:`sklearn.model_selection.cross_validate` +- :func:`sklearn.model_selection.cross_val_score` +- :func:`sklearn.model_selection.cross_val_predict` +- :class:`sklearn.multiclass.OneVsOneClassifier` +- :class:`sklearn.multiclass.OneVsRestClassifier` +- :class:`sklearn.multiclass.OutputCodeClassifier` +- :class:`sklearn.multioutput.ClassifierChain` +- :class:`sklearn.multioutput.MultiOutputClassifier` +- :class:`sklearn.multioutput.MultiOutputRegressor` +- :class:`sklearn.linear_model.OrthogonalMatchingPursuitCV` +- :class:`sklearn.multioutput.RegressorChain` +- :class:`sklearn.pipeline.FeatureUnion` +- :class:`sklearn.pipeline.Pipeline` + +Meta-estimators and tools not supporting metadata routing yet: + +- :class:`sklearn.compose.TransformedTargetRegressor` +- :class:`sklearn.ensemble.AdaBoostClassifier` +- :class:`sklearn.ensemble.AdaBoostRegressor` +- :class:`sklearn.ensemble.StackingClassifier` +- :class:`sklearn.ensemble.StackingRegressor` +- :class:`sklearn.feature_selection.RFE` +- :class:`sklearn.feature_selection.RFECV` +- :class:`sklearn.feature_selection.SequentialFeatureSelector` +- :class:`sklearn.impute.IterativeImputer` +- :class:`sklearn.linear_model.RANSACRegressor` +- :class:`sklearn.model_selection.learning_curve` +- :class:`sklearn.model_selection.permutation_test_score` +- :class:`sklearn.model_selection.validation_curve` +- :class:`sklearn.semi_supervised.SelfTrainingClassifier` diff --git a/doc/min_dependency_substitutions.rst.template b/doc/min_dependency_substitutions.rst.template new file mode 100644 index 0000000000000..946de84902b3b --- /dev/null +++ b/doc/min_dependency_substitutions.rst.template @@ -0,0 +1,3 @@ +{% for package, (version, _) in dependent_packages.items() -%} +.. |{{ package|capitalize }}MinVersion| replace:: {{ version }} +{% endfor %} diff --git a/doc/min_dependency_table.rst.template b/doc/min_dependency_table.rst.template new file mode 100644 index 0000000000000..fbe58633e913a --- /dev/null +++ b/doc/min_dependency_table.rst.template @@ -0,0 +1,13 @@ +.. 
list-table:: + :header-rows: 1 + + * - Dependency + - Minimum Version + - Purpose + + {% for package, (version, tags) in dependent_packages.items() -%} + * - {{ package }} + - {{ version }} + - {{ tags }} + + {% endfor %} diff --git a/doc/model_persistence.rst b/doc/model_persistence.rst index 53f01fd019d79..cd5347d302123 100644 --- a/doc/model_persistence.rst +++ b/doc/model_persistence.rst @@ -1,7 +1,3 @@ -.. Places parent toc into the sidebar - -:parenttoc: True - .. _model_persistence: ================= @@ -9,161 +5,349 @@ Model persistence ================= After training a scikit-learn model, it is desirable to have a way to persist -the model for future use without having to retrain. The following sections give -you some hints on how to persist a scikit-learn model. +the model for future use without having to retrain. Based on your use-case, +there are a few different ways to persist a scikit-learn model, and here we +help you decide which one suits you best. In order to make a decision, you need +to answer the following questions: + +1. Do you need the Python object after persistence, or do you only need to + persist in order to serve the model and get predictions out of it? + +If you only need to serve the model and no further investigation on the Python +object itself is required, then :ref:`ONNX ` might be the +best fit for you. Note that not all models are supported by ONNX. + +In case ONNX is not suitable for your use-case, the next question is: + +2. Do you absolutely trust the source of the model, or are there any security + concerns regarding where the persisted model comes from? + +If you have security concerns, then you should consider using :ref:`skops.io +` which gives you back the Python object, but unlike +`pickle` based persistence solutions, loading the persisted model doesn't +automatically allow arbitrary code execution. Note that this requires manual +investigation of the persisted file, which :mod:`skops.io` allows you to do. + +The other solutions assume you absolutely trust the source of the file to be +loaded, as they are all susceptible to arbitrary code execution upon loading +the persisted file since they all use the pickle protocol under the hood. + +3. Do you care about the performance of loading the model, and sharing it + between processes where a memory mapped object on disk is beneficial? -Python specific serialization ------------------------------ +If yes, then you can consider using :ref:`joblib `. If this +is not a major concern for you, then you can use the built-in :mod:`pickle` +module. -It is possible to save a model in scikit-learn by using Python's built-in -persistence model, namely `pickle -`_:: +4. Did you try :mod:`pickle` or :mod:`joblib` and found that the model cannot + be persisted? It can happen for instance when you have user defined + functions in your model. - >>> from sklearn import svm +If yes, then you can use `cloudpickle`_ which can serialize certain objects +which cannot be serialized by :mod:`pickle` or :mod:`joblib`. + + +Workflow Overview +----------------- + +In a typical workflow, the first step is to train the model using scikit-learn +and scikit-learn compatible libraries. Note that support for scikit-learn and +third party estimators varies across the different persistence methods. + +Train and Persist the Model +........................... + +Creating an appropriate model depends on your use-case. 
As an example, here we +train a :class:`sklearn.ensemble.HistGradientBoostingClassifier` on the iris +dataset:: + + >>> from sklearn import ensemble >>> from sklearn import datasets - >>> clf = svm.SVC() - >>> X, y= datasets.load_iris(return_X_y=True) + >>> clf = ensemble.HistGradientBoostingClassifier() + >>> X, y = datasets.load_iris(return_X_y=True) >>> clf.fit(X, y) - SVC() + HistGradientBoostingClassifier() + +Once the model is trained, you can persist it using your desired method, and +then you can load the model in a separate environment and get predictions from +it given input data. Here there are two major paths depending on how you +persist and plan to serve the model: - >>> import pickle - >>> s = pickle.dumps(clf) - >>> clf2 = pickle.loads(s) - >>> clf2.predict(X[0:1]) - array([0]) - >>> y[0] - 0 +- :ref:`ONNX `: You need an `ONNX` runtime and an environment + with appropriate dependencies installed to load the model and use the runtime + to get predictions. This environment can be minimal and does not necessarily + even require Python to be installed to load the model and compute + predictions. Also note that `onnxruntime` typically requires much less RAM + than Python to compute predictions from small models. -In the specific case of scikit-learn, it may be better to use joblib's -replacement of pickle (``dump`` & ``load``), which is more efficient on -objects that carry large numpy arrays internally as is often the case for -fitted scikit-learn estimators, but can only pickle to the disk and not to a -string:: +- :mod:`skops.io`, :mod:`pickle`, :mod:`joblib`, `cloudpickle`_: You need a + Python environment with the appropriate dependencies installed to load the + model and get predictions from it. This environment should have the same + **packages** and the same **versions** as the environment where the model was + trained. Note that none of these methods support loading a model trained with + a different version of scikit-learn, and possibly different versions of other + dependencies such as `numpy` and `scipy`. Another concern would be running + the persisted model on different hardware, and in most cases you should be + able to load your persisted model on different hardware. + + +.. _onnx_persistence: + +ONNX +---- + +`ONNX`, or `Open Neural Network Exchange `__ format is best +suited to use-cases where one needs to persist the model and then use the +persisted artifact to get predictions without the need to load the Python +object itself. It is also useful in cases where the serving environment needs +to be lean and minimal, since the `ONNX` runtime does not require `python`. + +`ONNX` is a binary serialization of the model. It has been developed to improve +the usability of the interoperable representation of data models. It aims to +facilitate the conversion of the data models between different machine learning +frameworks, and to improve their portability on different computing +architectures. More details are available from the `ONNX tutorial +`__. To convert a scikit-learn model to `ONNX`, +`sklearn-onnx `__ has been developed. However, +not all scikit-learn models are supported, and it is limited to the core +scikit-learn and does not support most third party estimators. One can write a +custom converter for third party or custom estimators, but the documentation to +do that is sparse and it might be challenging to do so. + +.. 
dropdown:: Using ONNX + + To convert the model to `ONNX` format, you need to give the converter some + information about the input as well, about which you can read more `here + `__:: + + from skl2onnx import to_onnx + onx = to_onnx(clf, X[:1].astype(numpy.float32), target_opset=12) + with open("filename.onnx", "wb") as f: + f.write(onx.SerializeToString()) + + You can load the model in Python and use the `ONNX` runtime to get + predictions:: + + from onnxruntime import InferenceSession + with open("filename.onnx", "rb") as f: + onx = f.read() + sess = InferenceSession(onx, providers=["CPUExecutionProvider"]) + pred_ort = sess.run(None, {"X": X_test.astype(numpy.float32)})[0] + +.. _skops_persistence: + +`skops.io` +---------- + +:mod:`skops.io` avoids using :mod:`pickle` and only loads files which have types +and references to functions which are trusted either by default or by the user. +Therefore it provides a more secure format than :mod:`pickle`, :mod:`joblib`, +and `cloudpickle`_. + + +.. dropdown:: Using skops + + The API is very similar to :mod:`pickle`, and you can persist your models as + explained in the `documentation + `__ using + :func:`skops.io.dump` and :func:`skops.io.dumps`:: + + import skops.io as sio + obj = sio.dump(clf, "filename.skops") + + And you can load them back using :func:`skops.io.load` and + :func:`skops.io.loads`. However, you need to specify the types which are + trusted by you. You can get existing unknown types in a dumped object / file + using :func:`skops.io.get_untrusted_types`, and after checking its contents, + pass it to the load function:: + + unknown_types = sio.get_untrusted_types(file="filename.skops") + # investigate the contents of unknown_types, and only load if you trust + # everything you see. + clf = sio.load("filename.skops", trusted=unknown_types) + + Please report issues and feature requests related to this format on the `skops + issue tracker `__. + + +.. _pickle_persistence: + +`pickle`, `joblib`, and `cloudpickle` +------------------------------------- + +These three modules / packages, use the `pickle` protocol under the hood, but +come with slight variations: - >>> from joblib import dump, load - >>> dump(clf, 'filename.joblib') # doctest: +SKIP +- :mod:`pickle` is a module from the Python Standard Library. It can serialize + and deserialize any Python object, including custom Python classes and + objects. +- :mod:`joblib` is more efficient than `pickle` when working with large machine + learning models or large numpy arrays. +- `cloudpickle`_ can serialize certain objects which cannot be serialized by + :mod:`pickle` or :mod:`joblib`, such as user defined functions and lambda + functions. This can happen for instance, when using a + :class:`~sklearn.preprocessing.FunctionTransformer` and using a custom + function to transform the data. -Later you can load back the pickled model (possibly in another Python process) -with:: +.. dropdown:: Using `pickle`, `joblib`, or `cloudpickle` - >>> clf = load('filename.joblib') # doctest:+SKIP + Depending on your use-case, you can choose one of these three methods to + persist and load your scikit-learn model, and they all follow the same API:: -.. note:: + # Here you can replace pickle with joblib or cloudpickle + from pickle import dump + with open("filename.pkl", "wb") as f: + dump(clf, f, protocol=5) - ``dump`` and ``load`` functions also accept file-like object - instead of filenames. More information on data persistence with Joblib is - available `here - `_. 
+ Using `protocol=5` is recommended to reduce memory usage and make it faster to + store and load any large NumPy array stored as a fitted attribute in the model. + You can alternatively pass `protocol=pickle.HIGHEST_PROTOCOL` which is + equivalent to `protocol=5` in Python 3.8 and later (at the time of writing). -When an estimator is unpickled with a scikit-learn version that is inconsistent -with the version the estimator was pickled with, a -:class:`~sklearn.exceptions.InconsistentVersionWarning` is raised. This warning -can be caught to obtain the original version the estimator was pickled with: + And later when needed, you can load the same object from the persisted file:: - from sklearn.exceptions import InconsistentVersionWarning - warnings.simplefilter("error", InconsistentVersionWarning) - - try: - est = pickle.loads("model_from_prevision_version.pickle") - except InconsistentVersionWarning as w: - print(w.original_sklearn_version) + # Here you can replace pickle with joblib or cloudpickle + from pickle import load + with open("filename.pkl", "rb") as f: + clf = load(f) .. _persistence_limitations: -Security & maintainability limitations -...................................... +Security & Maintainability Limitations +-------------------------------------- + +:mod:`pickle` (and :mod:`joblib` and :mod:`cloudpickle` by extension) has +many documented security vulnerabilities by design and should only be used if +the artifact, i.e. the pickle-file, is coming from a trusted and verified +source. You should never load a pickle file from an untrusted source, similarly +to how you should never execute code from an untrusted source. -pickle (and joblib by extension), has some issues regarding maintainability -and security. Because of this, +Also note that arbitrary computations can be represented using the `ONNX` +format, and it is therefore recommended to serve models using `ONNX` in a +sandboxed environment to safeguard against computational and memory exploits. -* Never unpickle untrusted data as it could lead to malicious code being - executed upon loading. -* While models saved using one version of scikit-learn might load in - other versions, this is entirely unsupported and inadvisable. It should - also be kept in mind that operations performed on such data could give - different and unexpected results. +Also note that there are no supported ways to load a model trained with a +different version of scikit-learn. While using :mod:`skops.io`, :mod:`joblib`, +:mod:`pickle`, or `cloudpickle`_, models saved using one version of +scikit-learn might load in other versions; however, this is entirely +unsupported and inadvisable. It should also be kept in mind that operations +performed on such data could give different and unexpected results, or even +crash your Python process. In order to rebuild a similar model with future versions of scikit-learn, additional metadata should be saved along the pickled model: * The training data, e.g. a reference to an immutable snapshot -* The python source code used to generate the model +* The Python source code used to generate the model * The versions of scikit-learn and its dependencies * The cross validation score obtained on the training data This should make it possible to check that the cross-validation score is in the same range as before. -Aside for a few exceptions, pickled models should be portable across -architectures assuming the same versions of dependencies and Python are used. 
-If you encounter an estimator that is not portable please open an issue on -GitHub. Pickled models are often deployed in production using containers, like -Docker, in order to freeze the environment and dependencies. - -If you want to know more about these issues and explore other possible -serialization methods, please refer to this -`talk by Alex Gaynor -`_. - - -A more secure format: `skops` -............................. - -`skops `__ provides a more secure -format via the :mod:`skops.io` module. It avoids using :mod:`pickle` and only -loads files which have types and references to functions which are trusted -either by default or by the user. The API is very similar to ``pickle``, and -you can persist your models as explain in the `docs -`__ using -:func:`skops.io.dump` and :func:`skops.io.dumps`:: - - import skops.io as sio - obj = sio.dumps(clf) - -And you can load them back using :func:`skops.io.load` and -:func:`skops.io.loads`. However, you need to specify the types which are -trusted by you. You can get existing unknown types in a dumped object / file -using :func:`skops.io.get_untrusted_types`, and after checking its contents, -pass it to the load function:: - - unknown_types = sio.get_untrusted_types(data=obj) - clf = sio.loads(obj, trusted=unknown_types) - -If you trust the source of the file / object, you can pass ``trusted=True``:: - - clf = sio.loads(obj, trusted=True) - -Please report issues and feature requests related to this format on the `skops -issue tracker `__. - -Interoperable formats ---------------------- - -For reproducibility and quality control needs, when different architectures -and environments should be taken into account, exporting the model in -`Open Neural Network -Exchange `_ format or `Predictive Model Markup Language -(PMML) `_ format -might be a better approach than using `pickle` alone. -These are helpful where you may want to use your model for prediction in a -different environment from where the model was trained. - -ONNX is a binary serialization of the model. It has been developed to improve -the usability of the interoperable representation of data models. -It aims to facilitate the conversion of the data -models between different machine learning frameworks, and to improve their -portability on different computing architectures. More details are available -from the `ONNX tutorial `_. -To convert scikit-learn model to ONNX a specific tool `sklearn-onnx -`_ has been developed. - -PMML is an implementation of the `XML -`_ document standard -defined to represent data models together with the data used to generate them. -Being human and machine readable, -PMML is a good option for model validation on different platforms and -long term archiving. On the other hand, as XML in general, its verbosity does -not help in production when performance is critical. -To convert scikit-learn model to PMML you can use for example `sklearn2pmml -`_ distributed under the Affero GPLv3 -license. +Aside for a few exceptions, persisted models should be portable across +operating systems and hardware architectures assuming the same versions of +dependencies and Python are used. If you encounter an estimator that is not +portable, please open an issue on GitHub. Persisted models are often deployed +in production using containers like Docker, in order to freeze the environment +and dependencies. + +If you want to know more about these issues, please refer to these talks: + +- `Adrin Jalali: Let's exploit pickle, and skops to the rescue! | PyData + Amsterdam 2023 `__. 
+- `Alex Gaynor: Pickles are for Delis, not Software - PyCon 2014 + `__. + + +.. _serving_environment: + +Replicating the training environment in production +.................................................. + +If the versions of the dependencies used differ from training to +production, it may result in unexpected behaviour and errors while using the +trained model. To prevent such situations, it is recommended to use the same +dependencies and versions in both the training and production environment. +These transitive dependencies can be pinned with the help of package management +tools like `pip`, `mamba`, `conda`, `poetry`, `conda-lock`, `pixi`, etc. + +It is not always possible to load a model trained with older versions of the +scikit-learn library and its dependencies in an updated software environment. +Instead, you might need to retrain the model with the new versions of all +the libraries. So when training a model, it is important to record the training +recipe (e.g. a Python script) and training set information, and metadata about +all the dependencies to be able to automatically reconstruct the same training +environment for the updated software. + +.. dropdown:: InconsistentVersionWarning + + When an estimator is loaded with a scikit-learn version that is inconsistent + with the version the estimator was pickled with, a + :class:`~sklearn.exceptions.InconsistentVersionWarning` is raised. This warning + can be caught to obtain the original version the estimator was pickled with:: + + from sklearn.exceptions import InconsistentVersionWarning + warnings.simplefilter("error", InconsistentVersionWarning) + + try: + with open("model_from_previous_version.pickle", "rb") as f: + est = pickle.load(f) + except InconsistentVersionWarning as w: + print(w.original_sklearn_version) + + +Serving the model artifact +.......................... + +The last step after training a scikit-learn model is serving the model. +Once the trained model is successfully loaded, it can be served to manage +different prediction requests. This can involve deploying the model as a +web service using containerization, or other model deployment strategies, +according to the specifications. + + +Summarizing the key points +-------------------------- + +Based on the different approaches for model persistence, the key points for +each approach can be summarized as follows: + +* `ONNX`: It provides a uniform format for persisting any machine learning or + deep learning model (other than scikit-learn) and is useful for model + inference (predictions). It can, however, result in compatibility issues with + different frameworks. +* :mod:`skops.io`: Trained scikit-learn models can be easily shared and put + into production using :mod:`skops.io`. It is more secure compared to + alternate approaches based on :mod:`pickle` because it does not load + arbitrary code unless explicitly asked for by the user. Such code needs to be + packaged and importable in the target Python environment. +* :mod:`joblib`: Efficient memory mapping techniques make it faster when using + the same persisted model in multiple Python processes with + `mmap_mode="r"`. It also gives easy shortcuts to compress and decompress the + persisted object without the need for extra code. However, it may trigger the + execution of malicious code when loading a model from an untrusted source, as with + any other pickle-based persistence mechanism. 
+* :mod:`pickle`: It is native to Python and most Python objects can be + serialized and deserialized using :mod:`pickle`, including custom Python + classes and functions as long as they are defined in a package that can be + imported in the target environment. While :mod:`pickle` can be used to easily + save and load scikit-learn models, it may trigger the execution of malicious + code while loading a model from an untrusted source. :mod:`pickle` can also + be very efficient memorywise if the model was persisted with `protocol=5` but + it does not support memory mapping. +* `cloudpickle`_: It has comparable loading efficiency as :mod:`pickle` and + :mod:`joblib` (without memory mapping), but offers additional flexibility to + serialize custom Python code such as lambda expressions and interactively + defined functions and classes. It might be a last resort to persist pipelines + with custom Python components such as a + :class:`sklearn.preprocessing.FunctionTransformer` that wraps a function + defined in the training script itself or more generally outside of any + importable Python package. Note that `cloudpickle`_ offers no forward + compatibility guarantees and you might need the same version of + `cloudpickle`_ to load the persisted model along with the same version of all + the libraries used to define the model. As the other pickle-based persistence + mechanisms, it may trigger the execution of malicious code while loading + a model from an untrusted source. + +.. _cloudpickle: https://github.com/cloudpipe/cloudpickle diff --git a/doc/model_selection.rst b/doc/model_selection.rst index 25cd2b655ccc5..b78c9ff4c3aa8 100644 --- a/doc/model_selection.rst +++ b/doc/model_selection.rst @@ -1,9 +1,3 @@ -.. Places parent toc into the sidebar - -:parenttoc: True - -.. include:: includes/big_toc_css.rst - .. _model_selection: Model selection and evaluation @@ -14,5 +8,6 @@ Model selection and evaluation modules/cross_validation modules/grid_search + modules/classification_threshold modules/model_evaluation modules/learning_curve diff --git a/doc/modules/array_api.rst b/doc/modules/array_api.rst index 71a2e1ce0a6ce..c1f03b95a42c1 100644 --- a/doc/modules/array_api.rst +++ b/doc/modules/array_api.rst @@ -1,7 +1,3 @@ -.. Places parent toc into the sidebar - -:parenttoc: True - .. _array_api: ================================ @@ -25,7 +21,7 @@ At this stage, this support is **considered experimental** and must be enabled explicitly as explained in the following. .. note:: - Currently, only `cupy.array_api`, `numpy.array_api`, `cupy`, and `PyTorch` + Currently, only `cupy.array_api`, `array-api-strict`, `cupy`, and `PyTorch` are known to work with scikit-learn's estimators. Example usage @@ -83,17 +79,57 @@ the tensors directly:: >>> X_trans.device.type 'cuda' -.. _array_api_estimators: +.. _array_api_supported: + +Support for `Array API`-compatible inputs +========================================= -Estimators with support for `Array API`-compatible inputs -========================================================= +Estimators and other tools in scikit-learn that support Array API compatible inputs. 
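For instance, once dispatch is enabled via :func:`sklearn.set_config` or
:func:`sklearn.config_context`, one of the preprocessing estimators listed below can be
fitted directly on PyTorch tensors. A minimal sketch (assuming `torch` and the
`array-api-compat` package are installed; CPU tensors are used here for simplicity)::

    import sklearn
    import torch
    from sklearn.preprocessing import MinMaxScaler

    X = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
    with sklearn.config_context(array_api_dispatch=True):
        scaler = MinMaxScaler().fit(X)
        X_trans = scaler.transform(X)
    # Both the transformed data and the fitted attributes (e.g. data_min_)
    # are torch tensors on the same device as the input.

The transformed output and the fitted attributes stay in the input's array namespace
and device, as described further below.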
+Estimators +---------- + +- :class:`decomposition.PCA` (with `svd_solver="full"`, + `svd_solver="randomized"` and `power_iteration_normalizer="QR"`) +- :class:`linear_model.Ridge` (with `solver="svd"`) - :class:`discriminant_analysis.LinearDiscriminantAnalysis` (with `solver="svd"`) +- :class:`preprocessing.KernelCenterer` +- :class:`preprocessing.MaxAbsScaler` +- :class:`preprocessing.MinMaxScaler` +- :class:`preprocessing.Normalizer` + +Metrics +------- + +- :func:`sklearn.metrics.accuracy_score` +- :func:`sklearn.metrics.pairwise.cosine_similarity` +- :func:`sklearn.metrics.r2_score` +- :func:`sklearn.metrics.zero_one_loss` -Coverage for more estimators is expected to grow over time. Please follow the -dedicated `meta-issue on GitHub +Tools +----- + +- :func:`model_selection.train_test_split` + +Coverage is expected to grow over time. Please follow the dedicated `meta-issue on GitHub `_ to track progress. +Type of return values and fitted attributes +------------------------------------------- + +When calling functions or methods with Array API compatible inputs, the +convention is to return array values of the same array container type and +device as the input data. + +Similarly, when an estimator is fitted with Array API compatible inputs, the +fitted attributes will be arrays from the same library as the input and stored +on the same device. The `predict` and `transform` methods subsequently expect +inputs from the same array library and device as the data passed to the `fit` +method. + +Note however that scoring functions that return scalar values return Python +scalars (typically a `float` instance) instead of an array scalar value. + Common estimator checks ======================= @@ -107,4 +143,30 @@ To run these checks you need to install test environment. To run the full set of checks you need to install both `PyTorch `_ and `CuPy `_ and have a GPU. Checks that can not be executed or have missing dependencies will be -automatically skipped. \ No newline at end of file +automatically skipped. Therefore it's important to run the tests with the +`-v` flag to see which checks are skipped: + +.. prompt:: bash $ + + pip install array-api-compat # and other libraries as needed + pytest -k "array_api" -v + +Note on MPS device support +-------------------------- + +On macOS, PyTorch can use the Metal Performance Shaders (MPS) to access +hardware accelerators (e.g. the internal GPU component of the M1 or M2 chips). +However, the MPS device support for PyTorch is incomplete at the time of +writing. See the following GitHub issue for more details: + +- https://github.com/pytorch/pytorch/issues/77764 + +To enable the MPS support in PyTorch, set the environment variable +`PYTORCH_ENABLE_MPS_FALLBACK=1` before running the tests: + +.. prompt:: bash $ + + PYTORCH_ENABLE_MPS_FALLBACK=1 pytest -k "array_api" -v + +At the time of writing, all scikit-learn tests should pass; however, the +computational speed is not necessarily better than with the CPU device. diff --git a/doc/modules/biclustering.rst b/doc/modules/biclustering.rst index 44a996ed0ffd6..503a535c408f0 100644 --- a/doc/modules/biclustering.rst +++ b/doc/modules/biclustering.rst @@ -4,8 +4,7 @@ Biclustering ============ -Biclustering can be performed with the module -:mod:`sklearn.cluster.bicluster`. Biclustering algorithms simultaneously +Biclustering algorithms simultaneously cluster rows and columns of a data matrix. These clusters of rows and columns are known as biclusters. 
Each determines a submatrix of the original data matrix with some desired properties. @@ -82,7 +81,7 @@ diagonal and checkerboard bicluster structures. these alternate names. -.. currentmodule:: sklearn.cluster.bicluster +.. currentmodule:: sklearn.cluster .. _spectral_coclustering: @@ -148,21 +147,21 @@ Then the rows of :math:`Z` are clustered using :ref:`k-means and the remaining ``n_columns`` labels provide the column partitioning. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_bicluster_plot_spectral_coclustering.py`: A simple example - showing how to generate a data matrix with biclusters and apply - this method to it. +* :ref:`sphx_glr_auto_examples_bicluster_plot_spectral_coclustering.py`: A simple example + showing how to generate a data matrix with biclusters and apply + this method to it. - * :ref:`sphx_glr_auto_examples_bicluster_plot_bicluster_newsgroups.py`: An example of finding - biclusters in the twenty newsgroup dataset. +* :ref:`sphx_glr_auto_examples_bicluster_plot_bicluster_newsgroups.py`: An example of finding + biclusters in the twenty newsgroup dataset. -.. topic:: References: +.. rubric:: References - * Dhillon, Inderjit S, 2001. :doi:`Co-clustering documents and words using - bipartite spectral graph partitioning - <10.1145/502512.502550>` +* Dhillon, Inderjit S, 2001. :doi:`Co-clustering documents and words using + bipartite spectral graph partitioning + <10.1145/502512.502550>` .. _spectral_biclustering: @@ -235,17 +234,17 @@ Similarly, projecting the columns to :math:`A^{\top} * U` and clustering this :math:`n \times q` matrix yields the column labels. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_bicluster_plot_spectral_biclustering.py`: a simple example - showing how to generate a checkerboard matrix and bicluster it. +* :ref:`sphx_glr_auto_examples_bicluster_plot_spectral_biclustering.py`: a simple example + showing how to generate a checkerboard matrix and bicluster it. -.. topic:: References: +.. rubric:: References - * Kluger, Yuval, et. al., 2003. :doi:`Spectral biclustering of microarray - data: coclustering genes and conditions - <10.1101/gr.648603>` +* Kluger, Yuval, et. al., 2003. :doi:`Spectral biclustering of microarray + data: coclustering genes and conditions + <10.1101/gr.648603>` .. _biclustering_evaluation: @@ -299,8 +298,8 @@ are totally dissimilar. The maximum score, 1, occurs when both sets are identical. -.. topic:: References: +.. rubric:: References - * Hochreiter, Bodenhofer, et. al., 2010. `FABIA: factor analysis - for bicluster acquisition - `__. +* Hochreiter, Bodenhofer, et. al., 2010. `FABIA: factor analysis + for bicluster acquisition + `__. diff --git a/doc/modules/calibration.rst b/doc/modules/calibration.rst index 081b3e9a0a883..a2bfa152d2b26 100644 --- a/doc/modules/calibration.rst +++ b/doc/modules/calibration.rst @@ -74,10 +74,14 @@ by showing the number of samples in each predicted probability bin. .. currentmodule:: sklearn.linear_model -:class:`LogisticRegression` returns well calibrated predictions by default as it has a +:class:`LogisticRegression` is more likely to return well calibrated predictions by itself as it has a canonical link function for its loss, i.e. the logit-link for the :ref:`log_loss`. -This leads to the so-called **balance property**, see [8]_ and -:ref:`Logistic_regression`. +In the unpenalized case, this leads to the so-called **balance property**, see [8]_ and :ref:`Logistic_regression`. 
+In the plot above, data is generated according to a linear mechanism, which is +consistent with the :class:`LogisticRegression` model (the model is 'well specified'), +and the value of the regularization parameter `C` is tuned to be +appropriate (neither too strong nor too low). As a consequence, this model returns +accurate predictions from its `predict_proba` method. In contrast to that, the other shown models return biased probabilities; with different biases per model. @@ -241,7 +245,7 @@ there is enough data (greater than ~ 1000 samples) to avoid overfitting [3]_. `method="isotonic"` since isotonic regression introduces ties in the predicted probabilities. This can be seen as within the uncertainty of the model predictions. In case, you strictly want to keep the ranking and thus AUC scores, use - `method="logistic"` which is a strictly monotonic transformation and thus keeps + `method="sigmoid"` which is a strictly monotonic transformation and thus keeps the ranking. Multiclass support @@ -258,51 +262,51 @@ probabilities, the calibrated probabilities for each class are predicted separately. As those probabilities do not necessarily sum to one, a postprocessing is performed to normalize them. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_calibration_plot_calibration_curve.py` - * :ref:`sphx_glr_auto_examples_calibration_plot_calibration_multiclass.py` - * :ref:`sphx_glr_auto_examples_calibration_plot_calibration.py` - * :ref:`sphx_glr_auto_examples_calibration_plot_compare_calibration.py` +* :ref:`sphx_glr_auto_examples_calibration_plot_calibration_curve.py` +* :ref:`sphx_glr_auto_examples_calibration_plot_calibration_multiclass.py` +* :ref:`sphx_glr_auto_examples_calibration_plot_calibration.py` +* :ref:`sphx_glr_auto_examples_calibration_plot_compare_calibration.py` -.. topic:: References: +.. rubric:: References - .. [1] Allan H. Murphy (1973). - :doi:`"A New Vector Partition of the Probability Score" - <10.1175/1520-0450(1973)012%3C0595:ANVPOT%3E2.0.CO;2>` - Journal of Applied Meteorology and Climatology +.. [1] Allan H. Murphy (1973). + :doi:`"A New Vector Partition of the Probability Score" + <10.1175/1520-0450(1973)012%3C0595:ANVPOT%3E2.0.CO;2>` + Journal of Applied Meteorology and Climatology - .. [2] `On the combination of forecast probabilities for - consecutive precipitation periods. - `_ - Wea. Forecasting, 5, 640–650., Wilks, D. S., 1990a +.. [2] `On the combination of forecast probabilities for + consecutive precipitation periods. + `_ + Wea. Forecasting, 5, 640–650., Wilks, D. S., 1990a - .. [3] `Predicting Good Probabilities with Supervised Learning - `_, - A. Niculescu-Mizil & R. Caruana, ICML 2005 +.. [3] `Predicting Good Probabilities with Supervised Learning + `_, + A. Niculescu-Mizil & R. Caruana, ICML 2005 - .. [4] `Probabilistic Outputs for Support Vector Machines and Comparisons - to Regularized Likelihood Methods. - `_ - J. Platt, (1999) +.. [4] `Probabilistic Outputs for Support Vector Machines and Comparisons + to Regularized Likelihood Methods. + `_ + J. Platt, (1999) - .. [5] `Transforming Classifier Scores into Accurate Multiclass - Probability Estimates. - `_ - B. Zadrozny & C. Elkan, (KDD 2002) +.. [5] `Transforming Classifier Scores into Accurate Multiclass + Probability Estimates. + `_ + B. Zadrozny & C. Elkan, (KDD 2002) - .. [6] `Predicting accurate probabilities with a ranking loss. - `_ - Menon AK, Jiang XJ, Vembu S, Elkan C, Ohno-Machado L. - Proc Int Conf Mach Learn. 2012;2012:703-710 +.. 
[6] `Predicting accurate probabilities with a ranking loss. + `_ + Menon AK, Jiang XJ, Vembu S, Elkan C, Ohno-Machado L. + Proc Int Conf Mach Learn. 2012;2012:703-710 - .. [7] `Beyond sigmoids: How to obtain well-calibrated probabilities from - binary classifiers with beta calibration - `_ - Kull, M., Silva Filho, T. M., & Flach, P. (2017). +.. [7] `Beyond sigmoids: How to obtain well-calibrated probabilities from + binary classifiers with beta calibration + `_ + Kull, M., Silva Filho, T. M., & Flach, P. (2017). - .. [8] Mario V. WÃŧthrich, Michael Merz (2023). - :doi:`"Statistical Foundations of Actuarial Learning and its Applications" - <10.1007/978-3-031-12409-9>` - Springer Actuarial +.. [8] Mario V. WÃŧthrich, Michael Merz (2023). + :doi:`"Statistical Foundations of Actuarial Learning and its Applications" + <10.1007/978-3-031-12409-9>` + Springer Actuarial diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst deleted file mode 100644 index 204c300b1a9b8..0000000000000 --- a/doc/modules/classes.rst +++ /dev/null @@ -1,1691 +0,0 @@ -.. _api_ref: - -============= -API Reference -============= - -This is the class and function reference of scikit-learn. Please refer to -the :ref:`full user guide ` for further details, as the class and -function raw specifications may not be enough to give full guidelines on their -uses. -For reference on concepts repeated across the API, see :ref:`glossary`. - - -:mod:`sklearn.base`: Base classes and utility functions -======================================================= - -.. automodule:: sklearn.base - :no-members: - :no-inherited-members: - -Base classes ------------- -.. currentmodule:: sklearn - -.. autosummary:: - :nosignatures: - :toctree: generated/ - :template: class.rst - - base.BaseEstimator - base.BiclusterMixin - base.ClassifierMixin - base.ClusterMixin - base.DensityMixin - base.RegressorMixin - base.TransformerMixin - base.MetaEstimatorMixin - base.OneToOneFeatureMixin - base.ClassNamePrefixFeaturesOutMixin - feature_selection.SelectorMixin - -Functions ---------- -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - base.clone - base.is_classifier - base.is_regressor - config_context - get_config - set_config - show_versions - -.. _calibration_ref: - -:mod:`sklearn.calibration`: Probability Calibration -=================================================== - -.. automodule:: sklearn.calibration - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`calibration` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - calibration.CalibratedClassifierCV - - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - calibration.calibration_curve - -.. _cluster_ref: - -:mod:`sklearn.cluster`: Clustering -================================== - -.. automodule:: sklearn.cluster - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`clustering` and :ref:`biclustering` sections for -further details. - -Classes -------- -.. currentmodule:: sklearn - -.. 
autosummary:: - :toctree: generated/ - :template: class.rst - - cluster.AffinityPropagation - cluster.AgglomerativeClustering - cluster.Birch - cluster.DBSCAN - cluster.HDBSCAN - cluster.FeatureAgglomeration - cluster.KMeans - cluster.BisectingKMeans - cluster.MiniBatchKMeans - cluster.MeanShift - cluster.OPTICS - cluster.SpectralClustering - cluster.SpectralBiclustering - cluster.SpectralCoclustering - -Functions ---------- -.. autosummary:: - :toctree: generated/ - :template: function.rst - - cluster.affinity_propagation - cluster.cluster_optics_dbscan - cluster.cluster_optics_xi - cluster.compute_optics_graph - cluster.dbscan - cluster.estimate_bandwidth - cluster.k_means - cluster.kmeans_plusplus - cluster.mean_shift - cluster.spectral_clustering - cluster.ward_tree - -.. _compose_ref: - -:mod:`sklearn.compose`: Composite Estimators -============================================ - -.. automodule:: sklearn.compose - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`combining_estimators` section for further -details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated - :template: class.rst - - compose.ColumnTransformer - compose.TransformedTargetRegressor - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - compose.make_column_transformer - compose.make_column_selector - -.. _covariance_ref: - -:mod:`sklearn.covariance`: Covariance Estimators -================================================ - -.. automodule:: sklearn.covariance - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`covariance` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - covariance.EmpiricalCovariance - covariance.EllipticEnvelope - covariance.GraphicalLasso - covariance.GraphicalLassoCV - covariance.LedoitWolf - covariance.MinCovDet - covariance.OAS - covariance.ShrunkCovariance - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - covariance.empirical_covariance - covariance.graphical_lasso - covariance.ledoit_wolf - covariance.ledoit_wolf_shrinkage - covariance.oas - covariance.shrunk_covariance - -.. _cross_decomposition_ref: - -:mod:`sklearn.cross_decomposition`: Cross decomposition -======================================================= - -.. automodule:: sklearn.cross_decomposition - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`cross_decomposition` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - cross_decomposition.CCA - cross_decomposition.PLSCanonical - cross_decomposition.PLSRegression - cross_decomposition.PLSSVD - -.. _datasets_ref: - -:mod:`sklearn.datasets`: Datasets -================================= - -.. automodule:: sklearn.datasets - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`datasets` section for further details. - -Loaders -------- - -.. currentmodule:: sklearn - -.. 
autosummary:: - :toctree: generated/ - :template: function.rst - - datasets.clear_data_home - datasets.dump_svmlight_file - datasets.fetch_20newsgroups - datasets.fetch_20newsgroups_vectorized - datasets.fetch_california_housing - datasets.fetch_covtype - datasets.fetch_kddcup99 - datasets.fetch_lfw_pairs - datasets.fetch_lfw_people - datasets.fetch_olivetti_faces - datasets.fetch_openml - datasets.fetch_rcv1 - datasets.fetch_species_distributions - datasets.get_data_home - datasets.load_breast_cancer - datasets.load_diabetes - datasets.load_digits - datasets.load_files - datasets.load_iris - datasets.load_linnerud - datasets.load_sample_image - datasets.load_sample_images - datasets.load_svmlight_file - datasets.load_svmlight_files - datasets.load_wine - -Samples generator ------------------ - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - datasets.make_biclusters - datasets.make_blobs - datasets.make_checkerboard - datasets.make_circles - datasets.make_classification - datasets.make_friedman1 - datasets.make_friedman2 - datasets.make_friedman3 - datasets.make_gaussian_quantiles - datasets.make_hastie_10_2 - datasets.make_low_rank_matrix - datasets.make_moons - datasets.make_multilabel_classification - datasets.make_regression - datasets.make_s_curve - datasets.make_sparse_coded_signal - datasets.make_sparse_spd_matrix - datasets.make_sparse_uncorrelated - datasets.make_spd_matrix - datasets.make_swiss_roll - - -.. _decomposition_ref: - -:mod:`sklearn.decomposition`: Matrix Decomposition -================================================== - -.. automodule:: sklearn.decomposition - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`decompositions` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - decomposition.DictionaryLearning - decomposition.FactorAnalysis - decomposition.FastICA - decomposition.IncrementalPCA - decomposition.KernelPCA - decomposition.LatentDirichletAllocation - decomposition.MiniBatchDictionaryLearning - decomposition.MiniBatchSparsePCA - decomposition.NMF - decomposition.MiniBatchNMF - decomposition.PCA - decomposition.SparsePCA - decomposition.SparseCoder - decomposition.TruncatedSVD - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - decomposition.dict_learning - decomposition.dict_learning_online - decomposition.fastica - decomposition.non_negative_factorization - decomposition.sparse_encode - -.. _lda_ref: - -:mod:`sklearn.discriminant_analysis`: Discriminant Analysis -=========================================================== - -.. automodule:: sklearn.discriminant_analysis - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`lda_qda` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated - :template: class.rst - - discriminant_analysis.LinearDiscriminantAnalysis - discriminant_analysis.QuadraticDiscriminantAnalysis - -.. _dummy_ref: - -:mod:`sklearn.dummy`: Dummy estimators -====================================== - -.. automodule:: sklearn.dummy - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`model_evaluation` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - dummy.DummyClassifier - dummy.DummyRegressor - -.. autosummary:: - :toctree: generated/ - :template: function.rst - -.. 
_ensemble_ref: - -:mod:`sklearn.ensemble`: Ensemble Methods -========================================= - -.. automodule:: sklearn.ensemble - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`ensemble` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - ensemble.AdaBoostClassifier - ensemble.AdaBoostRegressor - ensemble.BaggingClassifier - ensemble.BaggingRegressor - ensemble.ExtraTreesClassifier - ensemble.ExtraTreesRegressor - ensemble.GradientBoostingClassifier - ensemble.GradientBoostingRegressor - ensemble.IsolationForest - ensemble.RandomForestClassifier - ensemble.RandomForestRegressor - ensemble.RandomTreesEmbedding - ensemble.StackingClassifier - ensemble.StackingRegressor - ensemble.VotingClassifier - ensemble.VotingRegressor - ensemble.HistGradientBoostingRegressor - ensemble.HistGradientBoostingClassifier - - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - -.. _exceptions_ref: - -:mod:`sklearn.exceptions`: Exceptions and warnings -================================================== - -.. automodule:: sklearn.exceptions - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - exceptions.ConvergenceWarning - exceptions.DataConversionWarning - exceptions.DataDimensionalityWarning - exceptions.EfficiencyWarning - exceptions.FitFailedWarning - exceptions.InconsistentVersionWarning - exceptions.NotFittedError - exceptions.UndefinedMetricWarning - - -:mod:`sklearn.experimental`: Experimental -========================================= - -.. automodule:: sklearn.experimental - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - - experimental.enable_iterative_imputer - experimental.enable_halving_search_cv - - -.. _feature_extraction_ref: - -:mod:`sklearn.feature_extraction`: Feature Extraction -===================================================== - -.. automodule:: sklearn.feature_extraction - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`feature_extraction` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - feature_extraction.DictVectorizer - feature_extraction.FeatureHasher - -From images ------------ - -.. automodule:: sklearn.feature_extraction.image - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - feature_extraction.image.extract_patches_2d - feature_extraction.image.grid_to_graph - feature_extraction.image.img_to_graph - feature_extraction.image.reconstruct_from_patches_2d - - :template: class.rst - - feature_extraction.image.PatchExtractor - -.. _text_feature_extraction_ref: - -From text ---------- - -.. automodule:: sklearn.feature_extraction.text - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - feature_extraction.text.CountVectorizer - feature_extraction.text.HashingVectorizer - feature_extraction.text.TfidfTransformer - feature_extraction.text.TfidfVectorizer - - -.. _feature_selection_ref: - -:mod:`sklearn.feature_selection`: Feature Selection -=================================================== - -.. 
automodule:: sklearn.feature_selection - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`feature_selection` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - feature_selection.GenericUnivariateSelect - feature_selection.SelectPercentile - feature_selection.SelectKBest - feature_selection.SelectFpr - feature_selection.SelectFdr - feature_selection.SelectFromModel - feature_selection.SelectFwe - feature_selection.SequentialFeatureSelector - feature_selection.RFE - feature_selection.RFECV - feature_selection.VarianceThreshold - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - feature_selection.chi2 - feature_selection.f_classif - feature_selection.f_regression - feature_selection.r_regression - feature_selection.mutual_info_classif - feature_selection.mutual_info_regression - - -.. _gaussian_process_ref: - -:mod:`sklearn.gaussian_process`: Gaussian Processes -=================================================== - -.. automodule:: sklearn.gaussian_process - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`gaussian_process` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - gaussian_process.GaussianProcessClassifier - gaussian_process.GaussianProcessRegressor - -Kernels: - -.. autosummary:: - :toctree: generated/ - :template: class_with_call.rst - - gaussian_process.kernels.CompoundKernel - gaussian_process.kernels.ConstantKernel - gaussian_process.kernels.DotProduct - gaussian_process.kernels.ExpSineSquared - gaussian_process.kernels.Exponentiation - gaussian_process.kernels.Hyperparameter - gaussian_process.kernels.Kernel - gaussian_process.kernels.Matern - gaussian_process.kernels.PairwiseKernel - gaussian_process.kernels.Product - gaussian_process.kernels.RBF - gaussian_process.kernels.RationalQuadratic - gaussian_process.kernels.Sum - gaussian_process.kernels.WhiteKernel - - -.. _impute_ref: - -:mod:`sklearn.impute`: Impute -============================= - -.. automodule:: sklearn.impute - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`Impute` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - impute.SimpleImputer - impute.IterativeImputer - impute.MissingIndicator - impute.KNNImputer - - -.. _inspection_ref: - -:mod:`sklearn.inspection`: Inspection -===================================== - -.. automodule:: sklearn.inspection - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - inspection.partial_dependence - inspection.permutation_importance - -Plotting --------- - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: display_only_from_estimator.rst - - inspection.DecisionBoundaryDisplay - inspection.PartialDependenceDisplay - -.. _isotonic_ref: - -:mod:`sklearn.isotonic`: Isotonic regression -============================================ - -.. automodule:: sklearn.isotonic - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`isotonic` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - isotonic.IsotonicRegression - -.. autosummary:: - :toctree: generated - :template: function.rst - - isotonic.check_increasing - isotonic.isotonic_regression - - -.. 
_kernel_approximation_ref: - -:mod:`sklearn.kernel_approximation`: Kernel Approximation -========================================================= - -.. automodule:: sklearn.kernel_approximation - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`kernel_approximation` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - kernel_approximation.AdditiveChi2Sampler - kernel_approximation.Nystroem - kernel_approximation.PolynomialCountSketch - kernel_approximation.RBFSampler - kernel_approximation.SkewedChi2Sampler - -.. _kernel_ridge_ref: - -:mod:`sklearn.kernel_ridge`: Kernel Ridge Regression -==================================================== - -.. automodule:: sklearn.kernel_ridge - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`kernel_ridge` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - kernel_ridge.KernelRidge - -.. _linear_model_ref: - -:mod:`sklearn.linear_model`: Linear Models -========================================== - -.. automodule:: sklearn.linear_model - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`linear_model` section for further details. - -The following subsections are only rough guidelines: the same estimator can -fall into multiple categories, depending on its parameters. - -.. currentmodule:: sklearn - -Linear classifiers ------------------- -.. autosummary:: - :toctree: generated/ - :template: class.rst - - linear_model.LogisticRegression - linear_model.LogisticRegressionCV - linear_model.PassiveAggressiveClassifier - linear_model.Perceptron - linear_model.RidgeClassifier - linear_model.RidgeClassifierCV - linear_model.SGDClassifier - linear_model.SGDOneClassSVM - -Classical linear regressors ---------------------------- - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - linear_model.LinearRegression - linear_model.Ridge - linear_model.RidgeCV - linear_model.SGDRegressor - -Regressors with variable selection ----------------------------------- - -The following estimators have built-in variable selection fitting -procedures, but any estimator using a L1 or elastic-net penalty also -performs variable selection: typically :class:`~linear_model.SGDRegressor` -or :class:`~sklearn.linear_model.SGDClassifier` with an appropriate penalty. - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - linear_model.ElasticNet - linear_model.ElasticNetCV - linear_model.Lars - linear_model.LarsCV - linear_model.Lasso - linear_model.LassoCV - linear_model.LassoLars - linear_model.LassoLarsCV - linear_model.LassoLarsIC - linear_model.OrthogonalMatchingPursuit - linear_model.OrthogonalMatchingPursuitCV - -Bayesian regressors -------------------- - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - linear_model.ARDRegression - linear_model.BayesianRidge - -Multi-task linear regressors with variable selection ----------------------------------------------------- - -These estimators fit multiple regression problems (or tasks) jointly, while -inducing sparse coefficients. While the inferred coefficients may differ -between the tasks, they are constrained to agree on the features that are -selected (non-zero coefficients). - -.. 
autosummary:: - :toctree: generated/ - :template: class.rst - - linear_model.MultiTaskElasticNet - linear_model.MultiTaskElasticNetCV - linear_model.MultiTaskLasso - linear_model.MultiTaskLassoCV - -Outlier-robust regressors -------------------------- - -Any estimator using the Huber loss would also be robust to outliers, e.g. -:class:`~linear_model.SGDRegressor` with ``loss='huber'``. - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - linear_model.HuberRegressor - linear_model.QuantileRegressor - linear_model.RANSACRegressor - linear_model.TheilSenRegressor - -Generalized linear models (GLM) for regression ----------------------------------------------- - -These models allow for response variables to have error distributions other -than a normal distribution: - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - linear_model.PoissonRegressor - linear_model.TweedieRegressor - linear_model.GammaRegressor - - -Miscellaneous -------------- - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - linear_model.PassiveAggressiveRegressor - linear_model.enet_path - linear_model.lars_path - linear_model.lars_path_gram - linear_model.lasso_path - linear_model.orthogonal_mp - linear_model.orthogonal_mp_gram - linear_model.ridge_regression - - -.. _manifold_ref: - -:mod:`sklearn.manifold`: Manifold Learning -========================================== - -.. automodule:: sklearn.manifold - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`manifold` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated - :template: class.rst - - manifold.Isomap - manifold.LocallyLinearEmbedding - manifold.MDS - manifold.SpectralEmbedding - manifold.TSNE - -.. autosummary:: - :toctree: generated - :template: function.rst - - manifold.locally_linear_embedding - manifold.smacof - manifold.spectral_embedding - manifold.trustworthiness - - -.. _metrics_ref: - -:mod:`sklearn.metrics`: Metrics -=============================== - -See the :ref:`model_evaluation` section and the :ref:`metrics` section of the -user guide for further details. - -.. automodule:: sklearn.metrics - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -Model Selection Interface -------------------------- -See the :ref:`scoring_parameter` section of the user guide for further -details. - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - metrics.check_scoring - metrics.get_scorer - metrics.get_scorer_names - metrics.make_scorer - -Classification metrics ----------------------- - -See the :ref:`classification_metrics` section of the user guide for further -details. - -.. 
autosummary:: - :toctree: generated/ - :template: function.rst - - metrics.accuracy_score - metrics.auc - metrics.average_precision_score - metrics.balanced_accuracy_score - metrics.brier_score_loss - metrics.class_likelihood_ratios - metrics.classification_report - metrics.cohen_kappa_score - metrics.confusion_matrix - metrics.dcg_score - metrics.det_curve - metrics.f1_score - metrics.fbeta_score - metrics.hamming_loss - metrics.hinge_loss - metrics.jaccard_score - metrics.log_loss - metrics.matthews_corrcoef - metrics.multilabel_confusion_matrix - metrics.ndcg_score - metrics.precision_recall_curve - metrics.precision_recall_fscore_support - metrics.precision_score - metrics.recall_score - metrics.roc_auc_score - metrics.roc_curve - metrics.top_k_accuracy_score - metrics.zero_one_loss - -Regression metrics ------------------- - -See the :ref:`regression_metrics` section of the user guide for further -details. - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - metrics.explained_variance_score - metrics.max_error - metrics.mean_absolute_error - metrics.mean_squared_error - metrics.mean_squared_log_error - metrics.median_absolute_error - metrics.mean_absolute_percentage_error - metrics.r2_score - metrics.mean_poisson_deviance - metrics.mean_gamma_deviance - metrics.mean_tweedie_deviance - metrics.d2_tweedie_score - metrics.mean_pinball_loss - metrics.d2_pinball_score - metrics.d2_absolute_error_score - -Multilabel ranking metrics --------------------------- -See the :ref:`multilabel_ranking_metrics` section of the user guide for further -details. - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - metrics.coverage_error - metrics.label_ranking_average_precision_score - metrics.label_ranking_loss - - -Clustering metrics ------------------- - -See the :ref:`clustering_evaluation` section of the user guide for further -details. - -.. automodule:: sklearn.metrics.cluster - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - metrics.adjusted_mutual_info_score - metrics.adjusted_rand_score - metrics.calinski_harabasz_score - metrics.davies_bouldin_score - metrics.completeness_score - metrics.cluster.contingency_matrix - metrics.cluster.pair_confusion_matrix - metrics.fowlkes_mallows_score - metrics.homogeneity_completeness_v_measure - metrics.homogeneity_score - metrics.mutual_info_score - metrics.normalized_mutual_info_score - metrics.rand_score - metrics.silhouette_score - metrics.silhouette_samples - metrics.v_measure_score - -Biclustering metrics --------------------- - -See the :ref:`biclustering_evaluation` section of the user guide for -further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - metrics.consensus_score - -Distance metrics ----------------- - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - metrics.DistanceMetric - -Pairwise metrics ----------------- - -See the :ref:`metrics` section of the user guide for further details. - -.. automodule:: sklearn.metrics.pairwise - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. 
autosummary:: - :toctree: generated/ - :template: function.rst - - metrics.pairwise.additive_chi2_kernel - metrics.pairwise.chi2_kernel - metrics.pairwise.cosine_similarity - metrics.pairwise.cosine_distances - metrics.pairwise.distance_metrics - metrics.pairwise.euclidean_distances - metrics.pairwise.haversine_distances - metrics.pairwise.kernel_metrics - metrics.pairwise.laplacian_kernel - metrics.pairwise.linear_kernel - metrics.pairwise.manhattan_distances - metrics.pairwise.nan_euclidean_distances - metrics.pairwise.pairwise_kernels - metrics.pairwise.polynomial_kernel - metrics.pairwise.rbf_kernel - metrics.pairwise.sigmoid_kernel - metrics.pairwise.paired_euclidean_distances - metrics.pairwise.paired_manhattan_distances - metrics.pairwise.paired_cosine_distances - metrics.pairwise.paired_distances - metrics.pairwise_distances - metrics.pairwise_distances_argmin - metrics.pairwise_distances_argmin_min - metrics.pairwise_distances_chunked - - -Plotting --------- - -See the :ref:`visualizations` section of the user guide for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: display_all_class_methods.rst - - metrics.ConfusionMatrixDisplay - metrics.DetCurveDisplay - metrics.PrecisionRecallDisplay - metrics.PredictionErrorDisplay - metrics.RocCurveDisplay - calibration.CalibrationDisplay - -.. _mixture_ref: - -:mod:`sklearn.mixture`: Gaussian Mixture Models -=============================================== - -.. automodule:: sklearn.mixture - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`mixture` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - mixture.BayesianGaussianMixture - mixture.GaussianMixture - -.. _modelselection_ref: - -:mod:`sklearn.model_selection`: Model Selection -=============================================== - -.. automodule:: sklearn.model_selection - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`cross_validation`, :ref:`grid_search` and -:ref:`learning_curve` sections for further details. - -Splitter Classes ----------------- - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - model_selection.GroupKFold - model_selection.GroupShuffleSplit - model_selection.KFold - model_selection.LeaveOneGroupOut - model_selection.LeavePGroupsOut - model_selection.LeaveOneOut - model_selection.LeavePOut - model_selection.PredefinedSplit - model_selection.RepeatedKFold - model_selection.RepeatedStratifiedKFold - model_selection.ShuffleSplit - model_selection.StratifiedKFold - model_selection.StratifiedShuffleSplit - model_selection.StratifiedGroupKFold - model_selection.TimeSeriesSplit - -Splitter Functions ------------------- - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - model_selection.check_cv - model_selection.train_test_split - -.. _hyper_parameter_optimizers: - -Hyper-parameter optimizers --------------------------- - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - model_selection.GridSearchCV - model_selection.HalvingGridSearchCV - model_selection.ParameterGrid - model_selection.ParameterSampler - model_selection.RandomizedSearchCV - model_selection.HalvingRandomSearchCV - - -Model validation ----------------- - -.. currentmodule:: sklearn - -.. 
autosummary:: - :toctree: generated/ - :template: function.rst - - model_selection.cross_validate - model_selection.cross_val_predict - model_selection.cross_val_score - model_selection.learning_curve - model_selection.permutation_test_score - model_selection.validation_curve - -Visualization -------------- - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: display_only_from_estimator.rst - - model_selection.LearningCurveDisplay - model_selection.ValidationCurveDisplay - -.. _multiclass_ref: - -:mod:`sklearn.multiclass`: Multiclass classification -==================================================== - -.. automodule:: sklearn.multiclass - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`multiclass_classification` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated - :template: class.rst - - multiclass.OneVsRestClassifier - multiclass.OneVsOneClassifier - multiclass.OutputCodeClassifier - -.. _multioutput_ref: - -:mod:`sklearn.multioutput`: Multioutput regression and classification -===================================================================== - -.. automodule:: sklearn.multioutput - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`multilabel_classification`, -:ref:`multiclass_multioutput_classification`, and -:ref:`multioutput_regression` sections for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated - :template: class.rst - - multioutput.ClassifierChain - multioutput.MultiOutputRegressor - multioutput.MultiOutputClassifier - multioutput.RegressorChain - -.. _naive_bayes_ref: - -:mod:`sklearn.naive_bayes`: Naive Bayes -======================================= - -.. automodule:: sklearn.naive_bayes - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`naive_bayes` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - naive_bayes.BernoulliNB - naive_bayes.CategoricalNB - naive_bayes.ComplementNB - naive_bayes.GaussianNB - naive_bayes.MultinomialNB - - -.. _neighbors_ref: - -:mod:`sklearn.neighbors`: Nearest Neighbors -=========================================== - -.. automodule:: sklearn.neighbors - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`neighbors` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - neighbors.BallTree - neighbors.KDTree - neighbors.KernelDensity - neighbors.KNeighborsClassifier - neighbors.KNeighborsRegressor - neighbors.KNeighborsTransformer - neighbors.LocalOutlierFactor - neighbors.RadiusNeighborsClassifier - neighbors.RadiusNeighborsRegressor - neighbors.RadiusNeighborsTransformer - neighbors.NearestCentroid - neighbors.NearestNeighbors - neighbors.NeighborhoodComponentsAnalysis - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - neighbors.kneighbors_graph - neighbors.radius_neighbors_graph - neighbors.sort_graph_by_row_values - -.. _neural_network_ref: - -:mod:`sklearn.neural_network`: Neural network models -==================================================== - -.. automodule:: sklearn.neural_network - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`neural_networks_supervised` and :ref:`neural_networks_unsupervised` sections for further details. - -.. currentmodule:: sklearn - -.. 
autosummary:: - :toctree: generated/ - :template: class.rst - - neural_network.BernoulliRBM - neural_network.MLPClassifier - neural_network.MLPRegressor - -.. _pipeline_ref: - -:mod:`sklearn.pipeline`: Pipeline -================================= - -.. automodule:: sklearn.pipeline - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`combining_estimators` section for further -details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - pipeline.FeatureUnion - pipeline.Pipeline - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - pipeline.make_pipeline - pipeline.make_union - -.. _preprocessing_ref: - -:mod:`sklearn.preprocessing`: Preprocessing and Normalization -============================================================= - -.. automodule:: sklearn.preprocessing - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`preprocessing` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - preprocessing.Binarizer - preprocessing.FunctionTransformer - preprocessing.KBinsDiscretizer - preprocessing.KernelCenterer - preprocessing.LabelBinarizer - preprocessing.LabelEncoder - preprocessing.MultiLabelBinarizer - preprocessing.MaxAbsScaler - preprocessing.MinMaxScaler - preprocessing.Normalizer - preprocessing.OneHotEncoder - preprocessing.OrdinalEncoder - preprocessing.PolynomialFeatures - preprocessing.PowerTransformer - preprocessing.QuantileTransformer - preprocessing.RobustScaler - preprocessing.SplineTransformer - preprocessing.StandardScaler - preprocessing.TargetEncoder - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - preprocessing.add_dummy_feature - preprocessing.binarize - preprocessing.label_binarize - preprocessing.maxabs_scale - preprocessing.minmax_scale - preprocessing.normalize - preprocessing.quantile_transform - preprocessing.robust_scale - preprocessing.scale - preprocessing.power_transform - - -.. _random_projection_ref: - -:mod:`sklearn.random_projection`: Random projection -=================================================== - -.. automodule:: sklearn.random_projection - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`random_projection` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - random_projection.GaussianRandomProjection - random_projection.SparseRandomProjection - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - random_projection.johnson_lindenstrauss_min_dim - - -.. _semi_supervised_ref: - -:mod:`sklearn.semi_supervised`: Semi-Supervised Learning -======================================================== - -.. automodule:: sklearn.semi_supervised - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`semi_supervised` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - semi_supervised.LabelPropagation - semi_supervised.LabelSpreading - semi_supervised.SelfTrainingClassifier - - -.. _svm_ref: - -:mod:`sklearn.svm`: Support Vector Machines -=========================================== - -.. automodule:: sklearn.svm - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`svm` section for further details. - -Estimators ----------- - -.. currentmodule:: sklearn - -.. 
autosummary:: - :toctree: generated/ - :template: class.rst - - svm.LinearSVC - svm.LinearSVR - svm.NuSVC - svm.NuSVR - svm.OneClassSVM - svm.SVC - svm.SVR - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - svm.l1_min_c - -.. _tree_ref: - -:mod:`sklearn.tree`: Decision Trees -=================================== - -.. automodule:: sklearn.tree - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`tree` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - tree.DecisionTreeClassifier - tree.DecisionTreeRegressor - tree.ExtraTreeClassifier - tree.ExtraTreeRegressor - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - tree.export_graphviz - tree.export_text - -Plotting --------- - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - tree.plot_tree - -.. _utils_ref: - -:mod:`sklearn.utils`: Utilities -=============================== - -.. automodule:: sklearn.utils - :no-members: - :no-inherited-members: - -**Developer guide:** See the :ref:`developers-utils` page for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - utils.Bunch - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - utils.arrayfuncs.min_pos - utils.as_float_array - utils.assert_all_finite - utils.check_X_y - utils.check_array - utils.check_scalar - utils.check_consistent_length - utils.check_random_state - utils.class_weight.compute_class_weight - utils.class_weight.compute_sample_weight - utils.deprecated - utils.estimator_checks.check_estimator - utils.estimator_checks.parametrize_with_checks - utils.estimator_html_repr - utils.extmath.safe_sparse_dot - utils.extmath.randomized_range_finder - utils.extmath.randomized_svd - utils.extmath.fast_logdet - utils.extmath.density - utils.extmath.weighted_mode - utils.gen_batches - utils.gen_even_slices - utils.graph.single_source_shortest_path_length - utils.indexable - utils.metaestimators.available_if - utils.multiclass.type_of_target - utils.multiclass.is_multilabel - utils.multiclass.unique_labels - utils.murmurhash3_32 - utils.resample - utils._safe_indexing - utils.safe_mask - utils.safe_sqr - utils.shuffle - utils.sparsefuncs.incr_mean_variance_axis - utils.sparsefuncs.inplace_column_scale - utils.sparsefuncs.inplace_row_scale - utils.sparsefuncs.inplace_swap_row - utils.sparsefuncs.inplace_swap_column - utils.sparsefuncs.mean_variance_axis - utils.sparsefuncs.inplace_csr_column_scale - utils.sparsefuncs_fast.inplace_csr_row_normalize_l1 - utils.sparsefuncs_fast.inplace_csr_row_normalize_l2 - utils.random.sample_without_replacement - utils.validation.check_is_fitted - utils.validation.check_memory - utils.validation.check_symmetric - utils.validation.column_or_1d - utils.validation.has_fit_parameter - utils.metadata_routing.get_routing_for_object - utils.metadata_routing.MetadataRouter - utils.metadata_routing.MetadataRequest - utils.metadata_routing.MethodMapping - utils.metadata_routing.process_routing - -Specific utilities to list scikit-learn components: - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - utils.discovery.all_estimators - utils.discovery.all_displays - utils.discovery.all_functions - -Utilities from joblib: - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - utils.parallel.delayed - utils.parallel_backend - utils.register_parallel_backend - -.. 
autosummary:: - :toctree: generated/ - :template: class.rst - - utils.parallel.Parallel - - -Recently deprecated =================== diff --git a/doc/modules/classification_threshold.rst b/doc/modules/classification_threshold.rst new file mode 100644 index 0000000000000..712a094a43246 --- /dev/null +++ b/doc/modules/classification_threshold.rst @@ -0,0 +1,156 @@ +.. currentmodule:: sklearn.model_selection + +.. _TunedThresholdClassifierCV: + +================================================== +Tuning the decision threshold for class prediction +================================================== + +Classification is best divided into two parts: + +* the statistical problem of learning a model to predict, ideally, class probabilities; +* the decision problem of taking concrete action based on those probability predictions. + +Let's take a straightforward example related to weather forecasting: the first point is +about answering "what is the chance that it will rain tomorrow?" while the second +point is about answering "should I take an umbrella tomorrow?". + +When it comes to the scikit-learn API, the first point is addressed by providing scores +using :term:`predict_proba` or :term:`decision_function`. The former returns conditional +probability estimates :math:`P(y|X)` for each class, while the latter returns a decision +score for each class. + +The decisions corresponding to the labels are obtained with :term:`predict`. In binary +classification, a decision rule or action is then defined by thresholding the scores, +leading to the prediction of a single class label for each sample. For binary +classification in scikit-learn, class label predictions are obtained by hard-coded +cut-off rules: a positive class is predicted when the conditional probability +:math:`P(y|X)` is greater than 0.5 (obtained with :term:`predict_proba`) or if the +decision score is greater than 0 (obtained with :term:`decision_function`). + +Here, we show an example that illustrates the relation between conditional +probability estimates :math:`P(y|X)` and class labels:: + + >>> from sklearn.datasets import make_classification + >>> from sklearn.tree import DecisionTreeClassifier + >>> X, y = make_classification(random_state=0) + >>> classifier = DecisionTreeClassifier(max_depth=2, random_state=0).fit(X, y) + >>> classifier.predict_proba(X[:4]) + array([[0.94 , 0.06 ], + [0.94 , 0.06 ], + [0.0416..., 0.9583...], + [0.0416..., 0.9583...]]) + >>> classifier.predict(X[:4]) + array([0, 0, 1, 1]) + +While these hard-coded rules might at first seem reasonable as default behavior, they +are almost certainly not ideal for most use cases. Let's illustrate with an example. + +Consider a scenario where a predictive model is being deployed to assist +physicians in detecting tumors. In this setting, physicians will most likely be +interested in identifying all patients with cancer and not missing anyone so +that they can provide them with the right treatment. In other words, physicians +prioritize achieving a high recall rate. This emphasis on recall comes, of course, with +the trade-off of potentially more false-positive predictions, reducing the precision of +the model. That is a risk physicians are willing to take because the cost of a missed +cancer is much higher than the cost of further diagnostic tests.
Consequently, when it +comes to deciding whether to classify a patient as having cancer or not, it may be more +beneficial to classify them as positive for cancer when the conditional probability +estimate is much lower than 0.5. + +Post-tuning the decision threshold +================================== + +One solution to address the problem stated in the introduction is to tune the decision +threshold of the classifier once the model has been trained. The +:class:`~sklearn.model_selection.TunedThresholdClassifierCV` tunes this threshold using +internal cross-validation. The optimum threshold is chosen to maximize a given +metric. + +The following image illustrates the tuning of the decision threshold for a gradient +boosting classifier. While the vanilla and tuned classifiers provide the same +:term:`predict_proba` outputs and thus the same Receiver Operating Characteristic (ROC) +and Precision-Recall curves, the class label predictions differ because of the tuned +decision threshold. The vanilla classifier predicts the class of interest for a +conditional probability greater than 0.5 while the tuned classifier predicts the class +of interest for a very low probability (around 0.02). This decision threshold optimizes +a utility metric defined by the business (in this case an insurance company). + +.. figure:: ../auto_examples/model_selection/images/sphx_glr_plot_cost_sensitive_learning_002.png + :target: ../auto_examples/model_selection/plot_cost_sensitive_learning.html + :align: center + +Options to tune the decision threshold +-------------------------------------- + +The decision threshold can be tuned through different strategies controlled by the +parameter `scoring`. + +One way to tune the threshold is by maximizing a pre-defined scikit-learn metric. These +metrics can be found by calling the function :func:`~sklearn.metrics.get_scorer_names`. +By default, balanced accuracy is the metric used, but be aware that one should choose +a metric that is meaningful for their use case. + +.. note:: + + It is important to note that these metrics come with default parameters, notably + the label of the class of interest (i.e. `pos_label`). Thus, if this label is not + the right one for your application, you need to define a scorer and pass the right + `pos_label` (and additional parameters) using + :func:`~sklearn.metrics.make_scorer`. Refer to :ref:`scoring` for information + on defining your own scoring function. For instance, we show how to pass + to the scorer the information that the label of interest is `0` when maximizing the + :func:`~sklearn.metrics.f1_score`:: + + >>> from sklearn.linear_model import LogisticRegression + >>> from sklearn.model_selection import TunedThresholdClassifierCV + >>> from sklearn.metrics import make_scorer, f1_score + >>> X, y = make_classification( + ... n_samples=1_000, weights=[0.1, 0.9], random_state=0) + >>> pos_label = 0 + >>> scorer = make_scorer(f1_score, pos_label=pos_label) + >>> base_model = LogisticRegression() + >>> model = TunedThresholdClassifierCV(base_model, scoring=scorer) + >>> scorer(model.fit(X, y), X, y) + 0.88... + >>> # compare it with the internal score found by cross-validation + >>> model.best_score_ + 0.86... + +Important notes regarding the internal cross-validation +------------------------------------------------------- + +By default :class:`~sklearn.model_selection.TunedThresholdClassifierCV` uses a 5-fold +stratified cross-validation to tune the decision threshold.
The parameter `cv` controls +the cross-validation strategy. It is possible to bypass cross-validation by +setting `cv="prefit"` and providing a fitted classifier. In this case, the decision +threshold is tuned on the data provided to the `fit` method. + +However, you should be extremely careful when using this option. You should never use +the same data for training the classifier and tuning the decision threshold due to the +risk of overfitting. Refer to the following example section for more details (cf. +:ref:`TunedThresholdClassifierCV_no_cv`). If you have limited resources, consider passing +a float to `cv` to use a single internal train-test split. + +The option `cv="prefit"` should only be used when the provided classifier was already +trained, and you just want to find the best decision threshold using a new validation +set. + +.. _FixedThresholdClassifier: + +Manually setting the decision threshold +--------------------------------------- + +The previous sections discussed strategies to find an optimal decision threshold. It is +also possible to manually set the decision threshold using the class +:class:`~sklearn.model_selection.FixedThresholdClassifier`. + +Examples +-------- + +- See the example entitled + :ref:`sphx_glr_auto_examples_model_selection_plot_tuned_decision_threshold.py`, + to get insights on the post-tuning of the decision threshold. +- See the example entitled + :ref:`sphx_glr_auto_examples_model_selection_plot_cost_sensitive_learning.py`, + to learn about cost-sensitive learning and decision threshold tuning. diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst index f976110ad8712..2de39d0317bf5 100644 --- a/doc/modules/clustering.rst +++ b/doc/modules/clustering.rst @@ -182,6 +182,10 @@ It suffers from various drawbacks: :align: center :scale: 50 +For more detailed descriptions of the issues shown above and how to address them, +refer to the examples :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_assumptions.py` +and :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_silhouette_analysis.py`. + K-means is often referred to as Lloyd's algorithm. In basic terms, the algorithm has three steps. The first step chooses the initial centroids, with the most basic method being to choose :math:`k` samples from the dataset @@ -218,7 +222,9 @@ initializations of the centroids. One method to help address this issue is the k-means++ initialization scheme, which has been implemented in scikit-learn (use the ``init='k-means++'`` parameter). This initializes the centroids to be (generally) distant from each other, leading to probably better results than -random initialization, as shown in the reference. +random initialization, as shown in the reference. For a detailed example of +comparing different initialization schemes, refer to +:ref:`sphx_glr_auto_examples_cluster_plot_kmeans_digits.py`. K-means++ can also be called independently to select seeds for other clustering algorithms, see :func:`sklearn.cluster.kmeans_plusplus` for details @@ -231,7 +237,17 @@ weight of 2 to a sample is equivalent to adding a duplicate of that sample to the dataset :math:`X`. K-means can be used for vector quantization. This is achieved using the -transform method of a trained model of :class:`KMeans`. +``transform`` method of a trained model of :class:`KMeans`. For an example of +performing vector quantization on an image, refer to +:ref:`sphx_glr_auto_examples_cluster_plot_color_quantization.py`. + +..
rubric:: Examples + +* :ref:`sphx_glr_auto_examples_cluster_plot_cluster_iris.py`: Example usage of + :class:`KMeans` using the iris dataset + +* :ref:`sphx_glr_auto_examples_text_plot_document_clustering.py`: Document clustering + using :class:`KMeans` and :class:`MiniBatchKMeans` based on sparse data Low-level parallelism --------------------- @@ -241,19 +257,20 @@ chunks of data (256 samples) are processed in parallel, which in addition yields a low memory footprint. For more details on how to control the number of threads, please refer to our :ref:`parallelism` notes. -.. topic:: Examples: +.. rubric:: Examples + +* :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_assumptions.py`: Demonstrating when + k-means performs intuitively and when it does not +* :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_digits.py`: Clustering handwritten digits - * :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_assumptions.py`: Demonstrating when - k-means performs intuitively and when it does not - * :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_digits.py`: Clustering handwritten digits +.. dropdown:: References -.. topic:: References: + * `"k-means++: The advantages of careful seeding" + `_ + Arthur, David, and Sergei Vassilvitskii, + *Proceedings of the eighteenth annual ACM-SIAM symposium on Discrete + algorithms*, Society for Industrial and Applied Mathematics (2007) - * `"k-means++: The advantages of careful seeding" - `_ - Arthur, David, and Sergei Vassilvitskii, - *Proceedings of the eighteenth annual ACM-SIAM symposium on Discrete - algorithms*, Society for Industrial and Applied Mathematics (2007) .. _mini_batch_kmeans: @@ -289,23 +306,22 @@ small, as shown in the example and cited reference. :scale: 100 -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_cluster_plot_mini_batch_kmeans.py`: Comparison of KMeans and - MiniBatchKMeans +* :ref:`sphx_glr_auto_examples_cluster_plot_mini_batch_kmeans.py`: Comparison of + :class:`KMeans` and :class:`MiniBatchKMeans` - * :ref:`sphx_glr_auto_examples_text_plot_document_clustering.py`: Document clustering using sparse - MiniBatchKMeans +* :ref:`sphx_glr_auto_examples_text_plot_document_clustering.py`: Document clustering + using :class:`KMeans` and :class:`MiniBatchKMeans` based on sparse data - * :ref:`sphx_glr_auto_examples_cluster_plot_dict_face_patches.py` +* :ref:`sphx_glr_auto_examples_cluster_plot_dict_face_patches.py` +.. dropdown:: References -.. topic:: References: - - * `"Web Scale K-Means clustering" - `_ - D. Sculley, *Proceedings of the 19th international conference on World - wide web* (2010) + * `"Web Scale K-Means clustering" + `_ + D. Sculley, *Proceedings of the 19th international conference on World + wide web* (2010) .. _affinity_propagation: @@ -342,52 +358,51 @@ convergence. Further, the memory complexity is of the order sparse similarity matrix is used. This makes Affinity Propagation most appropriate for small to medium sized datasets. -.. topic:: Examples: +.. dropdown:: Algorithm description - * :ref:`sphx_glr_auto_examples_cluster_plot_affinity_propagation.py`: Affinity - Propagation on a synthetic 2D datasets with 3 classes. + The messages sent between points belong to one of two categories. The first is + the responsibility :math:`r(i, k)`, which is the accumulated evidence that + sample :math:`k` should be the exemplar for sample :math:`i`. 
The second is the + availability :math:`a(i, k)` which is the accumulated evidence that sample + :math:`i` should choose sample :math:`k` to be its exemplar, and considers the + values for all other samples that :math:`k` should be an exemplar. In this way, + exemplars are chosen by samples if they are (1) similar enough to many samples + and (2) chosen by many samples to be representative of themselves. - * :ref:`sphx_glr_auto_examples_applications_plot_stock_market.py` Affinity Propagation on - Financial time series to find groups of companies + More formally, the responsibility of a sample :math:`k` to be the exemplar of + sample :math:`i` is given by: + .. math:: -**Algorithm description:** -The messages sent between points belong to one of two categories. The first is -the responsibility :math:`r(i, k)`, -which is the accumulated evidence that sample :math:`k` -should be the exemplar for sample :math:`i`. -The second is the availability :math:`a(i, k)` -which is the accumulated evidence that sample :math:`i` -should choose sample :math:`k` to be its exemplar, -and considers the values for all other samples that :math:`k` should -be an exemplar. In this way, exemplars are chosen by samples if they are (1) -similar enough to many samples and (2) chosen by many samples to be -representative of themselves. + r(i, k) \leftarrow s(i, k) - max [ a(i, k') + s(i, k') \forall k' \neq k ] -More formally, the responsibility of a sample :math:`k` -to be the exemplar of sample :math:`i` is given by: + Where :math:`s(i, k)` is the similarity between samples :math:`i` and :math:`k`. + The availability of sample :math:`k` to be the exemplar of sample :math:`i` is + given by: -.. math:: + .. math:: - r(i, k) \leftarrow s(i, k) - max [ a(i, k') + s(i, k') \forall k' \neq k ] + a(i, k) \leftarrow min [0, r(k, k) + \sum_{i'~s.t.~i' \notin \{i, k\}}{r(i', + k)}] -Where :math:`s(i, k)` is the similarity between samples :math:`i` and :math:`k`. -The availability of sample :math:`k` -to be the exemplar of sample :math:`i` is given by: + To begin with, all values for :math:`r` and :math:`a` are set to zero, and the + calculation of each iterates until convergence. As discussed above, in order to + avoid numerical oscillations when updating the messages, the damping factor + :math:`\lambda` is introduced to iteration process: -.. math:: + .. math:: r_{t+1}(i, k) = \lambda\cdot r_{t}(i, k) + (1-\lambda)\cdot r_{t+1}(i, k) + .. math:: a_{t+1}(i, k) = \lambda\cdot a_{t}(i, k) + (1-\lambda)\cdot a_{t+1}(i, k) - a(i, k) \leftarrow min [0, r(k, k) + \sum_{i'~s.t.~i' \notin \{i, k\}}{r(i', k)}] + where :math:`t` indicates the iteration times. -To begin with, all values for :math:`r` and :math:`a` are set to zero, -and the calculation of each iterates until convergence. -As discussed above, in order to avoid numerical oscillations when updating the -messages, the damping factor :math:`\lambda` is introduced to iteration process: -.. math:: r_{t+1}(i, k) = \lambda\cdot r_{t}(i, k) + (1-\lambda)\cdot r_{t+1}(i, k) -.. math:: a_{t+1}(i, k) = \lambda\cdot a_{t}(i, k) + (1-\lambda)\cdot a_{t+1}(i, k) +.. rubric:: Examples + +* :ref:`sphx_glr_auto_examples_cluster_plot_affinity_propagation.py`: Affinity + Propagation on a synthetic 2D datasets with 3 classes +* :ref:`sphx_glr_auto_examples_applications_plot_stock_market.py` Affinity Propagation + on financial time series to find groups of companies -where :math:`t` indicates the iteration times. .. 
_mean_shift: @@ -399,36 +414,40 @@ for centroids to be the mean of the points within a given region. These candidates are then filtered in a post-processing stage to eliminate near-duplicates to form the final set of centroids. -The position of centroid candidates is iteratively adjusted using a technique called hill -climbing, which finds local maxima of the estimated probability density. -Given a candidate centroid :math:`x` for iteration :math:`t`, the candidate -is updated according to the following equation: +.. dropdown:: Mathematical details -.. math:: + The position of centroid candidates is iteratively adjusted using a technique + called hill climbing, which finds local maxima of the estimated probability + density. Given a candidate centroid :math:`x` for iteration :math:`t`, the + candidate is updated according to the following equation: - x^{t+1} = x^t + m(x^t) + .. math:: -Where :math:`m` is the *mean shift* vector that is computed for each -centroid that points towards a region of the maximum increase in the density of points. -To compute :math:`m` we define :math:`N(x)` as the neighborhood of samples within -a given distance around :math:`x`. Then :math:`m` is computed using the following -equation, effectively updating a centroid to be the mean of the samples within -its neighborhood: + x^{t+1} = x^t + m(x^t) -.. math:: + Where :math:`m` is the *mean shift* vector that is computed for each centroid + that points towards a region of the maximum increase in the density of points. + To compute :math:`m` we define :math:`N(x)` as the neighborhood of samples + within a given distance around :math:`x`. Then :math:`m` is computed using the + following equation, effectively updating a centroid to be the mean of the + samples within its neighborhood: - m(x) = \frac{1}{|N(x)|} \sum_{x_j \in N(x)}x_j - x + .. math:: -In general, the equation for :math:`m` depends on a kernel used for density estimation. -The generic formula is: + m(x) = \frac{1}{|N(x)|} \sum_{x_j \in N(x)}x_j - x -.. math:: + In general, the equation for :math:`m` depends on a kernel used for density + estimation. The generic formula is: + + .. math:: - m(x) = \frac{\sum_{x_j \in N(x)}K(x_j - x)x_j}{\sum_{x_j \in N(x)}K(x_j - x)} - x + m(x) = \frac{\sum_{x_j \in N(x)}K(x_j - x)x_j}{\sum_{x_j \in N(x)}K(x_j - + x)} - x + + In our implementation, :math:`K(x)` is equal to 1 if :math:`x` is small enough + and is equal to 0 otherwise. Effectively :math:`K(y - x)` indicates whether + :math:`y` is in the neighborhood of :math:`x`. -In our implementation, :math:`K(x)` is equal to 1 if :math:`x` is small enough and is -equal to 0 otherwise. Effectively :math:`K(y - x)` indicates whether :math:`y` is in -the neighborhood of :math:`x`. The algorithm automatically sets the number of clusters, instead of relying on a parameter ``bandwidth``, which dictates the size of the region to search through. @@ -450,16 +469,16 @@ given sample. :scale: 50 -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_cluster_plot_mean_shift.py`: Mean Shift clustering - on a synthetic 2D datasets with 3 classes. +* :ref:`sphx_glr_auto_examples_cluster_plot_mean_shift.py`: Mean Shift clustering + on a synthetic 2D datasets with 3 classes. -.. topic:: References: +.. dropdown:: References - * :doi:`"Mean shift: A robust approach toward feature space analysis" - <10.1109/34.1000236>` - D. Comaniciu and P. 
Meer, *IEEE Transactions on Pattern Analysis and Machine Intelligence* (2002) + * :doi:`"Mean shift: A robust approach toward feature space analysis" + <10.1109/34.1000236>` D. Comaniciu and P. Meer, *IEEE Transactions on Pattern + Analysis and Machine Intelligence* (2002) .. _spectral_clustering: @@ -510,25 +529,26 @@ computed using a function of a gradient of the image. See the examples for such an application. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_cluster_plot_segmentation_toy.py`: Segmenting objects - from a noisy background using spectral clustering. +* :ref:`sphx_glr_auto_examples_cluster_plot_segmentation_toy.py`: Segmenting objects + from a noisy background using spectral clustering. +* :ref:`sphx_glr_auto_examples_cluster_plot_coin_segmentation.py`: Spectral clustering + to split the image of coins in regions. - * :ref:`sphx_glr_auto_examples_cluster_plot_coin_segmentation.py`: Spectral clustering - to split the image of coins in regions. .. |coin_kmeans| image:: ../auto_examples/cluster/images/sphx_glr_plot_coin_segmentation_001.png - :target: ../auto_examples/cluster/plot_coin_segmentation.html - :scale: 35 + :target: ../auto_examples/cluster/plot_coin_segmentation.html + :scale: 35 .. |coin_discretize| image:: ../auto_examples/cluster/images/sphx_glr_plot_coin_segmentation_002.png - :target: ../auto_examples/cluster/plot_coin_segmentation.html - :scale: 35 + :target: ../auto_examples/cluster/plot_coin_segmentation.html + :scale: 35 .. |coin_cluster_qr| image:: ../auto_examples/cluster/images/sphx_glr_plot_coin_segmentation_003.png - :target: ../auto_examples/cluster/plot_coin_segmentation.html - :scale: 35 + :target: ../auto_examples/cluster/plot_coin_segmentation.html + :scale: 35 + Different label assignment strategies ------------------------------------- @@ -550,14 +570,15 @@ below. |coin_kmeans| |coin_discretize| |coin_cluster_qr| ================================ ================================ ================================ -.. topic:: References: +.. dropdown:: References - * `"Multiclass spectral clustering" - `_ - Stella X. Yu, Jianbo Shi, 2003 + * `"Multiclass spectral clustering" + `_ + Stella X. Yu, Jianbo Shi, 2003 + + * :doi:`"Simple, direct, and efficient multi-way spectral clustering"<10.1093/imaiai/iay008>` + Anil Damle, Victor Minden, Lexing Ying, 2019 - * :doi:`"Simple, direct, and efficient multi-way spectral clustering"<10.1093/imaiai/iay008>` - Anil Damle, Victor Minden, Lexing Ying, 2019 .. _spectral_clustering_graph: @@ -573,28 +594,25 @@ graph, and SpectralClustering is initialized with `affinity='precomputed'`:: ... assign_labels='discretize') >>> sc.fit_predict(adjacency_matrix) # doctest: +SKIP -.. topic:: References: +.. dropdown:: References + + * :doi:`"A Tutorial on Spectral Clustering" <10.1007/s11222-007-9033-z>` Ulrike + von Luxburg, 2007 - * :doi:`"A Tutorial on Spectral Clustering" - <10.1007/s11222-007-9033-z>` - Ulrike von Luxburg, 2007 + * :doi:`"Normalized cuts and image segmentation" <10.1109/34.868688>` Jianbo + Shi, Jitendra Malik, 2000 - * :doi:`"Normalized cuts and image segmentation" - <10.1109/34.868688>` - Jianbo Shi, Jitendra Malik, 2000 + * `"A Random Walks View of Spectral Segmentation" + `_ + Marina Meila, Jianbo Shi, 2001 - * `"A Random Walks View of Spectral Segmentation" - `_ - Marina Meila, Jianbo Shi, 2001 + * `"On Spectral Clustering: Analysis and an algorithm" + `_ + Andrew Y. Ng, Michael I. 
Jordan, Yair Weiss, 2001 - * `"On Spectral Clustering: Analysis and an algorithm" - `_ - Andrew Y. Ng, Michael I. Jordan, Yair Weiss, 2001 + * :arxiv:`"Preconditioned Spectral Clustering for Stochastic Block Partition + Streaming Graph Challenge" <1708.07481>` David Zhuzhunashvili, Andrew Knyazev - * :arxiv:`"Preconditioned Spectral Clustering for Stochastic - Block Partition Streaming Graph Challenge" - <1708.07481>` - David Zhuzhunashvili, Andrew Knyazev .. _hierarchical_clustering: @@ -655,10 +673,14 @@ while not robust to noisy data, can be computed very efficiently and can therefore be useful to provide hierarchical clustering of larger datasets. Single linkage can also perform well on non-globular data. -.. topic:: Examples: +.. rubric:: Examples + +* :ref:`sphx_glr_auto_examples_cluster_plot_digits_linkage.py`: exploration of the + different linkage strategies in a real dataset. + + * :ref:`sphx_glr_auto_examples_cluster_plot_linkage_comparison.py`: exploration of + the different linkage strategies in toy datasets. - * :ref:`sphx_glr_auto_examples_cluster_plot_digits_linkage.py`: exploration of the - different linkage strategies in a real dataset. Visualization of cluster hierarchy ---------------------------------- @@ -671,6 +693,9 @@ of the data, though more so in the case of small sample sizes. :target: ../auto_examples/cluster/plot_agglomerative_dendrogram.html :scale: 42 +.. rubric:: Examples + +* :ref:`sphx_glr_auto_examples_cluster_plot_agglomerative_dendrogram.py` Adding connectivity constraints @@ -712,21 +737,6 @@ using :func:`sklearn.feature_extraction.image.grid_to_graph` to enable only merging of neighboring pixels on an image, as in the :ref:`coin ` example. -.. topic:: Examples: - - * :ref:`sphx_glr_auto_examples_cluster_plot_coin_ward_segmentation.py`: Ward clustering - to split the image of coins in regions. - - * :ref:`sphx_glr_auto_examples_cluster_plot_ward_structured_vs_unstructured.py`: Example of - Ward algorithm on a swiss-roll, comparison of structured approaches - versus unstructured approaches. - - * :ref:`sphx_glr_auto_examples_cluster_plot_feature_agglomeration_vs_univariate_selection.py`: - Example of dimensionality reduction with feature agglomeration based on - Ward hierarchical clustering. - - * :ref:`sphx_glr_auto_examples_cluster_plot_agglomerative_clustering.py` - .. warning:: **Connectivity constraints with single, average and complete linkage** Connectivity constraints and single, complete or average linkage can enhance @@ -754,6 +764,21 @@ enable only merging of neighboring pixels on an image, as in the :target: ../auto_examples/cluster/plot_agglomerative_clustering.html :scale: 38 +.. rubric:: Examples + +* :ref:`sphx_glr_auto_examples_cluster_plot_coin_ward_segmentation.py`: Ward + clustering to split the image of coins in regions. + +* :ref:`sphx_glr_auto_examples_cluster_plot_ward_structured_vs_unstructured.py`: Example + of Ward algorithm on a swiss-roll, comparison of structured approaches + versus unstructured approaches. + +* :ref:`sphx_glr_auto_examples_cluster_plot_feature_agglomeration_vs_univariate_selection.py`: Example + of dimensionality reduction with feature agglomeration based on Ward + hierarchical clustering. + +* :ref:`sphx_glr_auto_examples_cluster_plot_agglomerative_clustering.py` + Varying the metric ------------------- @@ -786,9 +811,10 @@ each class. :target: ../auto_examples/cluster/plot_agglomerative_clustering_metrics.html :scale: 32 -.. topic:: Examples: +.. 
rubric:: Examples + +* :ref:`sphx_glr_auto_examples_cluster_plot_agglomerative_clustering_metrics.py` - - * :ref:`sphx_glr_auto_examples_cluster_plot_agglomerative_clustering_metrics.py` Bisecting K-Means ----------------- @@ -831,24 +857,23 @@ Difference between Bisecting K-Means and regular K-Means can be seen on example While the regular K-Means algorithm tends to create non-related clusters, clusters from Bisecting K-Means are well ordered and create quite a visible hierarchy. -.. topic:: References: - - * `"A Comparison of Document Clustering Techniques" - `_ - Michael Steinbach, George Karypis and Vipin Kumar, - Department of Computer Science and Egineering, University of Minnesota - (June 2000) - * `"Performance Analysis of K-Means and Bisecting K-Means Algorithms in Weblog Data" - `_ - K.Abirami and Dr.P.Mayilvahanan, - International Journal of Emerging Technologies in Engineering Research (IJETER) - Volume 4, Issue 8, (August 2016) - * `"Bisecting K-means Algorithm Based on K-valued Self-determining - and Clustering Center Optimization" - `_ - Jian Di, Xinyue Gou - School of Control and Computer Engineering,North China Electric Power University, - Baoding, Hebei, China (August 2017) +.. dropdown:: References + + * `"A Comparison of Document Clustering Techniques" + `_ Michael + Steinbach, George Karypis and Vipin Kumar, Department of Computer Science and + Engineering, University of Minnesota (June 2000) + * `"Performance Analysis of K-Means and Bisecting K-Means Algorithms in Weblog + Data" + `_ + K. Abirami and Dr. P. Mayilvahanan, International Journal of Emerging + Technologies in Engineering Research (IJETER) Volume 4, Issue 8, (August 2016) + * `"Bisecting K-means Algorithm Based on K-valued Self-determining and + Clustering Center Optimization" + `_ Jian Di, Xinyue Gou, School + of Control and Computer Engineering, North China Electric Power University, + Baoding, Hebei, China (August 2017) + .. _dbscan: @@ -902,71 +927,68 @@ samples that are still part of a cluster. Moreover, the outliers are indicated by black points below. .. |dbscan_results| image:: ../auto_examples/cluster/images/sphx_glr_plot_dbscan_002.png - :target: ../auto_examples/cluster/plot_dbscan.html - :scale: 50 + :target: ../auto_examples/cluster/plot_dbscan.html + :scale: 50 .. centered:: |dbscan_results| -.. topic:: Examples: - - * :ref:`sphx_glr_auto_examples_cluster_plot_dbscan.py` - -.. topic:: Implementation - - The DBSCAN algorithm is deterministic, always generating the same clusters - when given the same data in the same order. However, the results can differ when - data is provided in a different order. First, even though the core samples - will always be assigned to the same clusters, the labels of those clusters - will depend on the order in which those samples are encountered in the data. - Second and more importantly, the clusters to which non-core samples are assigned - can differ depending on the data order. This would happen when a non-core sample - has a distance lower than ``eps`` to two core samples in different clusters. By the - triangular inequality, those two core samples must be more distant than - ``eps`` from each other, or they would be in the same cluster. The non-core - sample is assigned to whichever cluster is generated first in a pass - through the data, and so the results will depend on the data ordering.
- - The current implementation uses ball trees and kd-trees - to determine the neighborhood of points, - which avoids calculating the full distance matrix - (as was done in scikit-learn versions before 0.14). - The possibility to use custom metrics is retained; - for details, see :class:`NearestNeighbors`. - -.. topic:: Memory consumption for large sample sizes - - This implementation is by default not memory efficient because it constructs - a full pairwise similarity matrix in the case where kd-trees or ball-trees cannot - be used (e.g., with sparse matrices). This matrix will consume :math:`n^2` floats. - A couple of mechanisms for getting around this are: - - - Use :ref:`OPTICS ` clustering in conjunction with the - `extract_dbscan` method. OPTICS clustering also calculates the full - pairwise matrix, but only keeps one row in memory at a time (memory - complexity n). - - - A sparse radius neighborhood graph (where missing entries are presumed to - be out of eps) can be precomputed in a memory-efficient way and dbscan - can be run over this with ``metric='precomputed'``. See - :meth:`sklearn.neighbors.NearestNeighbors.radius_neighbors_graph`. - - - The dataset can be compressed, either by removing exact duplicates if - these occur in your data, or by using BIRCH. Then you only have a - relatively small number of representatives for a large number of points. - You can then provide a ``sample_weight`` when fitting DBSCAN. - -.. topic:: References: - - * `"A Density-Based Algorithm for Discovering Clusters in Large Spatial Databases - with Noise" `_ - Ester, M., H. P. Kriegel, J. Sander, and X. Xu, - In Proceedings of the 2nd International Conference on Knowledge Discovery - and Data Mining, Portland, OR, AAAI Press, pp. 226–231. 1996 - - * :doi:`"DBSCAN revisited, revisited: why and how you should (still) use DBSCAN." - <10.1145/3068335>` - Schubert, E., Sander, J., Ester, M., Kriegel, H. P., & Xu, X. (2017). - In ACM Transactions on Database Systems (TODS), 42(3), 19. +.. rubric:: Examples + +* :ref:`sphx_glr_auto_examples_cluster_plot_dbscan.py` + +.. dropdown:: Implementation + + The DBSCAN algorithm is deterministic, always generating the same clusters when + given the same data in the same order. However, the results can differ when + data is provided in a different order. First, even though the core samples will + always be assigned to the same clusters, the labels of those clusters will + depend on the order in which those samples are encountered in the data. Second + and more importantly, the clusters to which non-core samples are assigned can + differ depending on the data order. This would happen when a non-core sample + has a distance lower than ``eps`` to two core samples in different clusters. By + the triangular inequality, those two core samples must be more distant than + ``eps`` from each other, or they would be in the same cluster. The non-core + sample is assigned to whichever cluster is generated first in a pass through the + data, and so the results will depend on the data ordering. + + The current implementation uses ball trees and kd-trees to determine the + neighborhood of points, which avoids calculating the full distance matrix (as + was done in scikit-learn versions before 0.14). The possibility to use custom + metrics is retained; for details, see :class:`NearestNeighbors`. + +.. 
dropdown:: Memory consumption for large sample sizes + + This implementation is by default not memory efficient because it constructs a + full pairwise similarity matrix in the case where kd-trees or ball-trees cannot + be used (e.g., with sparse matrices). This matrix will consume :math:`n^2` + floats. A couple of mechanisms for getting around this are: + + - Use :ref:`OPTICS ` clustering in conjunction with the `extract_dbscan` + method. OPTICS clustering also calculates the full pairwise matrix, but only + keeps one row in memory at a time (memory complexity n). + + - A sparse radius neighborhood graph (where missing entries are presumed to be + out of eps) can be precomputed in a memory-efficient way and dbscan can be run + over this with ``metric='precomputed'``. See + :meth:`sklearn.neighbors.NearestNeighbors.radius_neighbors_graph`. + + - The dataset can be compressed, either by removing exact duplicates if these + occur in your data, or by using BIRCH. Then you only have a relatively small + number of representatives for a large number of points. You can then provide a + ``sample_weight`` when fitting DBSCAN. + +.. dropdown:: References + +* `A Density-Based Algorithm for Discovering Clusters in Large Spatial + Databases with Noise `_ + Ester, M., H. P. Kriegel, J. Sander, and X. Xu, In Proceedings of the 2nd + International Conference on Knowledge Discovery and Data Mining, Portland, OR, + AAAI Press, pp. 226-231. 1996 + +* :doi:`DBSCAN revisited, revisited: why and how you should (still) use DBSCAN. + <10.1145/3068335>` Schubert, E., Sander, J., Ester, M., Kriegel, H. P., & Xu, + X. (2017). In ACM Transactions on Database Systems (TODS), 42(3), 19. + .. _hdbscan: @@ -986,6 +1008,10 @@ scales by building an alternative representation of the clustering problem. This implementation is adapted from the original implementation of HDBSCAN, `scikit-learn-contrib/hdbscan `_ based on [LJ2017]_. +.. rubric:: Examples + +* :ref:`sphx_glr_auto_examples_cluster_plot_hdbscan.py` + Mutual Reachability Graph ------------------------- @@ -1026,16 +1052,16 @@ efficiently, HDBSCAN first extracts a minimum spanning tree (MST) from the fully -connected mutual reachability graph, then greedily cuts the edges with highest weight. An outline of the HDBSCAN algorithm is as follows: - 1. Extract the MST of :math:`G_{ms}` - 2. Extend the MST by adding a "self edge" for each vertex, with weight equal - to the core distance of the underlying sample. - 3. Initialize a single cluster and label for the MST. - 4. Remove the edge with the greatest weight from the MST (ties are - removed simultaneously). - 5. Assign cluster labels to the connected components which contain the - end points of the now-removed edge. If the component does not have at least - one edge it is instead assigned a "null" label marking it as noise. - 6. Repeat 4-5 until there are no more connected components. +1. Extract the MST of :math:`G_{ms}`. +2. Extend the MST by adding a "self edge" for each vertex, with weight equal + to the core distance of the underlying sample. +3. Initialize a single cluster and label for the MST. +4. Remove the edge with the greatest weight from the MST (ties are + removed simultaneously). +5. Assign cluster labels to the connected components which contain the + end points of the now-removed edge. If the component does not have at least + one edge it is instead assigned a "null" label marking it as noise. +6. Repeat 4-5 until there are no more connected components. 
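+
+In practice, the construction of the hierarchy and the extraction of the final labels
+are handled internally by :class:`HDBSCAN`, which follows the usual estimator API. The
+snippet below is only a minimal usage sketch on toy blob data (the exact labels are
+data-dependent, and samples considered noise receive the label ``-1``)::
+
+    >>> from sklearn.cluster import HDBSCAN
+    >>> from sklearn.datasets import make_blobs
+    >>> X, _ = make_blobs(n_samples=60, centers=2, random_state=0)
+    >>> hdb = HDBSCAN(min_cluster_size=5).fit(X)
+    >>> sorted(set(hdb.labels_))  # doctest: +SKIP
+    [0, 1]
+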
HDBSCAN is therefore able to obtain all possible partitions achievable by DBSCAN* for a fixed choice of `min_samples` in a hierarchical fashion. @@ -1045,11 +1071,11 @@ it relies solely on the choice of `min_samples`, which tends to be a more robust hyperparameter. .. |hdbscan_ground_truth| image:: ../auto_examples/cluster/images/sphx_glr_plot_hdbscan_005.png - :target: ../auto_examples/cluster/plot_hdbscan.html - :scale: 75 + :target: ../auto_examples/cluster/plot_hdbscan.html + :scale: 75 .. |hdbscan_results| image:: ../auto_examples/cluster/images/sphx_glr_plot_hdbscan_007.png - :target: ../auto_examples/cluster/plot_hdbscan.html - :scale: 75 + :target: ../auto_examples/cluster/plot_hdbscan.html + :scale: 75 .. centered:: |hdbscan_ground_truth| .. centered:: |hdbscan_results| @@ -1060,19 +1086,19 @@ than `minimum_cluster_size` many samples are considered noise. In practice, one can set `minimum_cluster_size = min_samples` to couple the parameters and simplify the hyperparameter space. -.. topic:: References: +.. rubric:: References - .. [CM2013] Campello, R.J.G.B., Moulavi, D., Sander, J. (2013). Density-Based Clustering - Based on Hierarchical Density Estimates. In: Pei, J., Tseng, V.S., Cao, L., - Motoda, H., Xu, G. (eds) Advances in Knowledge Discovery and Data Mining. - PAKDD 2013. Lecture Notes in Computer Science(), vol 7819. Springer, Berlin, - Heidelberg. - :doi:`Density-Based Clustering Based on Hierarchical Density Estimates <10.1007/978-3-642-37456-2_14>` +.. [CM2013] Campello, R.J.G.B., Moulavi, D., Sander, J. (2013). Density-Based + Clustering Based on Hierarchical Density Estimates. In: Pei, J., Tseng, V.S., + Cao, L., Motoda, H., Xu, G. (eds) Advances in Knowledge Discovery and Data + Mining. PAKDD 2013. Lecture Notes in Computer Science(), vol 7819. Springer, + Berlin, Heidelberg. :doi:`Density-Based Clustering Based on Hierarchical + Density Estimates <10.1007/978-3-642-37456-2_14>` - .. [LJ2017] L. McInnes and J. Healy, (2017). Accelerated Hierarchical Density Based - Clustering. In: IEEE International Conference on Data Mining Workshops (ICDMW), - 2017, pp. 33-42. - :doi:`Accelerated Hierarchical Density Based Clustering <10.1109/ICDMW.2017.12>` +.. [LJ2017] L. McInnes and J. Healy, (2017). Accelerated Hierarchical Density + Based Clustering. In: IEEE International Conference on Data Mining Workshops + (ICDMW), 2017, pp. 33-42. :doi:`Accelerated Hierarchical Density Based + Clustering <10.1109/ICDMW.2017.12>` .. _optics: @@ -1118,47 +1144,48 @@ the linear segment clusters of the reachability plot. Note that the blue and red clusters are adjacent in the reachability plot, and can be hierarchically represented as children of a larger parent cluster. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_cluster_plot_optics.py` +* :ref:`sphx_glr_auto_examples_cluster_plot_optics.py` -.. topic:: Comparison with DBSCAN +.. dropdown:: Comparison with DBSCAN - The results from OPTICS ``cluster_optics_dbscan`` method and DBSCAN are - very similar, but not always identical; specifically, labeling of periphery - and noise points. This is in part because the first samples of each dense - area processed by OPTICS have a large reachability value while being close - to other points in their area, and will thus sometimes be marked as noise - rather than periphery. This affects adjacent points when they are - considered as candidates for being marked as either periphery or noise. 
+ The results from OPTICS ``cluster_optics_dbscan`` method and DBSCAN are very + similar, but not always identical; specifically, labeling of periphery and noise + points. This is in part because the first samples of each dense area processed + by OPTICS have a large reachability value while being close to other points in + their area, and will thus sometimes be marked as noise rather than periphery. + This affects adjacent points when they are considered as candidates for being + marked as either periphery or noise. - Note that for any single value of ``eps``, DBSCAN will tend to have a - shorter run time than OPTICS; however, for repeated runs at varying ``eps`` - values, a single run of OPTICS may require less cumulative runtime than - DBSCAN. It is also important to note that OPTICS' output is close to - DBSCAN's only if ``eps`` and ``max_eps`` are close. + Note that for any single value of ``eps``, DBSCAN will tend to have a shorter + run time than OPTICS; however, for repeated runs at varying ``eps`` values, a + single run of OPTICS may require less cumulative runtime than DBSCAN. It is also + important to note that OPTICS' output is close to DBSCAN's only if ``eps`` and + ``max_eps`` are close. -.. topic:: Computational Complexity +.. dropdown:: Computational Complexity - Spatial indexing trees are used to avoid calculating the full distance - matrix, and allow for efficient memory usage on large sets of samples. - Different distance metrics can be supplied via the ``metric`` keyword. + Spatial indexing trees are used to avoid calculating the full distance matrix, + and allow for efficient memory usage on large sets of samples. Different + distance metrics can be supplied via the ``metric`` keyword. - For large datasets, similar (but not identical) results can be obtained via - :class:`HDBSCAN`. The HDBSCAN implementation is - multithreaded, and has better algorithmic runtime complexity than OPTICS, - at the cost of worse memory scaling. For extremely large datasets that - exhaust system memory using HDBSCAN, OPTICS will maintain :math:`n` (as opposed - to :math:`n^2`) memory scaling; however, tuning of the ``max_eps`` parameter - will likely need to be used to give a solution in a reasonable amount of - wall time. + For large datasets, similar (but not identical) results can be obtained via + :class:`HDBSCAN`. The HDBSCAN implementation is multithreaded, and has better + algorithmic runtime complexity than OPTICS, at the cost of worse memory scaling. + For extremely large datasets that exhaust system memory using HDBSCAN, OPTICS + will maintain :math:`n` (as opposed to :math:`n^2`) memory scaling; however, + tuning of the ``max_eps`` parameter will likely need to be used to give a + solution in a reasonable amount of wall time. -.. topic:: References: - * "OPTICS: ordering points to identify the clustering structure." - Ankerst, Mihael, Markus M. Breunig, Hans-Peter Kriegel, and Jörg Sander. - In ACM Sigmod Record, vol. 28, no. 2, pp. 49-60. ACM, 1999. +.. dropdown:: References + + * "OPTICS: ordering points to identify the clustering structure." Ankerst, + Mihael, Markus M. Breunig, Hans-Peter Kriegel, and Jörg Sander. In ACM Sigmod + Record, vol. 28, no. 2, pp. 49-60. ACM, 1999. + .. _birch: @@ -1194,60 +1221,60 @@ If ``n_clusters`` is set to None, the subclusters from the leaves are directly read off, otherwise a global clustering step labels these subclusters into global clusters (labels) and the samples are mapped to the global label of the nearest subcluster.
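+
+For illustration, a small sketch of the effect of ``n_clusters`` on the output;
+the dataset and the ``threshold`` and ``n_clusters`` values below are arbitrary
+choices made for the example::
+
+    from sklearn.cluster import Birch
+    from sklearn.datasets import make_blobs
+
+    X, _ = make_blobs(n_samples=1000, centers=4, random_state=0)
+
+    # n_clusters=None: the leaf subclusters are read off directly
+    brc = Birch(threshold=0.5, n_clusters=None).fit(X)
+    print(brc.subcluster_centers_.shape)
+
+    # an integer value triggers the global clustering step on the subclusters
+    brc = Birch(threshold=0.5, n_clusters=4).fit(X)
+    print(len(set(brc.labels_)))
+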
-**Algorithm description:** - -- A new sample is inserted into the root of the CF Tree which is a CF Node. - It is then merged with the subcluster of the root, that has the smallest - radius after merging, constrained by the threshold and branching factor conditions. - If the subcluster has any child node, then this is done repeatedly till it reaches - a leaf. After finding the nearest subcluster in the leaf, the properties of this - subcluster and the parent subclusters are recursively updated. - -- If the radius of the subcluster obtained by merging the new sample and the - nearest subcluster is greater than the square of the threshold and if the - number of subclusters is greater than the branching factor, then a space is temporarily - allocated to this new sample. The two farthest subclusters are taken and - the subclusters are divided into two groups on the basis of the distance - between these subclusters. - -- If this split node has a parent subcluster and there is room - for a new subcluster, then the parent is split into two. If there is no room, - then this node is again split into two and the process is continued - recursively, till it reaches the root. - -**BIRCH or MiniBatchKMeans?** - - - BIRCH does not scale very well to high dimensional data. As a rule of thumb if - ``n_features`` is greater than twenty, it is generally better to use MiniBatchKMeans. - - If the number of instances of data needs to be reduced, or if one wants a - large number of subclusters either as a preprocessing step or otherwise, - BIRCH is more useful than MiniBatchKMeans. - +.. dropdown:: Algorithm description + + - A new sample is inserted into the root of the CF Tree which is a CF Node. It + is then merged with the subcluster of the root, that has the smallest radius + after merging, constrained by the threshold and branching factor conditions. + If the subcluster has any child node, then this is done repeatedly till it + reaches a leaf. After finding the nearest subcluster in the leaf, the + properties of this subcluster and the parent subclusters are recursively + updated. + + - If the radius of the subcluster obtained by merging the new sample and the + nearest subcluster is greater than the square of the threshold and if the + number of subclusters is greater than the branching factor, then a space is + temporarily allocated to this new sample. The two farthest subclusters are + taken and the subclusters are divided into two groups on the basis of the + distance between these subclusters. + + - If this split node has a parent subcluster and there is room for a new + subcluster, then the parent is split into two. If there is no room, then this + node is again split into two and the process is continued recursively, till it + reaches the root. + +.. dropdown:: BIRCH or MiniBatchKMeans? + + - BIRCH does not scale very well to high dimensional data. As a rule of thumb if + ``n_features`` is greater than twenty, it is generally better to use MiniBatchKMeans. + - If the number of instances of data needs to be reduced, or if one wants a + large number of subclusters either as a preprocessing step or otherwise, + BIRCH is more useful than MiniBatchKMeans. + + .. image:: ../auto_examples/cluster/images/sphx_glr_plot_birch_vs_minibatchkmeans_001.png + :target: ../auto_examples/cluster/plot_birch_vs_minibatchkmeans.html -**How to use partial_fit?** +.. dropdown:: How to use partial_fit? 
-To avoid the computation of global clustering, for every call of ``partial_fit`` -the user is advised + To avoid the computation of global clustering, for every call of ``partial_fit`` + the user is advised: - 1. To set ``n_clusters=None`` initially - 2. Train all data by multiple calls to partial_fit. - 3. Set ``n_clusters`` to a required value using - ``brc.set_params(n_clusters=n_clusters)``. - 4. Call ``partial_fit`` finally with no arguments, i.e. ``brc.partial_fit()`` - which performs the global clustering. + 1. To set ``n_clusters=None`` initially. + 2. Train all data by multiple calls to partial_fit. + 3. Set ``n_clusters`` to a required value using + ``brc.set_params(n_clusters=n_clusters)``. + 4. Call ``partial_fit`` finally with no arguments, i.e. ``brc.partial_fit()`` + which performs the global clustering. -.. image:: ../auto_examples/cluster/images/sphx_glr_plot_birch_vs_minibatchkmeans_001.png - :target: ../auto_examples/cluster/plot_birch_vs_minibatchkmeans.html +.. dropdown:: References -.. topic:: References: + * Tian Zhang, Raghu Ramakrishnan, Maron Livny BIRCH: An efficient data + clustering method for large databases. + https://www.cs.sfu.ca/CourseCentral/459/han/papers/zhang96.pdf - * Tian Zhang, Raghu Ramakrishnan, Maron Livny - BIRCH: An efficient data clustering method for large databases. - https://www.cs.sfu.ca/CourseCentral/459/han/papers/zhang96.pdf + * Roberto Perdisci JBirch - Java implementation of BIRCH clustering algorithm + https://code.google.com/archive/p/jbirch - * Roberto Perdisci - JBirch - Java implementation of BIRCH clustering algorithm - https://code.google.com/archive/p/jbirch .. _clustering_evaluation: @@ -1330,104 +1357,92 @@ will not necessarily be close to zero.:: -0.07... -Advantages -~~~~~~~~~~ +.. topic:: Advantages: -- **Interpretability**: The unadjusted Rand index is proportional - to the number of sample pairs whose labels are the same in both - `labels_pred` and `labels_true`, or are different in both. + - **Interpretability**: The unadjusted Rand index is proportional to the + number of sample pairs whose labels are the same in both `labels_pred` and + `labels_true`, or are different in both. -- **Random (uniform) label assignments have an adjusted Rand index - score close to 0.0** for any value of ``n_clusters`` and - ``n_samples`` (which is not the case for the unadjusted Rand index - or the V-measure for instance). + - **Random (uniform) label assignments have an adjusted Rand index score close + to 0.0** for any value of ``n_clusters`` and ``n_samples`` (which is not the + case for the unadjusted Rand index or the V-measure for instance). -- **Bounded range**: Lower values indicate different labelings, - similar clusterings have a high (adjusted or unadjusted) Rand index, - 1.0 is the perfect match score. The score range is [0, 1] for the - unadjusted Rand index and [-1, 1] for the adjusted Rand index. + - **Bounded range**: Lower values indicate different labelings, similar + clusterings have a high (adjusted or unadjusted) Rand index, 1.0 is the + perfect match score. The score range is [0, 1] for the unadjusted Rand index + and [-1, 1] for the adjusted Rand index. -- **No assumption is made on the cluster structure**: The (adjusted or - unadjusted) Rand index can be used to compare all kinds of - clustering algorithms, and can be used to compare clustering - algorithms such as k-means which assumes isotropic blob shapes with - results of spectral clustering algorithms which can find cluster - with "folded" shapes. 
+ - **No assumption is made on the cluster structure**: The (adjusted or + unadjusted) Rand index can be used to compare all kinds of clustering + algorithms, and can be used to compare clustering algorithms such as k-means + which assumes isotropic blob shapes with results of spectral clustering + algorithms which can find cluster with "folded" shapes. +.. topic:: Drawbacks: -Drawbacks -~~~~~~~~~ + - Contrary to inertia, the **(adjusted or unadjusted) Rand index requires + knowledge of the ground truth classes** which is almost never available in + practice or requires manual assignment by human annotators (as in the + supervised learning setting). -- Contrary to inertia, the **(adjusted or unadjusted) Rand index - requires knowledge of the ground truth classes** which is almost - never available in practice or requires manual assignment by human - annotators (as in the supervised learning setting). + However (adjusted or unadjusted) Rand index can also be useful in a purely + unsupervised setting as a building block for a Consensus Index that can be + used for clustering model selection (TODO). - However (adjusted or unadjusted) Rand index can also be useful in a - purely unsupervised setting as a building block for a Consensus - Index that can be used for clustering model selection (TODO). + - The **unadjusted Rand index is often close to 1.0** even if the clusterings + themselves differ significantly. This can be understood when interpreting + the Rand index as the accuracy of element pair labeling resulting from the + clusterings: In practice there often is a majority of element pairs that are + assigned the ``different`` pair label under both the predicted and the + ground truth clustering resulting in a high proportion of pair labels that + agree, which leads subsequently to a high score. -- The **unadjusted Rand index is often close to 1.0** even if the - clusterings themselves differ significantly. This can be understood - when interpreting the Rand index as the accuracy of element pair - labeling resulting from the clusterings: In practice there often is - a majority of element pairs that are assigned the ``different`` pair - label under both the predicted and the ground truth clustering - resulting in a high proportion of pair labels that agree, which - leads subsequently to a high score. +.. rubric:: Examples -.. topic:: Examples: +* :ref:`sphx_glr_auto_examples_cluster_plot_adjusted_for_chance_measures.py`: + Analysis of the impact of the dataset size on the value of + clustering measures for random assignments. - * :ref:`sphx_glr_auto_examples_cluster_plot_adjusted_for_chance_measures.py`: - Analysis of the impact of the dataset size on the value of - clustering measures for random assignments. +.. dropdown:: Mathematical formulation + If C is a ground truth class assignment and K the clustering, let us define + :math:`a` and :math:`b` as: -Mathematical formulation -~~~~~~~~~~~~~~~~~~~~~~~~ + - :math:`a`, the number of pairs of elements that are in the same set in C and + in the same set in K -If C is a ground truth class assignment and K the clustering, let us -define :math:`a` and :math:`b` as: + - :math:`b`, the number of pairs of elements that are in different sets in C and + in different sets in K -- :math:`a`, the number of pairs of elements that are in the same set - in C and in the same set in K + The unadjusted Rand index is then given by: -- :math:`b`, the number of pairs of elements that are in different sets - in C and in different sets in K + .. 
math:: \text{RI} = \frac{a + b}{C_2^{n_{samples}}} -The unadjusted Rand index is then given by: + where :math:`C_2^{n_{samples}}` is the total number of possible pairs in the + dataset. It does not matter if the calculation is performed on ordered pairs or + unordered pairs as long as the calculation is performed consistently. -.. math:: \text{RI} = \frac{a + b}{C_2^{n_{samples}}} + However, the Rand index does not guarantee that random label assignments will + get a value close to zero (esp. if the number of clusters is in the same order + of magnitude as the number of samples). -where :math:`C_2^{n_{samples}}` is the total number of possible pairs -in the dataset. It does not matter if the calculation is performed on -ordered pairs or unordered pairs as long as the calculation is -performed consistently. + To counter this effect we can discount the expected RI :math:`E[\text{RI}]` of + random labelings by defining the adjusted Rand index as follows: -However, the Rand index does not guarantee that random label assignments -will get a value close to zero (esp. if the number of clusters is in -the same order of magnitude as the number of samples). + .. math:: \text{ARI} = \frac{\text{RI} - E[\text{RI}]}{\max(\text{RI}) - E[\text{RI}]} -To counter this effect we can discount the expected RI :math:`E[\text{RI}]` of -random labelings by defining the adjusted Rand index as follows: +.. dropdown:: References -.. math:: \text{ARI} = \frac{\text{RI} - E[\text{RI}]}{\max(\text{RI}) - E[\text{RI}]} + * `Comparing Partitions + `_ L. Hubert and P. + Arabie, Journal of Classification 1985 -.. topic:: References + * `Properties of the Hubert-Arabie adjusted Rand index + `_ D. Steinley, Psychological + Methods 2004 - * `Comparing Partitions - `_ - L. Hubert and P. Arabie, Journal of Classification 1985 - - * `Properties of the Hubert-Arabie adjusted Rand index - `_ - D. Steinley, Psychological Methods 2004 - - * `Wikipedia entry for the Rand index - `_ - - * `Wikipedia entry for the adjusted Rand index - `_ + * `Wikipedia entry for the Rand index + `_ .. _mutual_info_score: @@ -1486,133 +1501,123 @@ Bad (e.g. independent labelings) have non-positive scores:: -0.10526... -Advantages -~~~~~~~~~~ - -- **Random (uniform) label assignments have a AMI score close to 0.0** - for any value of ``n_clusters`` and ``n_samples`` (which is not the - case for raw Mutual Information or the V-measure for instance). - -- **Upper bound of 1**: Values close to zero indicate two label - assignments that are largely independent, while values close to one - indicate significant agreement. Further, an AMI of exactly 1 indicates - that the two label assignments are equal (with or without permutation). +.. topic:: Advantages: + - **Random (uniform) label assignments have a AMI score close to 0.0** for any + value of ``n_clusters`` and ``n_samples`` (which is not the case for raw + Mutual Information or the V-measure for instance). -Drawbacks -~~~~~~~~~ + - **Upper bound of 1**: Values close to zero indicate two label assignments + that are largely independent, while values close to one indicate significant + agreement. Further, an AMI of exactly 1 indicates that the two label + assignments are equal (with or without permutation). -- Contrary to inertia, **MI-based measures require the knowledge - of the ground truth classes** while almost never available in practice or - requires manual assignment by human annotators (as in the supervised learning - setting). +.. 
topic:: Drawbacks: - However MI-based measures can also be useful in purely unsupervised setting as a - building block for a Consensus Index that can be used for clustering - model selection. + - Contrary to inertia, **MI-based measures require the knowledge of the ground + truth classes** while almost never available in practice or requires manual + assignment by human annotators (as in the supervised learning setting). -- NMI and MI are not adjusted against chance. + However MI-based measures can also be useful in purely unsupervised setting + as a building block for a Consensus Index that can be used for clustering + model selection. + - NMI and MI are not adjusted against chance. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_cluster_plot_adjusted_for_chance_measures.py`: Analysis of - the impact of the dataset size on the value of clustering measures - for random assignments. This example also includes the Adjusted Rand - Index. +* :ref:`sphx_glr_auto_examples_cluster_plot_adjusted_for_chance_measures.py`: Analysis + of the impact of the dataset size on the value of clustering measures for random + assignments. This example also includes the Adjusted Rand Index. +.. dropdown:: Mathematical formulation -Mathematical formulation -~~~~~~~~~~~~~~~~~~~~~~~~ + Assume two label assignments (of the same N objects), :math:`U` and :math:`V`. + Their entropy is the amount of uncertainty for a partition set, defined by: -Assume two label assignments (of the same N objects), :math:`U` and :math:`V`. -Their entropy is the amount of uncertainty for a partition set, defined by: + .. math:: H(U) = - \sum_{i=1}^{|U|}P(i)\log(P(i)) -.. math:: H(U) = - \sum_{i=1}^{|U|}P(i)\log(P(i)) + where :math:`P(i) = |U_i| / N` is the probability that an object picked at + random from :math:`U` falls into class :math:`U_i`. Likewise for :math:`V`: -where :math:`P(i) = |U_i| / N` is the probability that an object picked at -random from :math:`U` falls into class :math:`U_i`. Likewise for :math:`V`: + .. math:: H(V) = - \sum_{j=1}^{|V|}P'(j)\log(P'(j)) -.. math:: H(V) = - \sum_{j=1}^{|V|}P'(j)\log(P'(j)) + With :math:`P'(j) = |V_j| / N`. The mutual information (MI) between :math:`U` + and :math:`V` is calculated by: -With :math:`P'(j) = |V_j| / N`. The mutual information (MI) between :math:`U` -and :math:`V` is calculated by: + .. math:: \text{MI}(U, V) = \sum_{i=1}^{|U|}\sum_{j=1}^{|V|}P(i, j)\log\left(\frac{P(i,j)}{P(i)P'(j)}\right) -.. math:: \text{MI}(U, V) = \sum_{i=1}^{|U|}\sum_{j=1}^{|V|}P(i, j)\log\left(\frac{P(i,j)}{P(i)P'(j)}\right) + where :math:`P(i, j) = |U_i \cap V_j| / N` is the probability that an object + picked at random falls into both classes :math:`U_i` and :math:`V_j`. -where :math:`P(i, j) = |U_i \cap V_j| / N` is the probability that an object -picked at random falls into both classes :math:`U_i` and :math:`V_j`. + It also can be expressed in set cardinality formulation: -It also can be expressed in set cardinality formulation: + .. math:: \text{MI}(U, V) = \sum_{i=1}^{|U|} \sum_{j=1}^{|V|} \frac{|U_i \cap V_j|}{N}\log\left(\frac{N|U_i \cap V_j|}{|U_i||V_j|}\right) -.. math:: \text{MI}(U, V) = \sum_{i=1}^{|U|} \sum_{j=1}^{|V|} \frac{|U_i \cap V_j|}{N}\log\left(\frac{N|U_i \cap V_j|}{|U_i||V_j|}\right) + The normalized mutual information is defined as -The normalized mutual information is defined as + .. math:: \text{NMI}(U, V) = \frac{\text{MI}(U, V)}{\text{mean}(H(U), H(V))} -.. 
math:: \text{NMI}(U, V) = \frac{\text{MI}(U, V)}{\text{mean}(H(U), H(V))} + This value of the mutual information and also the normalized variant is not + adjusted for chance and will tend to increase as the number of different labels + (clusters) increases, regardless of the actual amount of "mutual information" + between the label assignments. -This value of the mutual information and also the normalized variant is not -adjusted for chance and will tend to increase as the number of different labels -(clusters) increases, regardless of the actual amount of "mutual information" -between the label assignments. + The expected value for the mutual information can be calculated using the + following equation [VEB2009]_. In this equation, :math:`a_i = |U_i|` (the number + of elements in :math:`U_i`) and :math:`b_j = |V_j|` (the number of elements in + :math:`V_j`). -The expected value for the mutual information can be calculated using the -following equation [VEB2009]_. In this equation, -:math:`a_i = |U_i|` (the number of elements in :math:`U_i`) and -:math:`b_j = |V_j|` (the number of elements in :math:`V_j`). + .. math:: E[\text{MI}(U,V)]=\sum_{i=1}^{|U|} \sum_{j=1}^{|V|} \sum_{n_{ij}=(a_i+b_j-N)^+ + }^{\min(a_i, b_j)} \frac{n_{ij}}{N}\log \left( \frac{ N.n_{ij}}{a_i b_j}\right) + \frac{a_i!b_j!(N-a_i)!(N-b_j)!}{N!n_{ij}!(a_i-n_{ij})!(b_j-n_{ij})! + (N-a_i-b_j+n_{ij})!} + Using the expected value, the adjusted mutual information can then be calculated + using a similar form to that of the adjusted Rand index: -.. math:: E[\text{MI}(U,V)]=\sum_{i=1}^{|U|} \sum_{j=1}^{|V|} \sum_{n_{ij}=(a_i+b_j-N)^+ - }^{\min(a_i, b_j)} \frac{n_{ij}}{N}\log \left( \frac{ N.n_{ij}}{a_i b_j}\right) - \frac{a_i!b_j!(N-a_i)!(N-b_j)!}{N!n_{ij}!(a_i-n_{ij})!(b_j-n_{ij})! - (N-a_i-b_j+n_{ij})!} + .. math:: \text{AMI} = \frac{\text{MI} - E[\text{MI}]}{\text{mean}(H(U), H(V)) - E[\text{MI}]} -Using the expected value, the adjusted mutual information can then be -calculated using a similar form to that of the adjusted Rand index: + For normalized mutual information and adjusted mutual information, the + normalizing value is typically some *generalized* mean of the entropies of each + clustering. Various generalized means exist, and no firm rules exist for + preferring one over the others. The decision is largely a field-by-field basis; + for instance, in community detection, the arithmetic mean is most common. Each + normalizing method provides "qualitatively similar behaviours" [YAT2016]_. In + our implementation, this is controlled by the ``average_method`` parameter. -.. math:: \text{AMI} = \frac{\text{MI} - E[\text{MI}]}{\text{mean}(H(U), H(V)) - E[\text{MI}]} + Vinh et al. (2010) named variants of NMI and AMI by their averaging method + [VEB2010]_. Their 'sqrt' and 'sum' averages are the geometric and arithmetic + means; we use these more broadly common names. -For normalized mutual information and adjusted mutual information, the normalizing -value is typically some *generalized* mean of the entropies of each clustering. -Various generalized means exist, and no firm rules exist for preferring one over the -others. The decision is largely a field-by-field basis; for instance, in community -detection, the arithmetic mean is most common. Each -normalizing method provides "qualitatively similar behaviours" [YAT2016]_. In our -implementation, this is controlled by the ``average_method`` parameter. + .. rubric:: References -Vinh et al. (2010) named variants of NMI and AMI by their averaging method [VEB2010]_. 
Their -'sqrt' and 'sum' averages are the geometric and arithmetic means; we use these -more broadly common names. + * Strehl, Alexander, and Joydeep Ghosh (2002). "Cluster ensembles - a + knowledge reuse framework for combining multiple partitions". Journal of + Machine Learning Research 3: 583-617. `doi:10.1162/153244303321897735 + `_. -.. topic:: References + * `Wikipedia entry for the (normalized) Mutual Information + `_ - * Strehl, Alexander, and Joydeep Ghosh (2002). "Cluster ensembles – a - knowledge reuse framework for combining multiple partitions". Journal of - Machine Learning Research 3: 583–617. - `doi:10.1162/153244303321897735 `_. + * `Wikipedia entry for the Adjusted Mutual Information + `_ - * `Wikipedia entry for the (normalized) Mutual Information - `_ + .. [VEB2009] Vinh, Epps, and Bailey, (2009). "Information theoretic measures + for clusterings comparison". Proceedings of the 26th Annual International + Conference on Machine Learning - ICML '09. `doi:10.1145/1553374.1553511 + `_. ISBN + 9781605585161. - * `Wikipedia entry for the Adjusted Mutual Information - `_ - - .. [VEB2009] Vinh, Epps, and Bailey, (2009). "Information theoretic measures - for clusterings comparison". Proceedings of the 26th Annual International - Conference on Machine Learning - ICML '09. - `doi:10.1145/1553374.1553511 `_. - ISBN 9781605585161. - - .. [VEB2010] Vinh, Epps, and Bailey, (2010). "Information Theoretic Measures for - Clusterings Comparison: Variants, Properties, Normalization and - Correction for Chance". JMLR - - - .. [YAT2016] Yang, Algesheimer, and Tessone, (2016). "A comparative analysis of - community - detection algorithms on artificial networks". Scientific Reports 6: 30750. - `doi:10.1038/srep30750 `_. + .. [VEB2010] Vinh, Epps, and Bailey, (2010). "Information Theoretic Measures + for Clusterings Comparison: Variants, Properties, Normalization and + Correction for Chance". JMLR + + .. [YAT2016] Yang, Algesheimer, and Tessone, (2016). "A comparative analysis + of community detection algorithms on artificial networks". Scientific + Reports 6: 30750. `doi:10.1038/srep30750 + `_. .. _homogeneity_completeness: @@ -1695,94 +1700,89 @@ homogeneous but not complete:: homogeneity_score(a, b) == completeness_score(b, a) -Advantages -~~~~~~~~~~ - -- **Bounded scores**: 0.0 is as bad as it can be, 1.0 is a perfect score. - -- Intuitive interpretation: clustering with bad V-measure can be - **qualitatively analyzed in terms of homogeneity and completeness** - to better feel what 'kind' of mistakes is done by the assignment. +.. topic:: Advantages: -- **No assumption is made on the cluster structure**: can be used - to compare clustering algorithms such as k-means which assumes isotropic - blob shapes with results of spectral clustering algorithms which can - find cluster with "folded" shapes. + - **Bounded scores**: 0.0 is as bad as it can be, 1.0 is a perfect score. + - Intuitive interpretation: clustering with bad V-measure can be + **qualitatively analyzed in terms of homogeneity and completeness** to + better feel what 'kind' of mistakes is done by the assignment. -Drawbacks -~~~~~~~~~ + - **No assumption is made on the cluster structure**: can be used to compare + clustering algorithms such as k-means which assumes isotropic blob shapes + with results of spectral clustering algorithms which can find cluster with + "folded" shapes. 
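+
+For illustration, a short sketch of such a qualitative analysis; the label
+assignments below are made up for the example. It uses
+:func:`~sklearn.metrics.homogeneity_completeness_v_measure` to see whether a low
+V-measure comes from split classes (low completeness) or from mixed classes
+(low homogeneity)::
+
+    from sklearn.metrics import homogeneity_completeness_v_measure
+
+    labels_true = [0, 0, 0, 1, 1, 1]
+
+    # each class is split across two clusters: homogeneous but not complete
+    print(homogeneity_completeness_v_measure(labels_true, [0, 0, 1, 2, 2, 3]))
+
+    # the two classes are merged into one cluster: complete but not homogeneous
+    print(homogeneity_completeness_v_measure(labels_true, [0, 0, 0, 0, 0, 0]))
+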
-- The previously introduced metrics are **not normalized with regards to - random labeling**: this means that depending on the number of samples, - clusters and ground truth classes, a completely random labeling will - not always yield the same values for homogeneity, completeness and - hence v-measure. In particular **random labeling won't yield zero - scores especially when the number of clusters is large**. +.. topic:: Drawbacks: - This problem can safely be ignored when the number of samples is more - than a thousand and the number of clusters is less than 10. **For - smaller sample sizes or larger number of clusters it is safer to use - an adjusted index such as the Adjusted Rand Index (ARI)**. + - The previously introduced metrics are **not normalized with regards to + random labeling**: this means that depending on the number of samples, + clusters and ground truth classes, a completely random labeling will not + always yield the same values for homogeneity, completeness and hence + v-measure. In particular **random labeling won't yield zero scores + especially when the number of clusters is large**. -.. figure:: ../auto_examples/cluster/images/sphx_glr_plot_adjusted_for_chance_measures_001.png - :target: ../auto_examples/cluster/plot_adjusted_for_chance_measures.html - :align: center - :scale: 100 + This problem can safely be ignored when the number of samples is more than a + thousand and the number of clusters is less than 10. **For smaller sample + sizes or larger number of clusters it is safer to use an adjusted index such + as the Adjusted Rand Index (ARI)**. -- These metrics **require the knowledge of the ground truth classes** while - almost never available in practice or requires manual assignment by - human annotators (as in the supervised learning setting). + .. figure:: ../auto_examples/cluster/images/sphx_glr_plot_adjusted_for_chance_measures_001.png + :target: ../auto_examples/cluster/plot_adjusted_for_chance_measures.html + :align: center + :scale: 100 + - These metrics **require the knowledge of the ground truth classes** while + almost never available in practice or requires manual assignment by human + annotators (as in the supervised learning setting). -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_cluster_plot_adjusted_for_chance_measures.py`: Analysis of - the impact of the dataset size on the value of clustering measures - for random assignments. +* :ref:`sphx_glr_auto_examples_cluster_plot_adjusted_for_chance_measures.py`: Analysis + of the impact of the dataset size on the value of clustering measures for + random assignments. +.. dropdown:: Mathematical formulation -Mathematical formulation -~~~~~~~~~~~~~~~~~~~~~~~~ + Homogeneity and completeness scores are formally given by: -Homogeneity and completeness scores are formally given by: + .. math:: h = 1 - \frac{H(C|K)}{H(C)} -.. math:: h = 1 - \frac{H(C|K)}{H(C)} + .. math:: c = 1 - \frac{H(K|C)}{H(K)} -.. math:: c = 1 - \frac{H(K|C)}{H(K)} + where :math:`H(C|K)` is the **conditional entropy of the classes given the + cluster assignments** and is given by: -where :math:`H(C|K)` is the **conditional entropy of the classes given -the cluster assignments** and is given by: + .. math:: H(C|K) = - \sum_{c=1}^{|C|} \sum_{k=1}^{|K|} \frac{n_{c,k}}{n} + \cdot \log\left(\frac{n_{c,k}}{n_k}\right) -.. 
math:: H(C|K) = - \sum_{c=1}^{|C|} \sum_{k=1}^{|K|} \frac{n_{c,k}}{n} - \cdot \log\left(\frac{n_{c,k}}{n_k}\right) + and :math:`H(C)` is the **entropy of the classes** and is given by: -and :math:`H(C)` is the **entropy of the classes** and is given by: + .. math:: H(C) = - \sum_{c=1}^{|C|} \frac{n_c}{n} \cdot \log\left(\frac{n_c}{n}\right) -.. math:: H(C) = - \sum_{c=1}^{|C|} \frac{n_c}{n} \cdot \log\left(\frac{n_c}{n}\right) + with :math:`n` the total number of samples, :math:`n_c` and :math:`n_k` the + number of samples respectively belonging to class :math:`c` and cluster + :math:`k`, and finally :math:`n_{c,k}` the number of samples from class + :math:`c` assigned to cluster :math:`k`. -with :math:`n` the total number of samples, :math:`n_c` and :math:`n_k` -the number of samples respectively belonging to class :math:`c` and -cluster :math:`k`, and finally :math:`n_{c,k}` the number of samples -from class :math:`c` assigned to cluster :math:`k`. + The **conditional entropy of clusters given class** :math:`H(K|C)` and the + **entropy of clusters** :math:`H(K)` are defined in a symmetric manner. -The **conditional entropy of clusters given class** :math:`H(K|C)` and the -**entropy of clusters** :math:`H(K)` are defined in a symmetric manner. + Rosenberg and Hirschberg further define **V-measure** as the **harmonic mean of + homogeneity and completeness**: -Rosenberg and Hirschberg further define **V-measure** as the **harmonic -mean of homogeneity and completeness**: + .. math:: v = 2 \cdot \frac{h \cdot c}{h + c} -.. math:: v = 2 \cdot \frac{h \cdot c}{h + c} +.. rubric:: References -.. topic:: References +* `V-Measure: A conditional entropy-based external cluster evaluation measure + `_ Andrew Rosenberg and Julia + Hirschberg, 2007 - * `V-Measure: A conditional entropy-based external cluster evaluation - measure `_ - Andrew Rosenberg and Julia Hirschberg, 2007 +.. [B2011] `Identification and Characterization of Events in Social Media + `_, Hila + Becker, PhD Thesis. - .. [B2011] `Identication and Characterization of Events in Social Media - `_, Hila - Becker, PhD Thesis. .. _fowlkes_mallows_scores: @@ -1800,7 +1800,7 @@ Where ``TP`` is the number of **True Positive** (i.e. the number of pair of points that belong to the same clusters in both the true labels and the predicted labels), ``FP`` is the number of **False Positive** (i.e. the number of pair of points that belong to the same clusters in the true labels and not -in the predicted labels) and ``FN`` is the number of **False Negative** (i.e the +in the predicted labels) and ``FN`` is the number of **False Negative** (i.e. the number of pair of points that belongs in the same clusters in the predicted labels and not in the true labels). @@ -1835,34 +1835,31 @@ Bad (e.g. independent labelings) have zero scores:: >>> metrics.fowlkes_mallows_score(labels_true, labels_pred) 0.0 -Advantages -~~~~~~~~~~ - -- **Random (uniform) label assignments have a FMI score close to 0.0** - for any value of ``n_clusters`` and ``n_samples`` (which is not the - case for raw Mutual Information or the V-measure for instance). +.. topic:: Advantages: -- **Upper-bounded at 1**: Values close to zero indicate two label - assignments that are largely independent, while values close to one - indicate significant agreement. Further, values of exactly 0 indicate - **purely** independent label assignments and a FMI of exactly 1 indicates - that the two label assignments are equal (with or without permutation). 
+ - **Random (uniform) label assignments have a FMI score close to 0.0** for any + value of ``n_clusters`` and ``n_samples`` (which is not the case for raw + Mutual Information or the V-measure for instance). -- **No assumption is made on the cluster structure**: can be used - to compare clustering algorithms such as k-means which assumes isotropic - blob shapes with results of spectral clustering algorithms which can - find cluster with "folded" shapes. + - **Upper-bounded at 1**: Values close to zero indicate two label assignments + that are largely independent, while values close to one indicate significant + agreement. Further, values of exactly 0 indicate **purely** independent + label assignments and a FMI of exactly 1 indicates that the two label + assignments are equal (with or without permutation). + - **No assumption is made on the cluster structure**: can be used to compare + clustering algorithms such as k-means which assumes isotropic blob shapes + with results of spectral clustering algorithms which can find cluster with + "folded" shapes. -Drawbacks -~~~~~~~~~ +.. topic:: Drawbacks: -- Contrary to inertia, **FMI-based measures require the knowledge - of the ground truth classes** while almost never available in practice or - requires manual assignment by human annotators (as in the supervised learning - setting). + - Contrary to inertia, **FMI-based measures require the knowledge of the + ground truth classes** while almost never available in practice or requires + manual assignment by human annotators (as in the supervised learning + setting). -.. topic:: References +.. dropdown:: References * E. B. Fowkles and C. L. Mallows, 1983. "A method for comparing two hierarchical clusterings". Journal of the American Statistical Association. @@ -1871,6 +1868,7 @@ Drawbacks * `Wikipedia entry for the Fowlkes-Mallows Index `_ + .. _silhouette_coefficient: Silhouette Coefficient @@ -1913,34 +1911,31 @@ cluster analysis. >>> metrics.silhouette_score(X, labels, metric='euclidean') 0.55... -.. topic:: References - - * Peter J. Rousseeuw (1987). :doi:`"Silhouettes: a Graphical Aid to the - Interpretation and Validation of Cluster Analysis"<10.1016/0377-0427(87)90125-7>` - . Computational and Applied Mathematics 20: 53–65. +.. topic:: Advantages: + - The score is bounded between -1 for incorrect clustering and +1 for highly + dense clustering. Scores around zero indicate overlapping clusters. -Advantages -~~~~~~~~~~ + - The score is higher when clusters are dense and well separated, which + relates to a standard concept of a cluster. -- The score is bounded between -1 for incorrect clustering and +1 for highly - dense clustering. Scores around zero indicate overlapping clusters. +.. topic:: Drawbacks: -- The score is higher when clusters are dense and well separated, which relates - to a standard concept of a cluster. + - The Silhouette Coefficient is generally higher for convex clusters than + other concepts of clusters, such as density based clusters like those + obtained through DBSCAN. +.. rubric:: Examples -Drawbacks -~~~~~~~~~ +* :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_silhouette_analysis.py` : In + this example the silhouette analysis is used to choose an optimal value for + n_clusters. -- The Silhouette Coefficient is generally higher for convex clusters than other - concepts of clusters, such as density based clusters like those obtained - through DBSCAN. +.. dropdown:: References -.. 
topic:: Examples: - - * :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_silhouette_analysis.py` : In this example - the silhouette analysis is used to choose an optimal value for n_clusters. + * Peter J. Rousseeuw (1987). :doi:`"Silhouettes: a Graphical Aid to the + Interpretation and Validation of Cluster Analysis"<10.1016/0377-0427(87)90125-7>`. + Computational and Applied Mathematics 20: 53-65. .. _calinski_harabasz_index: @@ -1971,52 +1966,50 @@ cluster analysis: >>> kmeans_model = KMeans(n_clusters=3, random_state=1).fit(X) >>> labels = kmeans_model.labels_ >>> metrics.calinski_harabasz_score(X, labels) - 561.62... + 561.59... -Advantages -~~~~~~~~~~ -- The score is higher when clusters are dense and well separated, which relates - to a standard concept of a cluster. +.. topic:: Advantages: -- The score is fast to compute. + - The score is higher when clusters are dense and well separated, which + relates to a standard concept of a cluster. + - The score is fast to compute. -Drawbacks -~~~~~~~~~ +.. topic:: Drawbacks: -- The Calinski-Harabasz index is generally higher for convex clusters than other - concepts of clusters, such as density based clusters like those obtained - through DBSCAN. + - The Calinski-Harabasz index is generally higher for convex clusters than + other concepts of clusters, such as density based clusters like those + obtained through DBSCAN. -Mathematical formulation -~~~~~~~~~~~~~~~~~~~~~~~~ +.. dropdown:: Mathematical formulation -For a set of data :math:`E` of size :math:`n_E` which has been clustered into -:math:`k` clusters, the Calinski-Harabasz score :math:`s` is defined as the -ratio of the between-clusters dispersion mean and the within-cluster dispersion: + For a set of data :math:`E` of size :math:`n_E` which has been clustered into + :math:`k` clusters, the Calinski-Harabasz score :math:`s` is defined as the + ratio of the between-clusters dispersion mean and the within-cluster + dispersion: -.. math:: - s = \frac{\mathrm{tr}(B_k)}{\mathrm{tr}(W_k)} \times \frac{n_E - k}{k - 1} + .. math:: + s = \frac{\mathrm{tr}(B_k)}{\mathrm{tr}(W_k)} \times \frac{n_E - k}{k - 1} -where :math:`\mathrm{tr}(B_k)` is trace of the between group dispersion matrix -and :math:`\mathrm{tr}(W_k)` is the trace of the within-cluster dispersion -matrix defined by: + where :math:`\mathrm{tr}(B_k)` is trace of the between group dispersion matrix + and :math:`\mathrm{tr}(W_k)` is the trace of the within-cluster dispersion + matrix defined by: -.. math:: W_k = \sum_{q=1}^k \sum_{x \in C_q} (x - c_q) (x - c_q)^T + .. math:: W_k = \sum_{q=1}^k \sum_{x \in C_q} (x - c_q) (x - c_q)^T -.. math:: B_k = \sum_{q=1}^k n_q (c_q - c_E) (c_q - c_E)^T + .. math:: B_k = \sum_{q=1}^k n_q (c_q - c_E) (c_q - c_E)^T -with :math:`C_q` the set of points in cluster :math:`q`, :math:`c_q` the center -of cluster :math:`q`, :math:`c_E` the center of :math:`E`, and :math:`n_q` the -number of points in cluster :math:`q`. + with :math:`C_q` the set of points in cluster :math:`q`, :math:`c_q` the + center of cluster :math:`q`, :math:`c_E` the center of :math:`E`, and + :math:`n_q` the number of points in cluster :math:`q`. -.. topic:: References +.. dropdown:: References - * Caliński, T., & Harabasz, J. (1974). - `"A Dendrite Method for Cluster Analysis" - `_. - :doi:`Communications in Statistics-theory and Methods 3: 1-27 <10.1080/03610927408827101>`. + * Caliński, T., & Harabasz, J. (1974). `"A Dendrite Method for Cluster Analysis" + `_. 
+ :doi:`Communications in Statistics-theory and Methods 3: 1-27 + <10.1080/03610927408827101>`. .. _davies-bouldin_index: @@ -2047,60 +2040,57 @@ cluster analysis as follows: >>> kmeans = KMeans(n_clusters=3, random_state=1).fit(X) >>> labels = kmeans.labels_ >>> davies_bouldin_score(X, labels) - 0.6619... + 0.666... -Advantages -~~~~~~~~~~ +.. topic:: Advantages: -- The computation of Davies-Bouldin is simpler than that of Silhouette scores. -- The index is solely based on quantities and features inherent to the dataset - as its computation only uses point-wise distances. + - The computation of Davies-Bouldin is simpler than that of Silhouette scores. + - The index is solely based on quantities and features inherent to the dataset + as its computation only uses point-wise distances. -Drawbacks -~~~~~~~~~ +.. topic:: Drawbacks: -- The Davies-Boulding index is generally higher for convex clusters than other - concepts of clusters, such as density based clusters like those obtained from - DBSCAN. -- The usage of centroid distance limits the distance metric to Euclidean space. + - The Davies-Bouldin index is generally higher for convex clusters than other + concepts of clusters, such as density based clusters like those obtained + from DBSCAN. + - The usage of centroid distance limits the distance metric to Euclidean + space. -Mathematical formulation -~~~~~~~~~~~~~~~~~~~~~~~~ +.. dropdown:: Mathematical formulation -The index is defined as the average similarity between each cluster :math:`C_i` -for :math:`i=1, ..., k` and its most similar one :math:`C_j`. In the context of -this index, similarity is defined as a measure :math:`R_{ij}` that trades off: + The index is defined as the average similarity between each cluster :math:`C_i` + for :math:`i=1, ..., k` and its most similar one :math:`C_j`. In the context of + this index, similarity is defined as a measure :math:`R_{ij}` that trades off: -- :math:`s_i`, the average distance between each point of cluster :math:`i` and - the centroid of that cluster -- also know as cluster diameter. -- :math:`d_{ij}`, the distance between cluster centroids :math:`i` and :math:`j`. + - :math:`s_i`, the average distance between each point of cluster :math:`i` and + the centroid of that cluster -- also known as cluster diameter. + - :math:`d_{ij}`, the distance between cluster centroids :math:`i` and + :math:`j`. -A simple choice to construct :math:`R_{ij}` so that it is nonnegative and -symmetric is: + A simple choice to construct :math:`R_{ij}` so that it is nonnegative and + symmetric is: -.. math:: - R_{ij} = \frac{s_i + s_j}{d_{ij}} + .. math:: + R_{ij} = \frac{s_i + s_j}{d_{ij}} -Then the Davies-Bouldin index is defined as: - -.. math:: - DB = \frac{1}{k} \sum_{i=1}^k \max_{i \neq j} R_{ij} + Then the Davies-Bouldin index is defined as: + .. math:: + DB = \frac{1}{k} \sum_{i=1}^k \max_{i \neq j} R_{ij} -.. topic:: References +.. dropdown:: References - * Davies, David L.; Bouldin, Donald W. (1979). - :doi:`"A Cluster Separation Measure" <10.1109/TPAMI.1979.4766909>` - IEEE Transactions on Pattern Analysis and Machine Intelligence. - PAMI-1 (2): 224-227. + * Davies, David L.; Bouldin, Donald W. (1979). :doi:`"A Cluster Separation + Measure" <10.1109/TPAMI.1979.4766909>` IEEE Transactions on Pattern Analysis + and Machine Intelligence. PAMI-1 (2): 224-227. - * Halkidi, Maria; Batistakis, Yannis; Vazirgiannis, Michalis (2001).
- :doi:`"On Clustering Validation Techniques" <10.1023/A:1012801612483>` - Journal of Intelligent Information Systems, 17(2-3), 107-145. + * Halkidi, Maria; Batistakis, Yannis; Vazirgiannis, Michalis (2001). :doi:`"On + Clustering Validation Techniques" <10.1023/A:1012801612483>` Journal of + Intelligent Information Systems, 17(2-3), 107-145. - * `Wikipedia entry for Davies-Bouldin index - `_. + * `Wikipedia entry for Davies-Bouldin index + `_. .. _contingency_matrix: @@ -2134,30 +2124,28 @@ contingency matrix where the order of rows and columns correspond to a list of classes. -Advantages -~~~~~~~~~~ +.. topic:: Advantages: -- Allows to examine the spread of each true cluster across predicted - clusters and vice versa. + - Allows to examine the spread of each true cluster across predicted clusters + and vice versa. -- The contingency table calculated is typically utilized in the calculation - of a similarity statistic (like the others listed in this document) between - the two clusterings. + - The contingency table calculated is typically utilized in the calculation of + a similarity statistic (like the others listed in this document) between the + two clusterings. -Drawbacks -~~~~~~~~~ +.. topic:: Drawbacks: -- Contingency matrix is easy to interpret for a small number of clusters, but - becomes very hard to interpret for a large number of clusters. + - Contingency matrix is easy to interpret for a small number of clusters, but + becomes very hard to interpret for a large number of clusters. -- It doesn't give a single metric to use as an objective for clustering - optimisation. + - It doesn't give a single metric to use as an objective for clustering + optimisation. +.. dropdown:: References -.. topic:: References + * `Wikipedia entry for contingency matrix + `_ - * `Wikipedia entry for contingency matrix - `_ .. _pair_confusion_matrix: @@ -2180,19 +2168,19 @@ under the true and predicted clusterings. It has the following entries: - :math:`C_{00}` : number of pairs with both clusterings having the samples - not clustered together +:math:`C_{00}` : number of pairs with both clusterings having the samples +not clustered together - :math:`C_{10}` : number of pairs with the true label clustering having the - samples clustered together but the other clustering not having the samples - clustered together +:math:`C_{10}` : number of pairs with the true label clustering having the +samples clustered together but the other clustering not having the samples +clustered together - :math:`C_{01}` : number of pairs with the true label clustering not having - the samples clustered together but the other clustering having the samples - clustered together +:math:`C_{01}` : number of pairs with the true label clustering not having +the samples clustered together but the other clustering having the samples +clustered together - :math:`C_{11}` : number of pairs with both clusterings having the samples - clustered together +:math:`C_{11}` : number of pairs with both clusterings having the samples +clustered together Considering a pair of samples that is clustered together a positive pair, then as in binary classification the count of true negatives is @@ -2235,7 +2223,7 @@ diagonal entries:: array([[ 0, 0], [12, 0]]) -.. topic:: References +.. dropdown:: References - * :doi:`"Comparing Partitions" <10.1007/BF01908075>` - L. Hubert and P. Arabie, Journal of Classification 1985 + * :doi:`"Comparing Partitions" <10.1007/BF01908075>` L. Hubert and P. 
Arabie, + Journal of Classification 1985 diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst index 5bcee9550b968..655ea551e0375 100644 --- a/doc/modules/compose.rst +++ b/doc/modules/compose.rst @@ -5,14 +5,24 @@ Pipelines and composite estimators ================================== -Transformers are usually combined with classifiers, regressors or other -estimators to build a composite estimator. The most common tool is a -:ref:`Pipeline `. Pipeline is often used in combination with -:ref:`FeatureUnion ` which concatenates the output of -transformers into a composite feature space. :ref:`TransformedTargetRegressor -` deals with transforming the :term:`target` -(i.e. log-transform :term:`y`). In contrast, Pipelines only transform the -observed data (:term:`X`). +To build a composite estimator, transformers are usually combined with other +transformers or with :term:`predictors` (such as classifiers or regressors). +The most common tool used for composing estimators is a :ref:`Pipeline +`. Pipelines require all steps except the last to be a +:term:`transformer`. The last step can be anything, a transformer, a +:term:`predictor`, or a clustering estimator which might have or not have a +`.predict(...)` method. A pipeline exposes all methods provided by the last +estimator: if the last step provides a `transform` method, then the pipeline +would have a `transform` method and behave like a transformer. If the last step +provides a `predict` method, then the pipeline would expose that method, and +given a data :term:`X`, use all steps except the last to transform the data, +and then give that transformed data to the `predict` method of the last step of +the pipeline. The class :class:`Pipeline` is often used in combination with +:ref:`ColumnTransformer ` or +:ref:`FeatureUnion ` which concatenate the output of transformers +into a composite feature space. +:ref:`TransformedTargetRegressor ` +deals with transforming the :term:`target` (i.e. log-transform :term:`y`). .. _pipeline: @@ -41,12 +51,21 @@ All estimators in a pipeline, except the last one, must be transformers (i.e. must have a :term:`transform` method). The last estimator may be any type (transformer, classifier, etc.). +.. note:: + + Calling ``fit`` on the pipeline is the same as calling ``fit`` on + each estimator in turn, ``transform`` the input and pass it on to the next step. + The pipeline has all the methods that the last estimator in the pipeline has, + i.e. if the last estimator is a classifier, the :class:`Pipeline` can be used + as a classifier. If the last estimator is a transformer, again, so is the + pipeline. + Usage ----- -Construction -............ +Build a pipeline +................ The :class:`Pipeline` is built using a list of ``(key, value)`` pairs, where the ``key`` is a string containing the name you want to give this step and ``value`` @@ -60,38 +79,22 @@ is an estimator object:: >>> pipe Pipeline(steps=[('reduce_dim', PCA()), ('clf', SVC())]) -The utility function :func:`make_pipeline` is a shorthand -for constructing pipelines; -it takes a variable number of estimators and returns a pipeline, -filling in the names automatically:: - - >>> from sklearn.pipeline import make_pipeline - >>> from sklearn.naive_bayes import MultinomialNB - >>> from sklearn.preprocessing import Binarizer - >>> make_pipeline(Binarizer(), MultinomialNB()) - Pipeline(steps=[('binarizer', Binarizer()), ('multinomialnb', MultinomialNB())]) - -Accessing steps -............... +.. 
dropdown:: Shorthand version using :func:`make_pipeline` -The estimators of a pipeline are stored as a list in the ``steps`` attribute, -but can be accessed by index or name by indexing (with ``[idx]``) the -Pipeline:: + The utility function :func:`make_pipeline` is a shorthand + for constructing pipelines; + it takes a variable number of estimators and returns a pipeline, + filling in the names automatically:: - >>> pipe.steps[0] - ('reduce_dim', PCA()) - >>> pipe[0] - PCA() - >>> pipe['reduce_dim'] - PCA() + >>> from sklearn.pipeline import make_pipeline + >>> make_pipeline(PCA(), SVC()) + Pipeline(steps=[('pca', PCA()), ('svc', SVC())]) -Pipeline's `named_steps` attribute allows accessing steps by name with tab -completion in interactive environments:: +Access pipeline steps +..................... - >>> pipe.named_steps.reduce_dim is pipe['reduce_dim'] - True - -A sub-pipeline can also be extracted using the slicing notation commonly used +The estimators of a pipeline are stored as a list in the ``steps`` attribute. +A sub-pipeline can be extracted using the slicing notation commonly used for Python Sequences such as lists or strings (although only a step of 1 is permitted). This is convenient for performing only some of the transformations (or their inverse): @@ -101,49 +104,33 @@ permitted). This is convenient for performing only some of the transformations >>> pipe[-1:] Pipeline(steps=[('clf', SVC())]) +.. dropdown:: Accessing a step by name or position -.. _pipeline_nested_parameters: - -Nested parameters -................. - -Parameters of the estimators in the pipeline can be accessed using the -``__`` syntax:: - - >>> pipe.set_params(clf__C=10) - Pipeline(steps=[('reduce_dim', PCA()), ('clf', SVC(C=10))]) - -This is particularly important for doing grid searches:: - - >>> from sklearn.model_selection import GridSearchCV - >>> param_grid = dict(reduce_dim__n_components=[2, 5, 10], - ... clf__C=[0.1, 10, 100]) - >>> grid_search = GridSearchCV(pipe, param_grid=param_grid) - -Individual steps may also be replaced as parameters, and non-final steps may be -ignored by setting them to ``'passthrough'``:: - - >>> from sklearn.linear_model import LogisticRegression - >>> param_grid = dict(reduce_dim=['passthrough', PCA(5), PCA(10)], - ... clf=[SVC(), LogisticRegression()], - ... clf__C=[0.1, 10, 100]) - >>> grid_search = GridSearchCV(pipe, param_grid=param_grid) + A specific step can also be accessed by index or name by indexing (with ``[idx]``) the + pipeline:: -The estimators of the pipeline can be retrieved by index: + >>> pipe.steps[0] + ('reduce_dim', PCA()) + >>> pipe[0] + PCA() + >>> pipe['reduce_dim'] + PCA() - >>> pipe[0] - PCA() + `Pipeline`'s `named_steps` attribute allows accessing steps by name with tab + completion in interactive environments:: -or by name:: + >>> pipe.named_steps.reduce_dim is pipe['reduce_dim'] + True - >>> pipe['reduce_dim'] - PCA() +Tracking feature names in a pipeline +.................................... To enable model inspection, :class:`~sklearn.pipeline.Pipeline` has a ``get_feature_names_out()`` method, just like all transformers. 
You can use pipeline slicing to get the feature names going into each step:: >>> from sklearn.datasets import load_iris + >>> from sklearn.linear_model import LogisticRegression >>> from sklearn.feature_selection import SelectKBest >>> iris = load_iris() >>> pipe = Pipeline(steps=[ @@ -154,36 +141,60 @@ pipeline slicing to get the feature names going into each step:: >>> pipe[:-1].get_feature_names_out() array(['x2', 'x3'], ...) -You can also provide custom feature names for the input data using -``get_feature_names_out``:: +.. dropdown:: Customize feature names - >>> pipe[:-1].get_feature_names_out(iris.feature_names) - array(['petal length (cm)', 'petal width (cm)'], ...) + You can also provide custom feature names for the input data using + ``get_feature_names_out``:: -.. topic:: Examples: + >>> pipe[:-1].get_feature_names_out(iris.feature_names) + array(['petal length (cm)', 'petal width (cm)'], ...) - * :ref:`sphx_glr_auto_examples_feature_selection_plot_feature_selection_pipeline.py` - * :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_text_feature_extraction.py` - * :ref:`sphx_glr_auto_examples_compose_plot_digits_pipe.py` - * :ref:`sphx_glr_auto_examples_miscellaneous_plot_kernel_approximation.py` - * :ref:`sphx_glr_auto_examples_svm_plot_svm_anova.py` - * :ref:`sphx_glr_auto_examples_compose_plot_compare_reduction.py` - * :ref:`sphx_glr_auto_examples_miscellaneous_plot_pipeline_display.py` +.. _pipeline_nested_parameters: -.. topic:: See Also: +Access to nested parameters +........................... - * :ref:`composite_grid_search` +It is common to adjust the parameters of an estimator within a pipeline. This parameter +is therefore nested because it belongs to a particular sub-step. Parameters of the +estimators in the pipeline are accessible using the ``__`` +syntax:: + >>> pipe = Pipeline(steps=[("reduce_dim", PCA()), ("clf", SVC())]) + >>> pipe.set_params(clf__C=10) + Pipeline(steps=[('reduce_dim', PCA()), ('clf', SVC(C=10))]) -Notes ------ +.. dropdown:: When does it matter? + + This is particularly important for doing grid searches:: + + >>> from sklearn.model_selection import GridSearchCV + >>> param_grid = dict(reduce_dim__n_components=[2, 5, 10], + ... clf__C=[0.1, 10, 100]) + >>> grid_search = GridSearchCV(pipe, param_grid=param_grid) + + Individual steps may also be replaced as parameters, and non-final steps may be + ignored by setting them to ``'passthrough'``:: + + >>> param_grid = dict(reduce_dim=['passthrough', PCA(5), PCA(10)], + ... clf=[SVC(), LogisticRegression()], + ... clf__C=[0.1, 10, 100]) + >>> grid_search = GridSearchCV(pipe, param_grid=param_grid) + + .. seealso:: + + * :ref:`composite_grid_search` + + +.. rubric:: Examples + +* :ref:`sphx_glr_auto_examples_feature_selection_plot_feature_selection_pipeline.py` +* :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_text_feature_extraction.py` +* :ref:`sphx_glr_auto_examples_compose_plot_digits_pipe.py` +* :ref:`sphx_glr_auto_examples_miscellaneous_plot_kernel_approximation.py` +* :ref:`sphx_glr_auto_examples_svm_plot_svm_anova.py` +* :ref:`sphx_glr_auto_examples_compose_plot_compare_reduction.py` +* :ref:`sphx_glr_auto_examples_miscellaneous_plot_pipeline_display.py` -Calling ``fit`` on the pipeline is the same as calling ``fit`` on -each estimator in turn, ``transform`` the input and pass it on to the next step. -The pipeline has all the methods that the last estimator in the pipeline has, -i.e. 
if the last estimator is a classifier, the :class:`Pipeline` can be used -as a classifier. If the last estimator is a transformer, again, so is the -pipeline. .. _pipeline_cache: @@ -219,47 +230,49 @@ object:: >>> # Clear the cache directory when you don't need it anymore >>> rmtree(cachedir) -.. warning:: **Side effect of caching transformers** - - Using a :class:`Pipeline` without cache enabled, it is possible to - inspect the original instance such as:: - - >>> from sklearn.datasets import load_digits - >>> X_digits, y_digits = load_digits(return_X_y=True) - >>> pca1 = PCA() - >>> svm1 = SVC() - >>> pipe = Pipeline([('reduce_dim', pca1), ('clf', svm1)]) - >>> pipe.fit(X_digits, y_digits) - Pipeline(steps=[('reduce_dim', PCA()), ('clf', SVC())]) - >>> # The pca instance can be inspected directly - >>> print(pca1.components_) - [[-1.77484909e-19 ... 4.07058917e-18]] - - Enabling caching triggers a clone of the transformers before fitting. - Therefore, the transformer instance given to the pipeline cannot be - inspected directly. - In following example, accessing the :class:`PCA` instance ``pca2`` - will raise an ``AttributeError`` since ``pca2`` will be an unfitted - transformer. - Instead, use the attribute ``named_steps`` to inspect estimators within - the pipeline:: - - >>> cachedir = mkdtemp() - >>> pca2 = PCA() - >>> svm2 = SVC() - >>> cached_pipe = Pipeline([('reduce_dim', pca2), ('clf', svm2)], - ... memory=cachedir) - >>> cached_pipe.fit(X_digits, y_digits) - Pipeline(memory=..., - steps=[('reduce_dim', PCA()), ('clf', SVC())]) - >>> print(cached_pipe.named_steps['reduce_dim'].components_) - [[-1.77484909e-19 ... 4.07058917e-18]] - >>> # Remove the cache directory - >>> rmtree(cachedir) - -.. topic:: Examples: - - * :ref:`sphx_glr_auto_examples_compose_plot_compare_reduction.py` +.. dropdown:: Side effect of caching transformers + :color: warning + + Using a :class:`Pipeline` without cache enabled, it is possible to + inspect the original instance such as:: + + >>> from sklearn.datasets import load_digits + >>> X_digits, y_digits = load_digits(return_X_y=True) + >>> pca1 = PCA(n_components=10) + >>> svm1 = SVC() + >>> pipe = Pipeline([('reduce_dim', pca1), ('clf', svm1)]) + >>> pipe.fit(X_digits, y_digits) + Pipeline(steps=[('reduce_dim', PCA(n_components=10)), ('clf', SVC())]) + >>> # The pca instance can be inspected directly + >>> pca1.components_.shape + (10, 64) + + Enabling caching triggers a clone of the transformers before fitting. + Therefore, the transformer instance given to the pipeline cannot be + inspected directly. + In following example, accessing the :class:`~sklearn.decomposition.PCA` + instance ``pca2`` will raise an ``AttributeError`` since ``pca2`` will be an + unfitted transformer. + Instead, use the attribute ``named_steps`` to inspect estimators within + the pipeline:: + + >>> cachedir = mkdtemp() + >>> pca2 = PCA(n_components=10) + >>> svm2 = SVC() + >>> cached_pipe = Pipeline([('reduce_dim', pca2), ('clf', svm2)], + ... memory=cachedir) + >>> cached_pipe.fit(X_digits, y_digits) + Pipeline(memory=..., + steps=[('reduce_dim', PCA(n_components=10)), ('clf', SVC())]) + >>> cached_pipe.named_steps['reduce_dim'].components_.shape + (10, 64) + >>> # Remove the cache directory + >>> rmtree(cachedir) + + +.. rubric:: Examples + +* :ref:`sphx_glr_auto_examples_compose_plot_compare_reduction.py` .. _transformed_target_regressor: @@ -332,9 +345,9 @@ each other. However, it is possible to bypass this checking by setting pair of functions ``func`` and ``inverse_func``. 
However, setting both options will raise an error. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_compose_plot_transformed_target.py` +* :ref:`sphx_glr_auto_examples_compose_plot_transformed_target.py` .. _feature_union: @@ -396,9 +409,9 @@ and ignored by setting to ``'drop'``:: FeatureUnion(transformer_list=[('linear_pca', PCA()), ('kernel_pca', 'drop')]) -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_compose_plot_feature_union.py` +* :ref:`sphx_glr_auto_examples_compose_plot_feature_union.py` .. _column_transformer: @@ -591,7 +604,7 @@ As an alternative, the HTML can be written to a file using >>> with open('my_estimator.html', 'w') as f: # doctest: +SKIP ... f.write(estimator_html_repr(clf)) -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_compose_plot_column_transformer.py` - * :ref:`sphx_glr_auto_examples_compose_plot_column_transformer_mixed_types.py` +* :ref:`sphx_glr_auto_examples_compose_plot_column_transformer.py` +* :ref:`sphx_glr_auto_examples_compose_plot_column_transformer_mixed_types.py` diff --git a/doc/modules/covariance.rst b/doc/modules/covariance.rst index 50927f9a677f6..847e489c87333 100644 --- a/doc/modules/covariance.rst +++ b/doc/modules/covariance.rst @@ -40,11 +40,10 @@ on whether the data are centered, so one may want to use the same mean vector as the training set. If not, both should be centered by the user, and ``assume_centered=True`` should be used. -.. topic:: Examples: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_covariance_plot_covariance_estimation.py` for - an example on how to fit an :class:`EmpiricalCovariance` object - to data. +* See :ref:`sphx_glr_auto_examples_covariance_plot_covariance_estimation.py` for + an example on how to fit an :class:`EmpiricalCovariance` object to data. .. _shrunk_covariance: @@ -84,11 +83,10 @@ Tr}\hat{\Sigma}}{p}\rm Id`. Choosing the amount of shrinkage, :math:`\alpha` amounts to setting a bias/variance trade-off, and is discussed below. -.. topic:: Examples: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_covariance_plot_covariance_estimation.py` for - an example on how to fit a :class:`ShrunkCovariance` object - to data. +* See :ref:`sphx_glr_auto_examples_covariance_plot_covariance_estimation.py` for + an example on how to fit a :class:`ShrunkCovariance` object to data. Ledoit-Wolf shrinkage @@ -121,18 +119,18 @@ fitting a :class:`LedoitWolf` object to the same sample. Since the population covariance is already a multiple of the identity matrix, the Ledoit-Wolf solution is indeed a reasonable estimate. -.. topic:: Examples: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_covariance_plot_covariance_estimation.py` for - an example on how to fit a :class:`LedoitWolf` object to data and - for visualizing the performances of the Ledoit-Wolf estimator in - terms of likelihood. +* See :ref:`sphx_glr_auto_examples_covariance_plot_covariance_estimation.py` for + an example on how to fit a :class:`LedoitWolf` object to data and + for visualizing the performances of the Ledoit-Wolf estimator in + terms of likelihood. -.. topic:: References: +.. rubric:: References - .. [1] O. Ledoit and M. Wolf, "A Well-Conditioned Estimator for Large-Dimensional - Covariance Matrices", Journal of Multivariate Analysis, Volume 88, Issue 2, - February 2004, pages 365-411. +.. [1] O. Ledoit and M. 
Wolf, "A Well-Conditioned Estimator for Large-Dimensional + Covariance Matrices", Journal of Multivariate Analysis, Volume 88, Issue 2, + February 2004, pages 365-411. .. _oracle_approximating_shrinkage: @@ -158,22 +156,21 @@ object to the same sample. Bias-variance trade-off when setting the shrinkage: comparing the choices of Ledoit-Wolf and OAS estimators -.. topic:: References: +.. rubric:: References - .. [2] :arxiv:`"Shrinkage algorithms for MMSE covariance estimation.", - Chen, Y., Wiesel, A., Eldar, Y. C., & Hero, A. O. - IEEE Transactions on Signal Processing, 58(10), 5016-5029, 2010. - <0907.4698>` +.. [2] :arxiv:`"Shrinkage algorithms for MMSE covariance estimation.", + Chen, Y., Wiesel, A., Eldar, Y. C., & Hero, A. O. + IEEE Transactions on Signal Processing, 58(10), 5016-5029, 2010. + <0907.4698>` -.. topic:: Examples: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_covariance_plot_covariance_estimation.py` for - an example on how to fit an :class:`OAS` object - to data. +* See :ref:`sphx_glr_auto_examples_covariance_plot_covariance_estimation.py` for + an example on how to fit an :class:`OAS` object to data. - * See :ref:`sphx_glr_auto_examples_covariance_plot_lw_vs_oas.py` to visualize the - Mean Squared Error difference between a :class:`LedoitWolf` and - an :class:`OAS` estimator of the covariance. +* See :ref:`sphx_glr_auto_examples_covariance_plot_lw_vs_oas.py` to visualize the + Mean Squared Error difference between a :class:`LedoitWolf` and + an :class:`OAS` estimator of the covariance. .. figure:: ../auto_examples/covariance/images/sphx_glr_plot_lw_vs_oas_001.png @@ -254,20 +251,20 @@ problem is the GLasso algorithm, from the Friedman 2008 Biostatistics paper. It is the same algorithm as in the R ``glasso`` package. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_covariance_plot_sparse_cov.py`: example on synthetic - data showing some recovery of a structure, and comparing to other - covariance estimators. +* :ref:`sphx_glr_auto_examples_covariance_plot_sparse_cov.py`: example on synthetic + data showing some recovery of a structure, and comparing to other + covariance estimators. - * :ref:`sphx_glr_auto_examples_applications_plot_stock_market.py`: example on real - stock market data, finding which symbols are most linked. +* :ref:`sphx_glr_auto_examples_applications_plot_stock_market.py`: example on real + stock market data, finding which symbols are most linked. -.. topic:: References: +.. rubric:: References - * Friedman et al, `"Sparse inverse covariance estimation with the - graphical lasso" `_, - Biostatistics 9, pp 432, 2008 +* Friedman et al, `"Sparse inverse covariance estimation with the + graphical lasso" `_, + Biostatistics 9, pp 432, 2008 .. _robust_covariance: @@ -313,24 +310,24 @@ the same time. Raw estimates can be accessed as ``raw_location_`` and ``raw_covariance_`` attributes of a :class:`MinCovDet` robust covariance estimator object. -.. topic:: References: +.. rubric:: References - .. [3] P. J. Rousseeuw. Least median of squares regression. - J. Am Stat Ass, 79:871, 1984. - .. [4] A Fast Algorithm for the Minimum Covariance Determinant Estimator, - 1999, American Statistical Association and the American Society - for Quality, TECHNOMETRICS. +.. [3] P. J. Rousseeuw. Least median of squares regression. + J. Am Stat Ass, 79:871, 1984. +.. [4] A Fast Algorithm for the Minimum Covariance Determinant Estimator, + 1999, American Statistical Association and the American Society + for Quality, TECHNOMETRICS. -.. 
topic:: Examples: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_covariance_plot_robust_vs_empirical_covariance.py` for - an example on how to fit a :class:`MinCovDet` object to data and see how - the estimate remains accurate despite the presence of outliers. +* See :ref:`sphx_glr_auto_examples_covariance_plot_robust_vs_empirical_covariance.py` for + an example on how to fit a :class:`MinCovDet` object to data and see how + the estimate remains accurate despite the presence of outliers. - * See :ref:`sphx_glr_auto_examples_covariance_plot_mahalanobis_distances.py` to - visualize the difference between :class:`EmpiricalCovariance` and - :class:`MinCovDet` covariance estimators in terms of Mahalanobis distance - (so we get a better estimate of the precision matrix too). +* See :ref:`sphx_glr_auto_examples_covariance_plot_mahalanobis_distances.py` to + visualize the difference between :class:`EmpiricalCovariance` and + :class:`MinCovDet` covariance estimators in terms of Mahalanobis distance + (so we get a better estimate of the precision matrix too). .. |robust_vs_emp| image:: ../auto_examples/covariance/images/sphx_glr_plot_robust_vs_empirical_covariance_001.png :target: ../auto_examples/covariance/plot_robust_vs_empirical_covariance.html diff --git a/doc/modules/cross_decomposition.rst b/doc/modules/cross_decomposition.rst index 337a7bcd250bb..2d630de699c7a 100644 --- a/doc/modules/cross_decomposition.rst +++ b/doc/modules/cross_decomposition.rst @@ -92,38 +92,35 @@ Step *a)* may be performed in two ways: either by computing the whole SVD of values, or by directly computing the singular vectors using the power method (cf section 11.3 in [1]_), which corresponds to the `'nipals'` option of the `algorithm` parameter. +.. dropdown:: Transforming data -Transforming data -^^^^^^^^^^^^^^^^^ + To transform :math:`X` into :math:`\bar{X}`, we need to find a projection + matrix :math:`P` such that :math:`\bar{X} = XP`. We know that for the + training data, :math:`\Xi = XP`, and :math:`X = \Xi \Gamma^T`. Setting + :math:`P = U(\Gamma^T U)^{-1}` where :math:`U` is the matrix with the + :math:`u_k` in the columns, we have :math:`XP = X U(\Gamma^T U)^{-1} = \Xi + (\Gamma^T U) (\Gamma^T U)^{-1} = \Xi` as desired. The rotation matrix + :math:`P` can be accessed from the `x_rotations_` attribute. -To transform :math:`X` into :math:`\bar{X}`, we need to find a projection -matrix :math:`P` such that :math:`\bar{X} = XP`. We know that for the -training data, :math:`\Xi = XP`, and :math:`X = \Xi \Gamma^T`. Setting -:math:`P = U(\Gamma^T U)^{-1}` where :math:`U` is the matrix with the -:math:`u_k` in the columns, we have :math:`XP = X U(\Gamma^T U)^{-1} = \Xi -(\Gamma^T U) (\Gamma^T U)^{-1} = \Xi` as desired. The rotation matrix -:math:`P` can be accessed from the `x_rotations_` attribute. + Similarly, :math:`Y` can be transformed using the rotation matrix + :math:`V(\Delta^T V)^{-1}`, accessed via the `y_rotations_` attribute. -Similarly, :math:`Y` can be transformed using the rotation matrix -:math:`V(\Delta^T V)^{-1}`, accessed via the `y_rotations_` attribute. +.. dropdown:: Predicting the targets `Y` -Predicting the targets Y -^^^^^^^^^^^^^^^^^^^^^^^^ + To predict the targets of some data :math:`X`, we are looking for a + coefficient matrix :math:`\beta \in R^{d \times t}` such that :math:`Y = + X\beta`. -To predict the targets of some data :math:`X`, we are looking for a -coefficient matrix :math:`\beta \in R^{d \times t}` such that :math:`Y = -X\beta`. 
+ The idea is to try to predict the transformed targets :math:`\Omega` as a + function of the transformed samples :math:`\Xi`, by computing :math:`\alpha + \in \mathbb{R}` such that :math:`\Omega = \alpha \Xi`. -The idea is to try to predict the transformed targets :math:`\Omega` as a -function of the transformed samples :math:`\Xi`, by computing :math:`\alpha -\in \mathbb{R}` such that :math:`\Omega = \alpha \Xi`. + Then, we have :math:`Y = \Omega \Delta^T = \alpha \Xi \Delta^T`, and since + :math:`\Xi` is the transformed training data we have that :math:`Y = X \alpha + P \Delta^T`, and as a result the coefficient matrix :math:`\beta = \alpha P + \Delta^T`. -Then, we have :math:`Y = \Omega \Delta^T = \alpha \Xi \Delta^T`, and since -:math:`\Xi` is the transformed training data we have that :math:`Y = X \alpha -P \Delta^T`, and as a result the coefficient matrix :math:`\beta = \alpha P -\Delta^T`. - -:math:`\beta` can be accessed through the `coef_` attribute. + :math:`\beta` can be accessed through the `coef_` attribute. PLSSVD ------ @@ -180,15 +177,13 @@ Since :class:`CCA` involves the inversion of :math:`X_k^TX_k` and :math:`Y_k^TY_k`, this estimator can be unstable if the number of features or targets is greater than the number of samples. +.. rubric:: References -.. topic:: Reference: - - .. [1] `A survey of Partial Least Squares (PLS) methods, with emphasis on - the two-block case - `_ - JA Wegelin +.. [1] `A survey of Partial Least Squares (PLS) methods, with emphasis on the two-block + case `_, + JA Wegelin -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_cross_decomposition_plot_compare_cross_decomposition.py` - * :ref:`sphx_glr_auto_examples_cross_decomposition_plot_pcr_vs_pls.py` +* :ref:`sphx_glr_auto_examples_cross_decomposition_plot_compare_cross_decomposition.py` +* :ref:`sphx_glr_auto_examples_cross_decomposition_plot_pcr_vs_pls.py` diff --git a/doc/modules/cross_validation.rst b/doc/modules/cross_validation.rst index 6158e000cb727..defcd91a6008a 100644 --- a/doc/modules/cross_validation.rst +++ b/doc/modules/cross_validation.rst @@ -86,10 +86,10 @@ the training set is split into *k* smaller sets but generally follow the same principles). The following procedure is followed for each of the *k* "folds": - * A model is trained using :math:`k-1` of the folds as training data; - * the resulting model is validated on the remaining part of the data - (i.e., it is used as a test set to compute a performance measure - such as accuracy). +* A model is trained using :math:`k-1` of the folds as training data; +* the resulting model is validated on the remaining part of the data + (i.e., it is used as a test set to compute a performance measure + such as accuracy). The performance measure reported by *k*-fold cross-validation is then the average of the values computed in the loop. @@ -102,6 +102,7 @@ where the number of samples is very small. .. image:: ../images/grid_search_cross_validation.png :width: 500px :height: 300px + :alt: A depiction of a 5 fold cross validation on a training set, while holding out a test set. :align: center Computing cross-validated metrics @@ -169,32 +170,32 @@ indices, for example:: >>> cross_val_score(clf, X, y, cv=custom_cv) array([1. , 0.973...]) -.. topic:: Data transformation with held out data +.. dropdown:: Data transformation with held-out data - Just as it is important to test a predictor on data held-out from - training, preprocessing (such as standardization, feature selection, etc.) 
- and similar :ref:`data transformations ` similarly should - be learnt from a training set and applied to held-out data for prediction:: + Just as it is important to test a predictor on data held-out from + training, preprocessing (such as standardization, feature selection, etc.) + and similar :ref:`data transformations ` similarly should + be learnt from a training set and applied to held-out data for prediction:: - >>> from sklearn import preprocessing - >>> X_train, X_test, y_train, y_test = train_test_split( - ... X, y, test_size=0.4, random_state=0) - >>> scaler = preprocessing.StandardScaler().fit(X_train) - >>> X_train_transformed = scaler.transform(X_train) - >>> clf = svm.SVC(C=1).fit(X_train_transformed, y_train) - >>> X_test_transformed = scaler.transform(X_test) - >>> clf.score(X_test_transformed, y_test) - 0.9333... + >>> from sklearn import preprocessing + >>> X_train, X_test, y_train, y_test = train_test_split( + ... X, y, test_size=0.4, random_state=0) + >>> scaler = preprocessing.StandardScaler().fit(X_train) + >>> X_train_transformed = scaler.transform(X_train) + >>> clf = svm.SVC(C=1).fit(X_train_transformed, y_train) + >>> X_test_transformed = scaler.transform(X_test) + >>> clf.score(X_test_transformed, y_test) + 0.9333... - A :class:`Pipeline ` makes it easier to compose - estimators, providing this behavior under cross-validation:: + A :class:`Pipeline ` makes it easier to compose + estimators, providing this behavior under cross-validation:: - >>> from sklearn.pipeline import make_pipeline - >>> clf = make_pipeline(preprocessing.StandardScaler(), svm.SVC(C=1)) - >>> cross_val_score(clf, X, y, cv=cv) - array([0.977..., 0.933..., 0.955..., 0.933..., 0.977...]) + >>> from sklearn.pipeline import make_pipeline + >>> clf = make_pipeline(preprocessing.StandardScaler(), svm.SVC(C=1)) + >>> cross_val_score(clf, X, y, cv=cv) + array([0.977..., 0.933..., 0.955..., 0.933..., 0.977...]) - See :ref:`combining_estimators`. + See :ref:`combining_estimators`. .. _multimetric_cross_validation: @@ -290,14 +291,14 @@ The function :func:`cross_val_predict` is appropriate for: The available cross validation iterators are introduced in the following section. -.. topic:: Examples +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_model_selection_plot_roc_crossval.py`, - * :ref:`sphx_glr_auto_examples_feature_selection_plot_rfe_with_cross_validation.py`, - * :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_digits.py`, - * :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_text_feature_extraction.py`, - * :ref:`sphx_glr_auto_examples_model_selection_plot_cv_predict.py`, - * :ref:`sphx_glr_auto_examples_model_selection_plot_nested_cross_validation_iris.py`. +* :ref:`sphx_glr_auto_examples_model_selection_plot_roc_crossval.py`, +* :ref:`sphx_glr_auto_examples_feature_selection_plot_rfe_with_cross_validation.py`, +* :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_digits.py`, +* :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_text_feature_extraction.py`, +* :ref:`sphx_glr_auto_examples_model_selection_plot_cv_predict.py`, +* :ref:`sphx_glr_auto_examples_model_selection_plot_nested_cross_validation_iris.py`. Cross validation iterators ========================== @@ -438,20 +439,19 @@ then 5- or 10- fold cross validation can overestimate the generalization error. As a general rule, most authors, and empirical evidence, suggest that 5- or 10- fold cross validation should be preferred to LOO. +.. dropdown:: References -.. 
topic:: References: - - * ``_; - * T. Hastie, R. Tibshirani, J. Friedman, `The Elements of Statistical Learning - `_, Springer 2009 - * L. Breiman, P. Spector `Submodel selection and evaluation in regression: The X-random case - `_, International Statistical Review 1992; - * R. Kohavi, `A Study of Cross-Validation and Bootstrap for Accuracy Estimation and Model Selection - `_, Intl. Jnt. Conf. AI - * R. Bharat Rao, G. Fung, R. Rosales, `On the Dangers of Cross-Validation. An Experimental Evaluation - `_, SIAM 2008; - * G. James, D. Witten, T. Hastie, R Tibshirani, `An Introduction to - Statistical Learning `_, Springer 2013. + * ``_; + * T. Hastie, R. Tibshirani, J. Friedman, `The Elements of Statistical Learning + `_, Springer 2009 + * L. Breiman, P. Spector `Submodel selection and evaluation in regression: The X-random case + `_, International Statistical Review 1992; + * R. Kohavi, `A Study of Cross-Validation and Bootstrap for Accuracy Estimation and Model Selection + `_, Intl. Jnt. Conf. AI + * R. Bharat Rao, G. Fung, R. Rosales, `On the Dangers of Cross-Validation. An Experimental Evaluation + `_, SIAM 2008; + * G. James, D. Witten, T. Hastie, R Tibshirani, `An Introduction to + Statistical Learning `_, Springer 2013. .. _leave_p_out: @@ -520,8 +520,8 @@ the proportion of samples on each side of the train / test split. .. _stratification: -Cross-validation iterators with stratification based on class labels. ---------------------------------------------------------------------- +Cross-validation iterators with stratification based on class labels +-------------------------------------------------------------------- Some classification problems can exhibit a large imbalance in the distribution of the target classes: for instance there could be several times more negative @@ -590,6 +590,19 @@ Here is a visualization of the cross-validation behavior. :align: center :scale: 75% +.. _predefined_split: + +Predefined fold-splits / Validation-sets +---------------------------------------- + +For some datasets, a pre-defined split of the data into training- and +validation fold or into several cross-validation folds already +exists. Using :class:`PredefinedSplit` it is possible to use these folds +e.g. when searching for hyperparameters. + +For example, when using a validation set, set the ``test_fold`` to 0 for all +samples that are part of the validation set, and to -1 for all other samples. + .. _group_cv: Cross-validation iterators for grouped data @@ -680,26 +693,27 @@ Example:: [ 0 1 4 5 6 7 8 9 11 12 13 14] [ 2 3 10 15 16 17] [ 1 2 3 8 9 10 12 13 14 15 16 17] [ 0 4 5 6 7 11] -Implementation notes: +.. dropdown:: Implementation notes -- With the current implementation full shuffle is not possible in most - scenarios. When shuffle=True, the following happens: + - With the current implementation full shuffle is not possible in most + scenarios. When shuffle=True, the following happens: - 1. All groups are shuffled. - 2. Groups are sorted by standard deviation of classes using stable sort. - 3. Sorted groups are iterated over and assigned to folds. + 1. All groups are shuffled. + 2. Groups are sorted by standard deviation of classes using stable sort. + 3. Sorted groups are iterated over and assigned to folds. + + That means that only groups with the same standard deviation of class + distribution will be shuffled, which might be useful when each group has only + a single class. 
+ - The algorithm greedily assigns each group to one of n_splits test sets, + choosing the test set that minimises the variance in class distribution + across test sets. Group assignment proceeds from groups with highest to + lowest variance in class frequency, i.e. large groups peaked on one or few + classes are assigned first. + - This split is suboptimal in a sense that it might produce imbalanced splits + even if perfect stratification is possible. If you have relatively close + distribution of classes in each group, using :class:`GroupKFold` is better. - That means that only groups with the same standard deviation of class - distribution will be shuffled, which might be useful when each group has only - a single class. -- The algorithm greedily assigns each group to one of n_splits test sets, - choosing the test set that minimises the variance in class distribution - across test sets. Group assignment proceeds from groups with highest to - lowest variance in class frequency, i.e. large groups peaked on one or few - classes are assigned first. -- This split is suboptimal in a sense that it might produce imbalanced splits - even if perfect stratification is possible. If you have relatively close - distribution of classes in each group, using :class:`GroupKFold` is better. Here is a visualization of cross-validation behavior for uneven groups: @@ -807,19 +821,6 @@ expensive. In such a scenario, :class:`GroupShuffleSplit` provides a random sample (with replacement) of the train / test splits generated by :class:`LeavePGroupsOut`. -.. _predefined_split: - -Predefined Fold-Splits / Validation-Sets ----------------------------------------- - -For some datasets, a pre-defined split of the data into training- and -validation fold or into several cross-validation folds already -exists. Using :class:`PredefinedSplit` it is possible to use these folds -e.g. when searching for hyperparameters. - -For example, when using a validation set, set the ``test_fold`` to 0 for all -samples that are part of the validation set, and to -1 for all other samples. - Using cross-validation iterators to split train and test -------------------------------------------------------- @@ -988,12 +989,12 @@ using brute force and internally fits ``(n_permutations + 1) * n_cv`` models. It is therefore only tractable with small datasets for which fitting an individual model is very fast. -.. topic:: Examples +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_model_selection_plot_permutation_tests_for_classification.py` +* :ref:`sphx_glr_auto_examples_model_selection_plot_permutation_tests_for_classification.py` -.. topic:: References: +.. dropdown:: References - * Ojala and Garriga. `Permutation Tests for Studying Classifier Performance - `_. - J. Mach. Learn. Res. 2010. + * Ojala and Garriga. `Permutation Tests for Studying Classifier Performance + `_. + J. Mach. Learn. Res. 2010. diff --git a/doc/modules/decomposition.rst b/doc/modules/decomposition.rst index 6a55895b65f07..926a4482f1428 100644 --- a/doc/modules/decomposition.rst +++ b/doc/modules/decomposition.rst @@ -51,10 +51,11 @@ data based on the amount of variance it explains. As such it implements a :scale: 75% -.. topic:: Examples: +.. 
rubric:: Examples - * :ref:`sphx_glr_auto_examples_decomposition_plot_pca_vs_lda.py` - * :ref:`sphx_glr_auto_examples_decomposition_plot_pca_vs_fa_model_selection.py` +* :ref:`sphx_glr_auto_examples_decomposition_plot_pca_iris.py` +* :ref:`sphx_glr_auto_examples_decomposition_plot_pca_vs_lda.py` +* :ref:`sphx_glr_auto_examples_decomposition_plot_pca_vs_fa_model_selection.py` .. _IncrementalPCA: @@ -71,11 +72,11 @@ exactly match the results of :class:`PCA` while processing the data in a minibatch fashion. :class:`IncrementalPCA` makes it possible to implement out-of-core Principal Component Analysis either by: - * Using its ``partial_fit`` method on chunks of data fetched sequentially - from the local hard drive or a network database. +* Using its ``partial_fit`` method on chunks of data fetched sequentially + from the local hard drive or a network database. - * Calling its fit method on a sparse matrix or a memory mapped file using - ``numpy.memmap``. +* Calling its fit method on a memory mapped file using + ``numpy.memmap``. :class:`IncrementalPCA` only stores estimates of component and noise variances, in order update ``explained_variance_ratio_`` incrementally. This is why @@ -96,9 +97,9 @@ input data for each feature before applying the SVD. :scale: 75% -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_decomposition_plot_incremental_pca.py` +* :ref:`sphx_glr_auto_examples_decomposition_plot_incremental_pca.py` .. _RandomizedPCA: @@ -159,20 +160,20 @@ Note: the implementation of ``inverse_transform`` in :class:`PCA` with ``transform`` even when ``whiten=False`` (default). -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_applications_plot_face_recognition.py` - * :ref:`sphx_glr_auto_examples_decomposition_plot_faces_decomposition.py` +* :ref:`sphx_glr_auto_examples_applications_plot_face_recognition.py` +* :ref:`sphx_glr_auto_examples_decomposition_plot_faces_decomposition.py` -.. topic:: References: +.. rubric:: References - * Algorithm 4.3 in - :arxiv:`"Finding structure with randomness: Stochastic algorithms for - constructing approximate matrix decompositions" <0909.4061>` - Halko, et al., 2009 +* Algorithm 4.3 in + :arxiv:`"Finding structure with randomness: Stochastic algorithms for + constructing approximate matrix decompositions" <0909.4061>` + Halko, et al., 2009 - * :arxiv:`"An implementation of a randomized algorithm for principal component - analysis" <1412.3510>` A. Szlam et al. 2014 +* :arxiv:`"An implementation of a randomized algorithm for principal component + analysis" <1412.3510>` A. Szlam et al. 2014 .. _SparsePCA: @@ -247,18 +248,18 @@ factorization, while larger values shrink many coefficients to zero. the algorithm is online along the features direction, not the samples direction. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_decomposition_plot_faces_decomposition.py` +* :ref:`sphx_glr_auto_examples_decomposition_plot_faces_decomposition.py` -.. topic:: References: +.. rubric:: References - .. [Mrl09] `"Online Dictionary Learning for Sparse Coding" - `_ - J. Mairal, F. Bach, J. Ponce, G. Sapiro, 2009 - .. [Jen09] `"Structured Sparse Principal Component Analysis" - `_ - R. Jenatton, G. Obozinski, F. Bach, 2009 +.. [Mrl09] `"Online Dictionary Learning for Sparse Coding" + `_ + J. Mairal, F. Bach, J. Ponce, G. Sapiro, 2009 +.. [Jen09] `"Structured Sparse Principal Component Analysis" + `_ + R. Jenatton, G. Obozinski, F. Bach, 2009 .. 
_kernel_PCA: @@ -287,22 +288,23 @@ prediction (kernel dependency estimation). :class:`KernelPCA` supports both :meth:`KernelPCA.inverse_transform` is an approximation. See the example linked below for more details. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_decomposition_plot_kernel_pca.py` +* :ref:`sphx_glr_auto_examples_decomposition_plot_kernel_pca.py` +* :ref:`sphx_glr_auto_examples_applications_plot_digits_denoising.py` -.. topic:: References: +.. rubric:: References - .. [Scholkopf1997] SchÃļlkopf, Bernhard, Alexander Smola, and Klaus-Robert MÃŧller. - `"Kernel principal component analysis." - `_ - International conference on artificial neural networks. - Springer, Berlin, Heidelberg, 1997. +.. [Scholkopf1997] SchÃļlkopf, Bernhard, Alexander Smola, and Klaus-Robert MÃŧller. + `"Kernel principal component analysis." + `_ + International conference on artificial neural networks. + Springer, Berlin, Heidelberg, 1997. - .. [Bakir2003] BakÄąr, GÃļkhan H., Jason Weston, and Bernhard SchÃļlkopf. - `"Learning to find pre-images." - `_ - Advances in neural information processing systems 16 (2003): 449-456. +.. [Bakir2003] BakÄąr, GÃļkhan H., Jason Weston, and Bernhard SchÃļlkopf. + `"Learning to find pre-images." + `_ + Advances in neural information processing systems 16 (2003): 449-456. .. _kPCA_Solvers: @@ -319,31 +321,34 @@ is eigendecomposed in the Kernel PCA fitting process has an effective rank that is much smaller than its size. This is a situation where approximate eigensolvers can provide speedup with very low precision loss. -The optional parameter ``eigen_solver='randomized'`` can be used to -*significantly* reduce the computation time when the number of requested -``n_components`` is small compared with the number of samples. It relies on -randomized decomposition methods to find an approximate solution in a shorter -time. -The time complexity of the randomized :class:`KernelPCA` is -:math:`O(n_{\mathrm{samples}}^2 \cdot n_{\mathrm{components}})` -instead of :math:`O(n_{\mathrm{samples}}^3)` for the exact method -implemented with ``eigen_solver='dense'``. +.. dropdown:: Eigensolvers -The memory footprint of randomized :class:`KernelPCA` is also proportional to -:math:`2 \cdot n_{\mathrm{samples}} \cdot n_{\mathrm{components}}` instead of -:math:`n_{\mathrm{samples}}^2` for the exact method. + The optional parameter ``eigen_solver='randomized'`` can be used to + *significantly* reduce the computation time when the number of requested + ``n_components`` is small compared with the number of samples. It relies on + randomized decomposition methods to find an approximate solution in a shorter + time. -Note: this technique is the same as in :ref:`RandomizedPCA`. + The time complexity of the randomized :class:`KernelPCA` is + :math:`O(n_{\mathrm{samples}}^2 \cdot n_{\mathrm{components}})` + instead of :math:`O(n_{\mathrm{samples}}^3)` for the exact method + implemented with ``eigen_solver='dense'``. -In addition to the above two solvers, ``eigen_solver='arpack'`` can be used as -an alternate way to get an approximate decomposition. In practice, this method -only provides reasonable execution times when the number of components to find -is extremely small. It is enabled by default when the desired number of -components is less than 10 (strict) and the number of samples is more than 200 -(strict). See :class:`KernelPCA` for details. 
+ The memory footprint of randomized :class:`KernelPCA` is also proportional to + :math:`2 \cdot n_{\mathrm{samples}} \cdot n_{\mathrm{components}}` instead of + :math:`n_{\mathrm{samples}}^2` for the exact method. -.. topic:: References: + Note: this technique is the same as in :ref:`RandomizedPCA`. + + In addition to the above two solvers, ``eigen_solver='arpack'`` can be used as + an alternate way to get an approximate decomposition. In practice, this method + only provides reasonable execution times when the number of components to find + is extremely small. It is enabled by default when the desired number of + components is less than 10 (strict) and the number of samples is more than 200 + (strict). See :class:`KernelPCA` for details. + + .. rubric:: References * *dense* solver: `scipy.linalg.eigh documentation @@ -351,14 +356,14 @@ components is less than 10 (strict) and the number of samples is more than 200 * *randomized* solver: - * Algorithm 4.3 in - :arxiv:`"Finding structure with randomness: Stochastic - algorithms for constructing approximate matrix decompositions" <0909.4061>` - Halko, et al. (2009) + * Algorithm 4.3 in + :arxiv:`"Finding structure with randomness: Stochastic + algorithms for constructing approximate matrix decompositions" <0909.4061>` + Halko, et al. (2009) - * :arxiv:`"An implementation of a randomized algorithm - for principal component analysis" <1412.3510>` - A. Szlam et al. (2014) + * :arxiv:`"An implementation of a randomized algorithm + for principal component analysis" <1412.3510>` + A. Szlam et al. (2014) * *arpack* solver: `scipy.sparse.linalg.eigsh documentation @@ -375,75 +380,74 @@ Truncated singular value decomposition and latent semantic analysis (SVD) that only computes the :math:`k` largest singular values, where :math:`k` is a user-specified parameter. -When truncated SVD is applied to term-document matrices -(as returned by :class:`~sklearn.feature_extraction.text.CountVectorizer` or -:class:`~sklearn.feature_extraction.text.TfidfVectorizer`), -this transformation is known as -`latent semantic analysis `_ -(LSA), because it transforms such matrices -to a "semantic" space of low dimensionality. -In particular, LSA is known to combat the effects of synonymy and polysemy -(both of which roughly mean there are multiple meanings per word), -which cause term-document matrices to be overly sparse -and exhibit poor similarity under measures such as cosine similarity. +:class:`TruncatedSVD` is very similar to :class:`PCA`, but differs +in that the matrix :math:`X` does not need to be centered. +When the columnwise (per-feature) means of :math:`X` +are subtracted from the feature values, +truncated SVD on the resulting matrix is equivalent to PCA. -.. note:: - LSA is also known as latent semantic indexing, LSI, - though strictly that refers to its use in persistent indexes - for information retrieval purposes. +.. dropdown:: About truncated SVD and latent semantic analysis (LSA) -Mathematically, truncated SVD applied to training samples :math:`X` -produces a low-rank approximation :math:`X`: + When truncated SVD is applied to term-document matrices + (as returned by :class:`~sklearn.feature_extraction.text.CountVectorizer` or + :class:`~sklearn.feature_extraction.text.TfidfVectorizer`), + this transformation is known as + `latent semantic analysis `_ + (LSA), because it transforms such matrices + to a "semantic" space of low dimensionality. 
+ In particular, LSA is known to combat the effects of synonymy and polysemy + (both of which roughly mean there are multiple meanings per word), + which cause term-document matrices to be overly sparse + and exhibit poor similarity under measures such as cosine similarity. -.. math:: - X \approx X_k = U_k \Sigma_k V_k^\top + .. note:: + LSA is also known as latent semantic indexing, LSI, + though strictly that refers to its use in persistent indexes + for information retrieval purposes. -After this operation, :math:`U_k \Sigma_k` -is the transformed training set with :math:`k` features -(called ``n_components`` in the API). + Mathematically, truncated SVD applied to training samples :math:`X` + produces a low-rank approximation :math:`X`: -To also transform a test set :math:`X`, we multiply it with :math:`V_k`: + .. math:: + X \approx X_k = U_k \Sigma_k V_k^\top -.. math:: - X' = X V_k + After this operation, :math:`U_k \Sigma_k` + is the transformed training set with :math:`k` features + (called ``n_components`` in the API). -.. note:: - Most treatments of LSA in the natural language processing (NLP) - and information retrieval (IR) literature - swap the axes of the matrix :math:`X` so that it has shape - ``n_features`` × ``n_samples``. - We present LSA in a different way that matches the scikit-learn API better, - but the singular values found are the same. + To also transform a test set :math:`X`, we multiply it with :math:`V_k`: -:class:`TruncatedSVD` is very similar to :class:`PCA`, but differs -in that the matrix :math:`X` does not need to be centered. -When the columnwise (per-feature) means of :math:`X` -are subtracted from the feature values, -truncated SVD on the resulting matrix is equivalent to PCA. -In practical terms, this means -that the :class:`TruncatedSVD` transformer accepts ``scipy.sparse`` -matrices without the need to densify them, -as densifying may fill up memory even for medium-sized document collections. + .. math:: + X' = X V_k -While the :class:`TruncatedSVD` transformer -works with any feature matrix, -using it on tf–idf matrices is recommended over raw frequency counts -in an LSA/document processing setting. -In particular, sublinear scaling and inverse document frequency -should be turned on (``sublinear_tf=True, use_idf=True``) -to bring the feature values closer to a Gaussian distribution, -compensating for LSA's erroneous assumptions about textual data. + .. note:: + Most treatments of LSA in the natural language processing (NLP) + and information retrieval (IR) literature + swap the axes of the matrix :math:`X` so that it has shape + ``(n_features, n_samples)``. + We present LSA in a different way that matches the scikit-learn API better, + but the singular values found are the same. -.. topic:: Examples: + While the :class:`TruncatedSVD` transformer + works with any feature matrix, + using it on tf-idf matrices is recommended over raw frequency counts + in an LSA/document processing setting. + In particular, sublinear scaling and inverse document frequency + should be turned on (``sublinear_tf=True, use_idf=True``) + to bring the feature values closer to a Gaussian distribution, + compensating for LSA's erroneous assumptions about textual data. - * :ref:`sphx_glr_auto_examples_text_plot_document_clustering.py` +.. rubric:: Examples -.. topic:: References: +* :ref:`sphx_glr_auto_examples_text_plot_document_clustering.py` + +.. rubric:: References + +* Christopher D. 
Manning, Prabhakar Raghavan and Hinrich SchÃŧtze (2008), + *Introduction to Information Retrieval*, Cambridge University Press, + chapter 18: `Matrix decompositions & latent semantic indexing + `_ - * Christopher D. Manning, Prabhakar Raghavan and Hinrich SchÃŧtze (2008), - *Introduction to Information Retrieval*, Cambridge University Press, - chapter 18: `Matrix decompositions & latent semantic indexing - `_ .. _DictionaryLearning: @@ -496,9 +500,9 @@ the split code is filled with the negative part of the code vector, only with a positive sign. Therefore, the split_code is non-negative. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_decomposition_plot_sparse_coding.py` +* :ref:`sphx_glr_auto_examples_decomposition_plot_sparse_coding.py` Generic dictionary learning @@ -578,16 +582,16 @@ extracted from part of the image of a raccoon face looks like. :scale: 50% -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_decomposition_plot_image_denoising.py` +* :ref:`sphx_glr_auto_examples_decomposition_plot_image_denoising.py` -.. topic:: References: +.. rubric:: References - * `"Online dictionary learning for sparse coding" - `_ - J. Mairal, F. Bach, J. Ponce, G. Sapiro, 2009 +* `"Online dictionary learning for sparse coding" + `_ + J. Mairal, F. Bach, J. Ponce, G. Sapiro, 2009 .. _MiniBatchDictionaryLearning: @@ -623,7 +627,7 @@ does not fit into the memory. computationally efficient and implements on-line learning with a ``partial_fit`` method. - Example: :ref:`sphx_glr_auto_examples_cluster_plot_dict_face_patches.py` + Example: :ref:`sphx_glr_auto_examples_cluster_plot_dict_face_patches.py` .. currentmodule:: sklearn.decomposition @@ -718,10 +722,10 @@ Varimax rotation maximizes the sum of the variances of the squared loadings, i.e., it tends to produce sparser factors, which are influenced by only a few features each (the "simple structure"). See e.g., the first example below. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_decomposition_plot_varimax_fa.py` - * :ref:`sphx_glr_auto_examples_decomposition_plot_pca_vs_fa_model_selection.py` +* :ref:`sphx_glr_auto_examples_decomposition_plot_varimax_fa.py` +* :ref:`sphx_glr_auto_examples_decomposition_plot_pca_vs_fa_model_selection.py` .. _ICA: @@ -760,11 +764,11 @@ components with some sparsity: .. centered:: |pca_img4| |ica_img4| -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_decomposition_plot_ica_blind_source_separation.py` - * :ref:`sphx_glr_auto_examples_decomposition_plot_ica_vs_pca.py` - * :ref:`sphx_glr_auto_examples_decomposition_plot_faces_decomposition.py` +* :ref:`sphx_glr_auto_examples_decomposition_plot_ica_blind_source_separation.py` +* :ref:`sphx_glr_auto_examples_decomposition_plot_ica_vs_pca.py` +* :ref:`sphx_glr_auto_examples_decomposition_plot_faces_decomposition.py` .. _NMF: @@ -808,7 +812,7 @@ faces dataset, in comparison with the PCA eigenfaces. .. centered:: |pca_img5| |nmf_img5| -The :attr:`init` attribute determines the initialization method applied, which +The `init` attribute determines the initialization method applied, which has a great impact on the performance of the method. :class:`NMF` implements the method Nonnegative Double Singular Value Decomposition. NNDSVD [4]_ is based on two SVD processes, one approximating the data matrix, the other approximating @@ -825,20 +829,20 @@ basic NNDSVD algorithm which introduces a lot of zeros; in this case, NNDSVDa or NNDSVDar should be preferred. 
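+
+For instance, a minimal sketch on a small toy matrix, requesting the NNDSVDa
+variant through the `init` parameter, could look like::
+
+    >>> import numpy as np
+    >>> from sklearn.decomposition import NMF
+    >>> X = np.array([[1, 1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])
+    >>> model = NMF(n_components=2, init='nndsvda')
+    >>> W = model.fit_transform(X)  # W has one column per component
+    >>> W.shape
+    (6, 2)
+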
:class:`NMF` can also be initialized with correctly scaled random non-negative -matrices by setting :attr:`init="random"`. An integer seed or a -``RandomState`` can also be passed to :attr:`random_state` to control +matrices by setting `init="random"`. An integer seed or a +``RandomState`` can also be passed to `random_state` to control reproducibility. -In :class:`NMF`, L1 and L2 priors can be added to the loss function in order -to regularize the model. The L2 prior uses the Frobenius norm, while the L1 -prior uses an elementwise L1 norm. As in :class:`ElasticNet`, we control the -combination of L1 and L2 with the :attr:`l1_ratio` (:math:`\rho`) parameter, -and the intensity of the regularization with the :attr:`alpha_W` and :attr:`alpha_H` -(:math:`\alpha_W` and :math:`\alpha_H`) parameters. The priors are scaled by the number -of samples (:math:`n\_samples`) for `H` and the number of features (:math:`n\_features`) -for `W` to keep their impact balanced with respect to one another and to the data fit -term as independent as possible of the size of the training set. Then the priors terms -are: +In :class:`NMF`, L1 and L2 priors can be added to the loss function in order to +regularize the model. The L2 prior uses the Frobenius norm, while the L1 prior +uses an elementwise L1 norm. As in :class:`~sklearn.linear_model.ElasticNet`, +we control the combination of L1 and L2 with the `l1_ratio` (:math:`\rho`) +parameter, and the intensity of the regularization with the `alpha_W` and +`alpha_H` (:math:`\alpha_W` and :math:`\alpha_H`) parameters. The priors are +scaled by the number of samples (:math:`n\_samples`) for `H` and the number of +features (:math:`n\_features`) for `W` to keep their impact balanced with +respect to one another and to the data fit term as independent as possible of +the size of the training set. Then the priors terms are: .. math:: (\alpha_W \rho ||W||_1 + \frac{\alpha_W(1-\rho)}{2} ||W||_{\mathrm{Fro}} ^ 2) * n\_features @@ -887,18 +891,20 @@ Note that this definition is not valid if :math:`\beta \in (0; 1)`, yet it can be continuously extended to the definitions of :math:`d_{KL}` and :math:`d_{IS}` respectively. -:class:`NMF` implements two solvers, using Coordinate Descent ('cd') [5]_, and -Multiplicative Update ('mu') [6]_. The 'mu' solver can optimize every -beta-divergence, including of course the Frobenius norm (:math:`\beta=2`), the -(generalized) Kullback-Leibler divergence (:math:`\beta=1`) and the -Itakura-Saito divergence (:math:`\beta=0`). Note that for -:math:`\beta \in (1; 2)`, the 'mu' solver is significantly faster than for other -values of :math:`\beta`. Note also that with a negative (or 0, i.e. -'itakura-saito') :math:`\beta`, the input matrix cannot contain zero values. +.. dropdown:: NMF implemented solvers + + :class:`NMF` implements two solvers, using Coordinate Descent ('cd') [5]_, and + Multiplicative Update ('mu') [6]_. The 'mu' solver can optimize every + beta-divergence, including of course the Frobenius norm (:math:`\beta=2`), the + (generalized) Kullback-Leibler divergence (:math:`\beta=1`) and the + Itakura-Saito divergence (:math:`\beta=0`). Note that for + :math:`\beta \in (1; 2)`, the 'mu' solver is significantly faster than for other + values of :math:`\beta`. Note also that with a negative (or 0, i.e. + 'itakura-saito') :math:`\beta`, the input matrix cannot contain zero values. -The 'cd' solver can only optimize the Frobenius norm. 
Due to the -underlying non-convexity of NMF, the different solvers may converge to -different minima, even when optimizing the same distance function. + The 'cd' solver can only optimize the Frobenius norm. Due to the + underlying non-convexity of NMF, the different solvers may converge to + different minima, even when optimizing the same distance function. NMF is best used with the ``fit_transform`` method, which returns the matrix W. The matrix H is stored into the fitted model in the ``components_`` attribute; @@ -914,10 +920,12 @@ stored components:: >>> X_new = np.array([[1, 0], [1, 6.1], [1, 0], [1, 4], [3.2, 1], [0, 4]]) >>> W_new = model.transform(X_new) -.. topic:: Examples: - * :ref:`sphx_glr_auto_examples_decomposition_plot_faces_decomposition.py` - * :ref:`sphx_glr_auto_examples_applications_plot_topics_extraction_with_nmf_lda.py` + +.. rubric:: Examples + +* :ref:`sphx_glr_auto_examples_decomposition_plot_faces_decomposition.py` +* :ref:`sphx_glr_auto_examples_applications_plot_topics_extraction_with_nmf_lda.py` .. _MiniBatchNMF: @@ -942,33 +950,33 @@ The estimator also implements ``partial_fit``, which updates ``H`` by iterating only once over a mini-batch. This can be used for online learning when the data is not readily available from the start, or when the data does not fit into memory. -.. topic:: References: +.. rubric:: References - .. [1] `"Learning the parts of objects by non-negative matrix factorization" - `_ - D. Lee, S. Seung, 1999 +.. [1] `"Learning the parts of objects by non-negative matrix factorization" + `_ + D. Lee, S. Seung, 1999 - .. [2] `"Non-negative Matrix Factorization with Sparseness Constraints" - `_ - P. Hoyer, 2004 +.. [2] `"Non-negative Matrix Factorization with Sparseness Constraints" + `_ + P. Hoyer, 2004 - .. [4] `"SVD based initialization: A head start for nonnegative - matrix factorization" - `_ - C. Boutsidis, E. Gallopoulos, 2008 +.. [4] `"SVD based initialization: A head start for nonnegative + matrix factorization" + `_ + C. Boutsidis, E. Gallopoulos, 2008 - .. [5] `"Fast local algorithms for large scale nonnegative matrix and tensor - factorizations." - `_ - A. Cichocki, A. Phan, 2009 +.. [5] `"Fast local algorithms for large scale nonnegative matrix and tensor + factorizations." + `_ + A. Cichocki, A. Phan, 2009 - .. [6] :arxiv:`"Algorithms for nonnegative matrix factorization with - the beta-divergence" <1010.1763>` - C. Fevotte, J. Idier, 2011 +.. [6] :arxiv:`"Algorithms for nonnegative matrix factorization with + the beta-divergence" <1010.1763>` + C. Fevotte, J. Idier, 2011 - .. [7] :arxiv:`"Online algorithms for nonnegative matrix factorization with the - Itakura-Saito divergence" <1106.4198>` - A. Lefevre, F. Bach, C. Fevotte, 2011 +.. [7] :arxiv:`"Online algorithms for nonnegative matrix factorization with the + Itakura-Saito divergence" <1106.4198>` + A. Lefevre, F. Bach, C. Fevotte, 2011 .. _LatentDirichletAllocation: @@ -987,10 +995,10 @@ The graphical model of LDA is a three-level generative model: Note on notations presented in the graphical model above, which can be found in Hoffman et al. (2013): - * The corpus is a collection of :math:`D` documents. - * A document is a sequence of :math:`N` words. - * There are :math:`K` topics in the corpus. - * The boxes represent repeated sampling. +* The corpus is a collection of :math:`D` documents. +* A document is a sequence of :math:`N` words. +* There are :math:`K` topics in the corpus. +* The boxes represent repeated sampling. 
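+
+As a minimal sketch of how this notation maps onto the API (using a tiny toy
+corpus purely for illustration), the :math:`K` topics correspond to
+`n_components`, and the fitted "topic-term" matrix is stored in `components_`
+with shape `(n_components, n_features)`::
+
+    >>> from sklearn.feature_extraction.text import CountVectorizer
+    >>> from sklearn.decomposition import LatentDirichletAllocation
+    >>> docs = ["apple banana apple", "banana cherry", "apple cherry cherry"]
+    >>> X = CountVectorizer().fit_transform(docs)  # D=3 documents, 3 terms
+    >>> lda = LatentDirichletAllocation(n_components=2, random_state=0).fit(X)
+    >>> lda.components_.shape  # (K topics, vocabulary size)
+    (2, 3)
+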
In the graphical model, each node is a random variable and has a role in the generative process. A shaded node indicates an observed variable and an unshaded @@ -1000,45 +1008,48 @@ of topics in the corpus and the distribution of words in the documents. The goal of LDA is to use the observed words to infer the hidden topic structure. -When modeling text corpora, the model assumes the following generative process -for a corpus with :math:`D` documents and :math:`K` topics, with :math:`K` -corresponding to :attr:`n_components` in the API: +.. dropdown:: Details on modeling text corpora - 1. For each topic :math:`k \in K`, draw :math:`\beta_k \sim - \mathrm{Dirichlet}(\eta)`. This provides a distribution over the words, - i.e. the probability of a word appearing in topic :math:`k`. - :math:`\eta` corresponds to :attr:`topic_word_prior`. + When modeling text corpora, the model assumes the following generative process + for a corpus with :math:`D` documents and :math:`K` topics, with :math:`K` + corresponding to `n_components` in the API: - 2. For each document :math:`d \in D`, draw the topic proportions - :math:`\theta_d \sim \mathrm{Dirichlet}(\alpha)`. :math:`\alpha` - corresponds to :attr:`doc_topic_prior`. + 1. For each topic :math:`k \in K`, draw :math:`\beta_k \sim + \mathrm{Dirichlet}(\eta)`. This provides a distribution over the words, + i.e. the probability of a word appearing in topic :math:`k`. + :math:`\eta` corresponds to `topic_word_prior`. - 3. For each word :math:`i` in document :math:`d`: + 2. For each document :math:`d \in D`, draw the topic proportions + :math:`\theta_d \sim \mathrm{Dirichlet}(\alpha)`. :math:`\alpha` + corresponds to `doc_topic_prior`. - a. Draw the topic assignment :math:`z_{di} \sim \mathrm{Multinomial} - (\theta_d)` - b. Draw the observed word :math:`w_{ij} \sim \mathrm{Multinomial} - (\beta_{z_{di}})` + 3. For each word :math:`i` in document :math:`d`: -For parameter estimation, the posterior distribution is: + a. Draw the topic assignment :math:`z_{di} \sim \mathrm{Multinomial} + (\theta_d)` + b. Draw the observed word :math:`w_{ij} \sim \mathrm{Multinomial} + (\beta_{z_{di}})` -.. math:: - p(z, \theta, \beta |w, \alpha, \eta) = - \frac{p(z, \theta, \beta|\alpha, \eta)}{p(w|\alpha, \eta)} + For parameter estimation, the posterior distribution is: -Since the posterior is intractable, variational Bayesian method -uses a simpler distribution :math:`q(z,\theta,\beta | \lambda, \phi, \gamma)` -to approximate it, and those variational parameters :math:`\lambda`, -:math:`\phi`, :math:`\gamma` are optimized to maximize the Evidence -Lower Bound (ELBO): + .. math:: + p(z, \theta, \beta |w, \alpha, \eta) = + \frac{p(z, \theta, \beta|\alpha, \eta)}{p(w|\alpha, \eta)} -.. math:: - \log\: P(w | \alpha, \eta) \geq L(w,\phi,\gamma,\lambda) \overset{\triangle}{=} - E_{q}[\log\:p(w,z,\theta,\beta|\alpha,\eta)] - E_{q}[\log\:q(z, \theta, \beta)] + Since the posterior is intractable, variational Bayesian method + uses a simpler distribution :math:`q(z,\theta,\beta | \lambda, \phi, \gamma)` + to approximate it, and those variational parameters :math:`\lambda`, + :math:`\phi`, :math:`\gamma` are optimized to maximize the Evidence + Lower Bound (ELBO): + + .. 
math:: + \log\: P(w | \alpha, \eta) \geq L(w,\phi,\gamma,\lambda) \overset{\triangle}{=} + E_{q}[\log\:p(w,z,\theta,\beta|\alpha,\eta)] - E_{q}[\log\:q(z, \theta, \beta)] + + Maximizing ELBO is equivalent to minimizing the Kullback-Leibler(KL) divergence + between :math:`q(z,\theta,\beta)` and the true posterior + :math:`p(z, \theta, \beta |w, \alpha, \eta)`. -Maximizing ELBO is equivalent to minimizing the Kullback-Leibler(KL) divergence -between :math:`q(z,\theta,\beta)` and the true posterior -:math:`p(z, \theta, \beta |w, \alpha, \eta)`. :class:`LatentDirichletAllocation` implements the online variational Bayes algorithm and supports both online and batch update methods. @@ -1054,33 +1065,33 @@ points. When :class:`LatentDirichletAllocation` is applied on a "document-term" matrix, the matrix will be decomposed into a "topic-term" matrix and a "document-topic" matrix. While -"topic-term" matrix is stored as :attr:`components_` in the model, "document-topic" matrix +"topic-term" matrix is stored as `components_` in the model, "document-topic" matrix can be calculated from ``transform`` method. :class:`LatentDirichletAllocation` also implements ``partial_fit`` method. This is used when data can be fetched sequentially. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_applications_plot_topics_extraction_with_nmf_lda.py` +* :ref:`sphx_glr_auto_examples_applications_plot_topics_extraction_with_nmf_lda.py` -.. topic:: References: +.. rubric:: References - * `"Latent Dirichlet Allocation" - `_ - D. Blei, A. Ng, M. Jordan, 2003 +* `"Latent Dirichlet Allocation" + `_ + D. Blei, A. Ng, M. Jordan, 2003 - * `"Online Learning for Latent Dirichlet Allocation” - `_ - M. Hoffman, D. Blei, F. Bach, 2010 +* `"Online Learning for Latent Dirichlet Allocation” + `_ + M. Hoffman, D. Blei, F. Bach, 2010 - * `"Stochastic Variational Inference" - `_ - M. Hoffman, D. Blei, C. Wang, J. Paisley, 2013 +* `"Stochastic Variational Inference" + `_ + M. Hoffman, D. Blei, C. Wang, J. Paisley, 2013 - * `"The varimax criterion for analytic rotation in factor analysis" - `_ - H. F. Kaiser, 1958 +* `"The varimax criterion for analytic rotation in factor analysis" + `_ + H. F. Kaiser, 1958 See also :ref:`nca_dim_reduction` for dimensionality reduction with Neighborhood Components Analysis. diff --git a/doc/modules/density.rst b/doc/modules/density.rst index fc0530ed262c0..39264f226185d 100644 --- a/doc/modules/density.rst +++ b/doc/modules/density.rst @@ -113,31 +113,34 @@ forms, which are shown in the following figure: .. centered:: |kde_kernels| -The form of these kernels is as follows: +.. 
dropdown:: Kernels' mathematical expressions -* Gaussian kernel (``kernel = 'gaussian'``) + The form of these kernels is as follows: - :math:`K(x; h) \propto \exp(- \frac{x^2}{2h^2} )` + * Gaussian kernel (``kernel = 'gaussian'``) -* Tophat kernel (``kernel = 'tophat'``) + :math:`K(x; h) \propto \exp(- \frac{x^2}{2h^2} )` - :math:`K(x; h) \propto 1` if :math:`x < h` + * Tophat kernel (``kernel = 'tophat'``) -* Epanechnikov kernel (``kernel = 'epanechnikov'``) + :math:`K(x; h) \propto 1` if :math:`x < h` - :math:`K(x; h) \propto 1 - \frac{x^2}{h^2}` + * Epanechnikov kernel (``kernel = 'epanechnikov'``) -* Exponential kernel (``kernel = 'exponential'``) + :math:`K(x; h) \propto 1 - \frac{x^2}{h^2}` - :math:`K(x; h) \propto \exp(-x/h)` + * Exponential kernel (``kernel = 'exponential'``) -* Linear kernel (``kernel = 'linear'``) + :math:`K(x; h) \propto \exp(-x/h)` - :math:`K(x; h) \propto 1 - x/h` if :math:`x < h` + * Linear kernel (``kernel = 'linear'``) -* Cosine kernel (``kernel = 'cosine'``) + :math:`K(x; h) \propto 1 - x/h` if :math:`x < h` + + * Cosine kernel (``kernel = 'cosine'``) + + :math:`K(x; h) \propto \cos(\frac{\pi x}{2h})` if :math:`x < h` - :math:`K(x; h) \propto \cos(\frac{\pi x}{2h})` if :math:`x < h` The kernel density estimator can be used with any of the valid distance metrics (see :class:`~sklearn.metrics.DistanceMetric` for a list of @@ -171,14 +174,14 @@ on a PCA projection of the data: The "new" data consists of linear combinations of the input data, with weights probabilistically drawn given the KDE model. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_neighbors_plot_kde_1d.py`: computation of simple kernel - density estimates in one dimension. +* :ref:`sphx_glr_auto_examples_neighbors_plot_kde_1d.py`: computation of simple kernel + density estimates in one dimension. - * :ref:`sphx_glr_auto_examples_neighbors_plot_digits_kde_sampling.py`: an example of using - Kernel Density estimation to learn a generative model of the hand-written - digits data, and drawing new samples from this model. +* :ref:`sphx_glr_auto_examples_neighbors_plot_digits_kde_sampling.py`: an example of using + Kernel Density estimation to learn a generative model of the hand-written + digits data, and drawing new samples from this model. - * :ref:`sphx_glr_auto_examples_neighbors_plot_species_kde.py`: an example of Kernel Density - estimation using the Haversine distance metric to visualize geospatial data +* :ref:`sphx_glr_auto_examples_neighbors_plot_species_kde.py`: an example of Kernel Density + estimation using the Haversine distance metric to visualize geospatial data diff --git a/doc/modules/ensemble.rst b/doc/modules/ensemble.rst index 4559effc00fc1..58ac09583ea6c 100644 --- a/doc/modules/ensemble.rst +++ b/doc/modules/ensemble.rst @@ -1,621 +1,581 @@ .. _ensemble: -================ -Ensemble methods -================ +=========================================================================== +Ensembles: Gradient boosting, random forests, bagging, voting, stacking +=========================================================================== .. currentmodule:: sklearn.ensemble -The goal of **ensemble methods** is to combine the predictions of several +**Ensemble methods** combine the predictions of several base estimators built with a given learning algorithm in order to improve generalizability / robustness over a single estimator. 
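As a toy illustration of this idea (an editorial sketch only; the synthetic
dataset and the particular base estimators are arbitrary and not taken from the
patch), a :class:`VotingClassifier` combines a few heterogeneous models by
averaging their predicted class probabilities::

    >>> from sklearn.datasets import make_classification
    >>> from sklearn.ensemble import VotingClassifier
    >>> from sklearn.linear_model import LogisticRegression
    >>> from sklearn.naive_bayes import GaussianNB
    >>> from sklearn.tree import DecisionTreeClassifier

    >>> X, y = make_classification(random_state=0)
    >>> ensemble = VotingClassifier(
    ...     estimators=[("lr", LogisticRegression(max_iter=1000)),
    ...                 ("tree", DecisionTreeClassifier(random_state=0)),
    ...                 ("nb", GaussianNB())],
    ...     voting="soft",          # average the predicted class probabilities
    ... ).fit(X, y)
    >>> ensemble.predict(X[:2]).shape
    (2,)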
-Two families of ensemble methods are usually distinguished: - -- In **averaging methods**, the driving principle is to build several - estimators independently and then to average their predictions. On average, - the combined estimator is usually better than any of the single base - estimator because its variance is reduced. - - **Examples:** :ref:`Bagging methods `, :ref:`Forests of randomized trees `, ... +Two very famous examples of ensemble methods are :ref:`gradient-boosted trees +` and :ref:`random forests `. -- By contrast, in **boosting methods**, base estimators are built sequentially - and one tries to reduce the bias of the combined estimator. The motivation is - to combine several weak models to produce a powerful ensemble. +More generally, ensemble models can be applied to any base learner beyond +trees, in averaging methods such as :ref:`Bagging methods `, +:ref:`model stacking `, or :ref:`Voting `, or in +boosting, as :ref:`AdaBoost `. - **Examples:** :ref:`AdaBoost `, :ref:`Gradient Tree Boosting `, ... - - -.. _bagging: +.. _gradient_boosting: -Bagging meta-estimator +Gradient-boosted trees ====================== -In ensemble algorithms, bagging methods form a class of algorithms which build -several instances of a black-box estimator on random subsets of the original -training set and then aggregate their individual predictions to form a final -prediction. These methods are used as a way to reduce the variance of a base -estimator (e.g., a decision tree), by introducing randomization into its -construction procedure and then making an ensemble out of it. In many cases, -bagging methods constitute a very simple way to improve with respect to a -single model, without making it necessary to adapt the underlying base -algorithm. As they provide a way to reduce overfitting, bagging methods work -best with strong and complex models (e.g., fully developed decision trees), in -contrast with boosting methods which usually work best with weak models (e.g., -shallow decision trees). +`Gradient Tree Boosting `_ +or Gradient Boosted Decision Trees (GBDT) is a generalization +of boosting to arbitrary differentiable loss functions, see the seminal work of +[Friedman2001]_. GBDT is an excellent model for both regression and +classification, in particular for tabular data. -Bagging methods come in many flavours but mostly differ from each other by the -way they draw random subsets of the training set: +.. topic:: :class:`GradientBoostingClassifier` vs :class:`HistGradientBoostingClassifier` - * When random subsets of the dataset are drawn as random subsets of the - samples, then this algorithm is known as Pasting [B1999]_. + Scikit-learn provides two implementations of gradient-boosted trees: + :class:`HistGradientBoostingClassifier` vs + :class:`GradientBoostingClassifier` for classification, and the + corresponding classes for regression. The former can be **orders of + magnitude faster** than the latter when the number of samples is + larger than tens of thousands of samples. - * When samples are drawn with replacement, then the method is known as - Bagging [B1996]_. + Missing values and categorical data are natively supported by the + Hist... version, removing the need for additional preprocessing such as + imputation. - * When random subsets of the dataset are drawn as random subsets of - the features, then the method is known as Random Subspaces [H1998]_. 
+ :class:`GradientBoostingClassifier` and + :class:`GradientBoostingRegressor`, might be preferred for small sample + sizes since binning may lead to split points that are too approximate + in this setting. - * Finally, when base estimators are built on subsets of both samples and - features, then the method is known as Random Patches [LG2012]_. +.. _histogram_based_gradient_boosting: -In scikit-learn, bagging methods are offered as a unified -:class:`BaggingClassifier` meta-estimator (resp. :class:`BaggingRegressor`), -taking as input a user-specified estimator along with parameters -specifying the strategy to draw random subsets. In particular, ``max_samples`` -and ``max_features`` control the size of the subsets (in terms of samples and -features), while ``bootstrap`` and ``bootstrap_features`` control whether -samples and features are drawn with or without replacement. When using a subset -of the available samples the generalization accuracy can be estimated with the -out-of-bag samples by setting ``oob_score=True``. As an example, the -snippet below illustrates how to instantiate a bagging ensemble of -:class:`KNeighborsClassifier` estimators, each built on random subsets of -50% of the samples and 50% of the features. +Histogram-Based Gradient Boosting +---------------------------------- - >>> from sklearn.ensemble import BaggingClassifier - >>> from sklearn.neighbors import KNeighborsClassifier - >>> bagging = BaggingClassifier(KNeighborsClassifier(), - ... max_samples=0.5, max_features=0.5) +Scikit-learn 0.21 introduced two new implementations of +gradient boosted trees, namely :class:`HistGradientBoostingClassifier` +and :class:`HistGradientBoostingRegressor`, inspired by +`LightGBM `__ (See [LightGBM]_). -.. topic:: Examples: +These histogram-based estimators can be **orders of magnitude faster** +than :class:`GradientBoostingClassifier` and +:class:`GradientBoostingRegressor` when the number of samples is larger +than tens of thousands of samples. - * :ref:`sphx_glr_auto_examples_ensemble_plot_bias_variance.py` +They also have built-in support for missing values, which avoids the need +for an imputer. -.. topic:: References +These fast estimators first bin the input samples ``X`` into +integer-valued bins (typically 256 bins) which tremendously reduces the +number of splitting points to consider, and allows the algorithm to +leverage integer-based data structures (histograms) instead of relying on +sorted continuous values when building the trees. The API of these +estimators is slightly different, and some of the features from +:class:`GradientBoostingClassifier` and :class:`GradientBoostingRegressor` +are not yet supported, for instance some loss functions. - .. [B1999] L. Breiman, "Pasting small votes for classification in large - databases and on-line", Machine Learning, 36(1), 85-103, 1999. +.. rubric:: Examples - .. [B1996] L. Breiman, "Bagging predictors", Machine Learning, 24(2), - 123-140, 1996. +* :ref:`sphx_glr_auto_examples_inspection_plot_partial_dependence.py` +* :ref:`sphx_glr_auto_examples_ensemble_plot_forest_hist_grad_boosting_comparison.py` - .. [H1998] T. Ho, "The random subspace method for constructing decision - forests", Pattern Analysis and Machine Intelligence, 20(8), 832-844, - 1998. +Usage +^^^^^ - .. [LG2012] G. Louppe and P. Geurts, "Ensembles on Random Patches", - Machine Learning and Knowledge Discovery in Databases, 346-361, 2012. 
+Most of the parameters are unchanged from +:class:`GradientBoostingClassifier` and :class:`GradientBoostingRegressor`. +One exception is the ``max_iter`` parameter that replaces ``n_estimators``, and +controls the number of iterations of the boosting process:: -.. _forest: + >>> from sklearn.ensemble import HistGradientBoostingClassifier + >>> from sklearn.datasets import make_hastie_10_2 -Forests of randomized trees -=========================== + >>> X, y = make_hastie_10_2(random_state=0) + >>> X_train, X_test = X[:2000], X[2000:] + >>> y_train, y_test = y[:2000], y[2000:] -The :mod:`sklearn.ensemble` module includes two averaging algorithms based -on randomized :ref:`decision trees `: the RandomForest algorithm -and the Extra-Trees method. Both algorithms are perturb-and-combine -techniques [B1998]_ specifically designed for trees. This means a diverse -set of classifiers is created by introducing randomness in the classifier -construction. The prediction of the ensemble is given as the averaged -prediction of the individual classifiers. + >>> clf = HistGradientBoostingClassifier(max_iter=100).fit(X_train, y_train) + >>> clf.score(X_test, y_test) + 0.8965 -As other classifiers, forest classifiers have to be fitted with two -arrays: a sparse or dense array X of shape ``(n_samples, n_features)`` -holding the training samples, and an array Y of shape ``(n_samples,)`` -holding the target values (class labels) for the training samples:: +Available losses for regression are 'squared_error', +'absolute_error', which is less sensitive to outliers, and +'poisson', which is well suited to model counts and frequencies. For +classification, 'log_loss' is the only option. For binary classification it uses the +binary log loss, also known as binomial deviance or binary cross-entropy. For +`n_classes >= 3`, it uses the multi-class log loss function, with multinomial deviance +and categorical cross-entropy as alternative names. The appropriate loss version is +selected based on :term:`y` passed to :term:`fit`. - >>> from sklearn.ensemble import RandomForestClassifier - >>> X = [[0, 0], [1, 1]] - >>> Y = [0, 1] - >>> clf = RandomForestClassifier(n_estimators=10) - >>> clf = clf.fit(X, Y) +The size of the trees can be controlled through the ``max_leaf_nodes``, +``max_depth``, and ``min_samples_leaf`` parameters. -Like :ref:`decision trees `, forests of trees also extend to -:ref:`multi-output problems ` (if Y is an array -of shape ``(n_samples, n_outputs)``). +The number of bins used to bin the data is controlled with the ``max_bins`` +parameter. Using less bins acts as a form of regularization. It is generally +recommended to use as many bins as possible (255), which is the default. -Random Forests --------------- +The ``l2_regularization`` parameter acts as a regularizer for the loss function, +and corresponds to :math:`\lambda` in the following expression (see equation (2) +in [XGBoost]_): -In random forests (see :class:`RandomForestClassifier` and -:class:`RandomForestRegressor` classes), each tree in the ensemble is built -from a sample drawn with replacement (i.e., a bootstrap sample) from the -training set. +.. math:: -Furthermore, when splitting each node during the construction of a tree, the -best split is found either from all input features or a random subset of size -``max_features``. (See the :ref:`parameter tuning guidelines -` for more details). 
+ \mathcal{L}(\phi) = \sum_i l(\hat{y}_i, y_i) + \frac12 \sum_k \lambda ||w_k||^2 -The purpose of these two sources of randomness is to decrease the variance of -the forest estimator. Indeed, individual decision trees typically exhibit high -variance and tend to overfit. The injected randomness in forests yield decision -trees with somewhat decoupled prediction errors. By taking an average of those -predictions, some errors can cancel out. Random forests achieve a reduced -variance by combining diverse trees, sometimes at the cost of a slight increase -in bias. In practice the variance reduction is often significant hence yielding -an overall better model. +.. dropdown:: Details on l2 regularization -In contrast to the original publication [B2001]_, the scikit-learn -implementation combines classifiers by averaging their probabilistic -prediction, instead of letting each classifier vote for a single class. + It is important to notice that the loss term :math:`l(\hat{y}_i, y_i)` describes + only half of the actual loss function except for the pinball loss and absolute + error. -A competitive alternative to random forests are -:ref:`histogram_based_gradient_boosting` (HGBT) models: + The index :math:`k` refers to the k-th tree in the ensemble of trees. In the + case of regression and binary classification, gradient boosting models grow one + tree per iteration, then :math:`k` runs up to `max_iter`. In the case of + multiclass classification problems, the maximal value of the index :math:`k` is + `n_classes` :math:`\times` `max_iter`. -- Building trees: Random forests typically rely on deep trees (that overfit - individually) which uses much computational resources, as they require - several splittings and evaluations of candidate splits. Boosting models - build shallow trees (that underfit individually) which are faster to fit - and predict. + If :math:`T_k` denotes the number of leaves in the k-th tree, then :math:`w_k` + is a vector of length :math:`T_k`, which contains the leaf values of the form `w + = -sum_gradient / (sum_hessian + l2_regularization)` (see equation (5) in + [XGBoost]_). -- Sequential boosting: In HGBT, the decision trees are built sequentially, - where each tree is trained to correct the errors made by the previous ones. - This allows them to iteratively improve the model's performance using - relatively few trees. In contrast, random forests use a majority vote to - predict the outcome, which can require a larger number of trees to achieve - the same level of accuracy. + The leaf values :math:`w_k` are derived by dividing the sum of the gradients of + the loss function by the combined sum of hessians. Adding the regularization to + the denominator penalizes the leaves with small hessians (flat regions), + resulting in smaller updates. Those :math:`w_k` values contribute then to the + model's prediction for a given input that ends up in the corresponding leaf. The + final prediction is the sum of the base prediction and the contributions from + each tree. The result of that sum is then transformed by the inverse link + function depending on the choice of the loss function (see + :ref:`gradient_boosting_formulation`). -- Efficient binning: HGBT uses an efficient binning algorithm that can handle - large datasets with a high number of features. The binning algorithm can - pre-process the data to speed up the subsequent tree construction (see - :ref:`Why it's faster `). 
In contrast, the scikit-learn - implementation of random forests does not use binning and relies on exact - splitting, which can be computationally expensive. + Notice that the original paper [XGBoost]_ introduces a term :math:`\gamma\sum_k + T_k` that penalizes the number of leaves (making it a smooth version of + `max_leaf_nodes`) not presented here as it is not implemented in scikit-learn; + whereas :math:`\lambda` penalizes the magnitude of the individual tree + predictions before being rescaled by the learning rate, see + :ref:`gradient_boosting_shrinkage`. -Overall, the computational cost of HGBT versus RF depends on the specific -characteristics of the dataset and the modeling task. It's always a good idea -to try both models and compare their performance and computational efficiency -on your specific problem to determine which model is the best fit. -.. topic:: Examples: +Note that **early-stopping is enabled by default if the number of samples is +larger than 10,000**. The early-stopping behaviour is controlled via the +``early_stopping``, ``scoring``, ``validation_fraction``, +``n_iter_no_change``, and ``tol`` parameters. It is possible to early-stop +using an arbitrary :term:`scorer`, or just the training or validation loss. +Note that for technical reasons, using a callable as a scorer is significantly slower +than using the loss. By default, early-stopping is performed if there are at least +10,000 samples in the training set, using the validation loss. - * :ref:`sphx_glr_auto_examples_ensemble_plot_forest_hist_grad_boosting_comparison.py` +.. _nan_support_hgbt: -Extremely Randomized Trees --------------------------- +Missing values support +^^^^^^^^^^^^^^^^^^^^^^ -In extremely randomized trees (see :class:`ExtraTreesClassifier` -and :class:`ExtraTreesRegressor` classes), randomness goes one step -further in the way splits are computed. As in random forests, a random -subset of candidate features is used, but instead of looking for the -most discriminative thresholds, thresholds are drawn at random for each -candidate feature and the best of these randomly-generated thresholds is -picked as the splitting rule. This usually allows to reduce the variance -of the model a bit more, at the expense of a slightly greater increase -in bias:: +:class:`HistGradientBoostingClassifier` and +:class:`HistGradientBoostingRegressor` have built-in support for missing +values (NaNs). - >>> from sklearn.model_selection import cross_val_score - >>> from sklearn.datasets import make_blobs - >>> from sklearn.ensemble import RandomForestClassifier - >>> from sklearn.ensemble import ExtraTreesClassifier - >>> from sklearn.tree import DecisionTreeClassifier +During training, the tree grower learns at each split point whether samples +with missing values should go to the left or right child, based on the +potential gain. When predicting, samples with missing values are assigned to +the left or right child consequently:: - >>> X, y = make_blobs(n_samples=10000, n_features=10, centers=100, - ... random_state=0) + >>> from sklearn.ensemble import HistGradientBoostingClassifier + >>> import numpy as np - >>> clf = DecisionTreeClassifier(max_depth=None, min_samples_split=2, - ... random_state=0) - >>> scores = cross_val_score(clf, X, y, cv=5) - >>> scores.mean() - 0.98... + >>> X = np.array([0, 1, 2, np.nan]).reshape(-1, 1) + >>> y = [0, 0, 1, 1] - >>> clf = RandomForestClassifier(n_estimators=10, max_depth=None, - ... 
min_samples_split=2, random_state=0) - >>> scores = cross_val_score(clf, X, y, cv=5) - >>> scores.mean() - 0.999... + >>> gbdt = HistGradientBoostingClassifier(min_samples_leaf=1).fit(X, y) + >>> gbdt.predict(X) + array([0, 0, 1, 1]) - >>> clf = ExtraTreesClassifier(n_estimators=10, max_depth=None, - ... min_samples_split=2, random_state=0) - >>> scores = cross_val_score(clf, X, y, cv=5) - >>> scores.mean() > 0.999 - True +When the missingness pattern is predictive, the splits can be performed on +whether the feature value is missing or not:: -.. figure:: ../auto_examples/ensemble/images/sphx_glr_plot_forest_iris_001.png - :target: ../auto_examples/ensemble/plot_forest_iris.html - :align: center - :scale: 75% + >>> X = np.array([0, np.nan, 1, 2, np.nan]).reshape(-1, 1) + >>> y = [0, 1, 0, 0, 1] + >>> gbdt = HistGradientBoostingClassifier(min_samples_leaf=1, + ... max_depth=2, + ... learning_rate=1, + ... max_iter=1).fit(X, y) + >>> gbdt.predict(X) + array([0, 1, 0, 0, 1]) -.. _random_forest_parameters: +If no missing values were encountered for a given feature during training, +then samples with missing values are mapped to whichever child has the most +samples. -Parameters ----------- +.. rubric:: Examples -The main parameters to adjust when using these methods is ``n_estimators`` and -``max_features``. The former is the number of trees in the forest. The larger -the better, but also the longer it will take to compute. In addition, note that -results will stop getting significantly better beyond a critical number of -trees. The latter is the size of the random subsets of features to consider -when splitting a node. The lower the greater the reduction of variance, but -also the greater the increase in bias. Empirical good default values are -``max_features=1.0`` or equivalently ``max_features=None`` (always considering -all features instead of a random subset) for regression problems, and -``max_features="sqrt"`` (using a random subset of size ``sqrt(n_features)``) -for classification tasks (where ``n_features`` is the number of features in -the data). The default value of ``max_features=1.0`` is equivalent to bagged -trees and more randomness can be achieved by setting smaller values (e.g. 0.3 -is a typical default in the literature). Good results are often achieved when -setting ``max_depth=None`` in combination with ``min_samples_split=2`` (i.e., -when fully developing the trees). Bear in mind though that these values are -usually not optimal, and might result in models that consume a lot of RAM. -The best parameter values should always be cross-validated. In addition, note -that in random forests, bootstrap samples are used by default -(``bootstrap=True``) while the default strategy for extra-trees is to use the -whole dataset (``bootstrap=False``). When using bootstrap sampling the -generalization error can be estimated on the left out or out-of-bag samples. -This can be enabled by setting ``oob_score=True``. +* :ref:`sphx_glr_auto_examples_ensemble_plot_hgbt_regression.py` -.. note:: +.. _sw_hgbdt: - The size of the model with the default parameters is :math:`O( M * N * log (N) )`, - where :math:`M` is the number of trees and :math:`N` is the number of samples. - In order to reduce the size of the model, you can change these parameters: - ``min_samples_split``, ``max_leaf_nodes``, ``max_depth`` and ``min_samples_leaf``. 
+Sample weight support +^^^^^^^^^^^^^^^^^^^^^ -Parallelization ---------------- +:class:`HistGradientBoostingClassifier` and +:class:`HistGradientBoostingRegressor` support sample weights during +:term:`fit`. -Finally, this module also features the parallel construction of the trees -and the parallel computation of the predictions through the ``n_jobs`` -parameter. If ``n_jobs=k`` then computations are partitioned into -``k`` jobs, and run on ``k`` cores of the machine. If ``n_jobs=-1`` -then all cores available on the machine are used. Note that because of -inter-process communication overhead, the speedup might not be linear -(i.e., using ``k`` jobs will unfortunately not be ``k`` times as -fast). Significant speedup can still be achieved though when building -a large number of trees, or when building a single tree requires a fair -amount of time (e.g., on large datasets). +The following toy example demonstrates that samples with a sample weight of zero are ignored: -.. topic:: Examples: + >>> X = [[1, 0], + ... [1, 0], + ... [1, 0], + ... [0, 1]] + >>> y = [0, 0, 1, 0] + >>> # ignore the first 2 training samples by setting their weight to 0 + >>> sample_weight = [0, 0, 1, 1] + >>> gb = HistGradientBoostingClassifier(min_samples_leaf=1) + >>> gb.fit(X, y, sample_weight=sample_weight) + HistGradientBoostingClassifier(...) + >>> gb.predict([[1, 0]]) + array([1]) + >>> gb.predict_proba([[1, 0]])[0, 1] + 0.99... - * :ref:`sphx_glr_auto_examples_ensemble_plot_forest_iris.py` - * :ref:`sphx_glr_auto_examples_ensemble_plot_forest_importances_faces.py` - * :ref:`sphx_glr_auto_examples_miscellaneous_plot_multioutput_face_completion.py` +As you can see, the `[1, 0]` is comfortably classified as `1` since the first +two samples are ignored due to their sample weights. -.. topic:: References +Implementation detail: taking sample weights into account amounts to +multiplying the gradients (and the hessians) by the sample weights. Note that +the binning stage (specifically the quantiles computation) does not take the +weights into account. - .. [B2001] L. Breiman, "Random Forests", Machine Learning, 45(1), 5-32, 2001. +.. _categorical_support_gbdt: - .. [B1998] L. Breiman, "Arcing Classifiers", Annals of Statistics 1998. +Categorical Features Support +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - * P. Geurts, D. Ernst., and L. Wehenkel, "Extremely randomized - trees", Machine Learning, 63(1), 3-42, 2006. +:class:`HistGradientBoostingClassifier` and +:class:`HistGradientBoostingRegressor` have native support for categorical +features: they can consider splits on non-ordered, categorical data. -.. _random_forest_feature_importance: +For datasets with categorical features, using the native categorical support +is often better than relying on one-hot encoding +(:class:`~sklearn.preprocessing.OneHotEncoder`), because one-hot encoding +requires more tree depth to achieve equivalent splits. It is also usually +better to rely on the native categorical support rather than to treat +categorical features as continuous (ordinal), which happens for ordinal-encoded +categorical data, since categories are nominal quantities where order does not +matter. -Feature importance evaluation ------------------------------ +To enable categorical support, a boolean mask can be passed to the +`categorical_features` parameter, indicating which feature is categorical. In +the following, the first feature will be treated as categorical and the +second feature as numerical:: -The relative rank (i.e. 
depth) of a feature used as a decision node in a -tree can be used to assess the relative importance of that feature with -respect to the predictability of the target variable. Features used at -the top of the tree contribute to the final prediction decision of a -larger fraction of the input samples. The **expected fraction of the -samples** they contribute to can thus be used as an estimate of the -**relative importance of the features**. In scikit-learn, the fraction of -samples a feature contributes to is combined with the decrease in impurity -from splitting them to create a normalized estimate of the predictive power -of that feature. + >>> gbdt = HistGradientBoostingClassifier(categorical_features=[True, False]) -By **averaging** the estimates of predictive ability over several randomized -trees one can **reduce the variance** of such an estimate and use it -for feature selection. This is known as the mean decrease in impurity, or MDI. -Refer to [L2014]_ for more information on MDI and feature importance -evaluation with Random Forests. +Equivalently, one can pass a list of integers indicating the indices of the +categorical features:: -.. warning:: + >>> gbdt = HistGradientBoostingClassifier(categorical_features=[0]) - The impurity-based feature importances computed on tree-based models suffer - from two flaws that can lead to misleading conclusions. First they are - computed on statistics derived from the training dataset and therefore **do - not necessarily inform us on which features are most important to make good - predictions on held-out dataset**. Secondly, **they favor high cardinality - features**, that is features with many unique values. - :ref:`permutation_importance` is an alternative to impurity-based feature - importance that does not suffer from these flaws. These two methods of - obtaining feature importance are explored in: - :ref:`sphx_glr_auto_examples_inspection_plot_permutation_importance.py`. +When the input is a DataFrame, it is also possible to pass a list of column +names:: -The following example shows a color-coded representation of the relative -importances of each individual pixel for a face recognition task using -a :class:`ExtraTreesClassifier` model. + >>> gbdt = HistGradientBoostingClassifier(categorical_features=["site", "manufacturer"]) -.. figure:: ../auto_examples/ensemble/images/sphx_glr_plot_forest_importances_faces_001.png - :target: ../auto_examples/ensemble/plot_forest_importances_faces.html - :align: center - :scale: 75 +Finally, when the input is a DataFrame we can use +`categorical_features="from_dtype"` in which case all columns with a categorical +`dtype` will be treated as categorical features. -In practice those estimates are stored as an attribute named -``feature_importances_`` on the fitted model. This is an array with shape -``(n_features,)`` whose values are positive and sum to 1.0. The higher -the value, the more important is the contribution of the matching feature -to the prediction function. +The cardinality of each categorical feature must be less than the `max_bins` +parameter. For an example using histogram-based gradient boosting on categorical +features, see +:ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_categorical.py`. -.. topic:: Examples: +If there are missing values during training, the missing values will be +treated as a proper category. 
If there are no missing values during training, +then at prediction time, missing values are mapped to the child node that has +the most samples (just like for continuous features). When predicting, +categories that were not seen during fit time will be treated as missing +values. - * :ref:`sphx_glr_auto_examples_ensemble_plot_forest_importances_faces.py` - * :ref:`sphx_glr_auto_examples_ensemble_plot_forest_importances.py` +.. dropdown:: Split finding with categorical features -.. topic:: References + The canonical way of considering categorical splits in a tree is to consider + all of the :math:`2^{K - 1} - 1` partitions, where :math:`K` is the number of + categories. This can quickly become prohibitive when :math:`K` is large. + Fortunately, since gradient boosting trees are always regression trees (even + for classification problems), there exist a faster strategy that can yield + equivalent splits. First, the categories of a feature are sorted according to + the variance of the target, for each category `k`. Once the categories are + sorted, one can consider *continuous partitions*, i.e. treat the categories + as if they were ordered continuous values (see Fisher [Fisher1958]_ for a + formal proof). As a result, only :math:`K - 1` splits need to be considered + instead of :math:`2^{K - 1} - 1`. The initial sorting is a + :math:`\mathcal{O}(K \log(K))` operation, leading to a total complexity of + :math:`\mathcal{O}(K \log(K) + K)`, instead of :math:`\mathcal{O}(2^K)`. - .. [L2014] G. Louppe, :arxiv:`"Understanding Random Forests: From Theory to - Practice" <1407.7502>`, - PhD Thesis, U. of Liege, 2014. +.. rubric:: Examples -.. _random_trees_embedding: +* :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_categorical.py` -Totally Random Trees Embedding ------------------------------- +.. _monotonic_cst_gbdt: -:class:`RandomTreesEmbedding` implements an unsupervised transformation of the -data. Using a forest of completely random trees, :class:`RandomTreesEmbedding` -encodes the data by the indices of the leaves a data point ends up in. This -index is then encoded in a one-of-K manner, leading to a high dimensional, -sparse binary coding. -This coding can be computed very efficiently and can then be used as a basis -for other learning tasks. -The size and sparsity of the code can be influenced by choosing the number of -trees and the maximum depth per tree. For each tree in the ensemble, the coding -contains one entry of one. The size of the coding is at most ``n_estimators * 2 -** max_depth``, the maximum number of leaves in the forest. +Monotonic Constraints +^^^^^^^^^^^^^^^^^^^^^ -As neighboring data points are more likely to lie within the same leaf of a -tree, the transformation performs an implicit, non-parametric density -estimation. +Depending on the problem at hand, you may have prior knowledge indicating +that a given feature should in general have a positive (or negative) effect +on the target value. For example, all else being equal, a higher credit +score should increase the probability of getting approved for a loan. +Monotonic constraints allow you to incorporate such prior knowledge into the +model. -.. topic:: Examples: +For a predictor :math:`F` with two features: - * :ref:`sphx_glr_auto_examples_ensemble_plot_random_forest_embedding.py` +- a **monotonic increase constraint** is a constraint of the form: - * :ref:`sphx_glr_auto_examples_manifold_plot_lle_digits.py` compares non-linear - dimensionality reduction techniques on handwritten digits. + .. 
math:: + x_1 \leq x_1' \implies F(x_1, x_2) \leq F(x_1', x_2) - * :ref:`sphx_glr_auto_examples_ensemble_plot_feature_transformation.py` compares - supervised and unsupervised tree based feature transformations. +- a **monotonic decrease constraint** is a constraint of the form: -.. seealso:: + .. math:: + x_1 \leq x_1' \implies F(x_1, x_2) \geq F(x_1', x_2) - :ref:`manifold` techniques can also be useful to derive non-linear - representations of feature space, also these approaches focus also on - dimensionality reduction. +You can specify a monotonic constraint on each feature using the +`monotonic_cst` parameter. For each feature, a value of 0 indicates no +constraint, while 1 and -1 indicate a monotonic increase and +monotonic decrease constraint, respectively:: + >>> from sklearn.ensemble import HistGradientBoostingRegressor -.. _adaboost: + ... # monotonic increase, monotonic decrease, and no constraint on the 3 features + >>> gbdt = HistGradientBoostingRegressor(monotonic_cst=[1, -1, 0]) -AdaBoost -======== +In a binary classification context, imposing a monotonic increase (decrease) constraint means that higher values of the feature are supposed +to have a positive (negative) effect on the probability of samples +to belong to the positive class. -The module :mod:`sklearn.ensemble` includes the popular boosting algorithm -AdaBoost, introduced in 1995 by Freund and Schapire [FS1995]_. +Nevertheless, monotonic constraints only marginally constrain feature effects on the output. +For instance, monotonic increase and decrease constraints cannot be used to enforce the +following modelling constraint: -The core principle of AdaBoost is to fit a sequence of weak learners (i.e., -models that are only slightly better than random guessing, such as small -decision trees) on repeatedly modified versions of the data. The predictions -from all of them are then combined through a weighted majority vote (or sum) to -produce the final prediction. The data modifications at each so-called boosting -iteration consist of applying weights :math:`w_1`, :math:`w_2`, ..., :math:`w_N` -to each of the training samples. Initially, those weights are all set to -:math:`w_i = 1/N`, so that the first step simply trains a weak learner on the -original data. For each successive iteration, the sample weights are -individually modified and the learning algorithm is reapplied to the reweighted -data. At a given step, those training examples that were incorrectly predicted -by the boosted model induced at the previous step have their weights increased, -whereas the weights are decreased for those that were predicted correctly. As -iterations proceed, examples that are difficult to predict receive -ever-increasing influence. Each subsequent weak learner is thereby forced to -concentrate on the examples that are missed by the previous ones in the sequence -[HTF]_. +.. math:: + x_1 \leq x_1' \implies F(x_1, x_2) \leq F(x_1', x_2') -.. figure:: ../auto_examples/ensemble/images/sphx_glr_plot_adaboost_hastie_10_2_001.png - :target: ../auto_examples/ensemble/plot_adaboost_hastie_10_2.html - :align: center - :scale: 75 +Also, monotonic constraints are not supported for multiclass classification. -AdaBoost can be used both for classification and regression problems: +.. note:: + Since categories are unordered quantities, it is not possible to enforce + monotonic constraints on categorical features. - - For multi-class classification, :class:`AdaBoostClassifier` implements - AdaBoost-SAMME and AdaBoost-SAMME.R [ZZRH2009]_. +.. 
rubric:: Examples - - For regression, :class:`AdaBoostRegressor` implements AdaBoost.R2 [D1997]_. +* :ref:`sphx_glr_auto_examples_ensemble_plot_monotonic_constraints.py` +* :ref:`sphx_glr_auto_examples_ensemble_plot_hgbt_regression.py` -Usage ------ +.. _interaction_cst_hgbt: -The following example shows how to fit an AdaBoost classifier with 100 weak -learners:: +Interaction constraints +^^^^^^^^^^^^^^^^^^^^^^^ - >>> from sklearn.model_selection import cross_val_score - >>> from sklearn.datasets import load_iris - >>> from sklearn.ensemble import AdaBoostClassifier +A priori, the histogram gradient boosted trees are allowed to use any feature +to split a node into child nodes. This creates so called interactions between +features, i.e. usage of different features as split along a branch. Sometimes, +one wants to restrict the possible interactions, see [Mayer2022]_. This can be +done by the parameter ``interaction_cst``, where one can specify the indices +of features that are allowed to interact. +For instance, with 3 features in total, ``interaction_cst=[{0}, {1}, {2}]`` +forbids all interactions. +The constraints ``[{0, 1}, {1, 2}]`` specifies two groups of possibly +interacting features. Features 0 and 1 may interact with each other, as well +as features 1 and 2. But note that features 0 and 2 are forbidden to interact. +The following depicts a tree and the possible splits of the tree: - >>> X, y = load_iris(return_X_y=True) - >>> clf = AdaBoostClassifier(n_estimators=100) - >>> scores = cross_val_score(clf, X, y, cv=5) - >>> scores.mean() - 0.9... +.. code-block:: none -The number of weak learners is controlled by the parameter ``n_estimators``. The -``learning_rate`` parameter controls the contribution of the weak learners in -the final combination. By default, weak learners are decision stumps. Different -weak learners can be specified through the ``estimator`` parameter. -The main parameters to tune to obtain good results are ``n_estimators`` and -the complexity of the base estimators (e.g., its depth ``max_depth`` or -minimum required number of samples to consider a split ``min_samples_split``). + 1 <- Both constraint groups could be applied from now on + / \ + 1 2 <- Left split still fulfills both constraint groups. + / \ / \ Right split at feature 2 has only group {1, 2} from now on. -.. topic:: Examples: +LightGBM uses the same logic for overlapping groups. - * :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_hastie_10_2.py` compares the - classification error of a decision stump, decision tree, and a boosted - decision stump using AdaBoost-SAMME and AdaBoost-SAMME.R. +Note that features not listed in ``interaction_cst`` are automatically +assigned an interaction group for themselves. With again 3 features, this +means that ``[{0}]`` is equivalent to ``[{0}, {1, 2}]``. - * :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_multiclass.py` shows the performance - of AdaBoost-SAMME and AdaBoost-SAMME.R on a multi-class problem. +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_twoclass.py` shows the decision boundary - and decision function values for a non-linearly separable two-class problem - using AdaBoost-SAMME. +* :ref:`sphx_glr_auto_examples_inspection_plot_partial_dependence.py` - * :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_regression.py` demonstrates regression - with the AdaBoost.R2 algorithm. +.. rubric:: References -.. topic:: References +.. [Mayer2022] M. Mayer, S.C. Bourassa, M. Hoesli, and D.F. Scognamiglio. + 2022. 
:doi:`Machine Learning Applications to Land and Structure Valuation + <10.3390/jrfm15050193>`. + Journal of Risk and Financial Management 15, no. 5: 193 - .. [FS1995] Y. Freund, and R. Schapire, "A Decision-Theoretic Generalization of - On-Line Learning and an Application to Boosting", 1997. +Low-level parallelism +^^^^^^^^^^^^^^^^^^^^^ - .. [ZZRH2009] J. Zhu, H. Zou, S. Rosset, T. Hastie. "Multi-class AdaBoost", - 2009. - .. [D1997] H. Drucker. "Improving Regressors using Boosting Techniques", 1997. +:class:`HistGradientBoostingClassifier` and +:class:`HistGradientBoostingRegressor` use OpenMP +for parallelization through Cython. For more details on how to control the +number of threads, please refer to our :ref:`parallelism` notes. - .. [HTF] T. Hastie, R. Tibshirani and J. Friedman, "Elements of - Statistical Learning Ed. 2", Springer, 2009. +The following parts are parallelized: +- mapping samples from real values to integer-valued bins (finding the bin + thresholds is however sequential) +- building histograms is parallelized over features +- finding the best split point at a node is parallelized over features +- during fit, mapping samples into the left and right children is + parallelized over samples +- gradient and hessians computations are parallelized over samples +- predicting is parallelized over samples -.. _gradient_boosting: +.. _Why_it's_faster: -Gradient Tree Boosting -====================== +Why it's faster +^^^^^^^^^^^^^^^ -`Gradient Tree Boosting `_ -or Gradient Boosted Decision Trees (GBDT) is a generalization -of boosting to arbitrary differentiable loss functions, see the seminal work of -[Friedman2001]_. GBDT is an accurate and effective off-the-shelf procedure that can be -used for both regression and classification problems in a -variety of areas including Web search ranking and ecology. +The bottleneck of a gradient boosting procedure is building the decision +trees. Building a traditional decision tree (as in the other GBDTs +:class:`GradientBoostingClassifier` and :class:`GradientBoostingRegressor`) +requires sorting the samples at each node (for +each feature). Sorting is needed so that the potential gain of a split point +can be computed efficiently. Splitting a single node has thus a complexity +of :math:`\mathcal{O}(n_\text{features} \times n \log(n))` where :math:`n` +is the number of samples at the node. -The module :mod:`sklearn.ensemble` provides methods -for both classification and regression via gradient boosted decision -trees. +:class:`HistGradientBoostingClassifier` and +:class:`HistGradientBoostingRegressor`, in contrast, do not require sorting the +feature values and instead use a data-structure called a histogram, where the +samples are implicitly ordered. Building a histogram has a +:math:`\mathcal{O}(n)` complexity, so the node splitting procedure has a +:math:`\mathcal{O}(n_\text{features} \times n)` complexity, much smaller +than the previous one. In addition, instead of considering :math:`n` split +points, we consider only ``max_bins`` split points, which might be much +smaller. -.. note:: +In order to build histograms, the input data `X` needs to be binned into +integer-valued bins. This binning procedure does require sorting the feature +values, but it only happens once at the very beginning of the boosting process +(not at each node, like in :class:`GradientBoostingClassifier` and +:class:`GradientBoostingRegressor`). 
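To get a rough feel for the difference in practice, one can time the two
implementations on a synthetic dataset (an illustrative sketch only, not part of
the patched documentation; the data shape is arbitrary and the resulting timings
depend entirely on the machine and the chosen parameters)::

    from time import perf_counter

    from sklearn.datasets import make_regression
    from sklearn.ensemble import (
        GradientBoostingRegressor,
        HistGradientBoostingRegressor,
    )

    # 50,000 samples is in the regime where the histogram-based estimator
    # is expected to pull ahead.
    X, y = make_regression(n_samples=50_000, n_features=20, random_state=0)

    for Estimator in (GradientBoostingRegressor, HistGradientBoostingRegressor):
        tic = perf_counter()
        Estimator(random_state=0).fit(X, y)
        print(f"{Estimator.__name__}: {perf_counter() - tic:.1f}s")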
- Scikit-learn 0.21 introduces two new implementations of - gradient boosting trees, namely :class:`HistGradientBoostingClassifier` - and :class:`HistGradientBoostingRegressor`, inspired by - `LightGBM `__ (See [LightGBM]_). +Finally, many parts of the implementation of +:class:`HistGradientBoostingClassifier` and +:class:`HistGradientBoostingRegressor` are parallelized. - These histogram-based estimators can be **orders of magnitude faster** - than :class:`GradientBoostingClassifier` and - :class:`GradientBoostingRegressor` when the number of samples is larger - than tens of thousands of samples. +.. rubric:: References - They also have built-in support for missing values, which avoids the need - for an imputer. +.. [XGBoost] Tianqi Chen, Carlos Guestrin, :arxiv:`"XGBoost: A Scalable Tree + Boosting System" <1603.02754>` - These estimators are described in more detail below in - :ref:`histogram_based_gradient_boosting`. +.. [LightGBM] Ke et. al. `"LightGBM: A Highly Efficient Gradient + BoostingDecision Tree" `_ - The following guide focuses on :class:`GradientBoostingClassifier` and - :class:`GradientBoostingRegressor`, which might be preferred for small - sample sizes since binning may lead to split points that are too approximate - in this setting. +.. [Fisher1958] Fisher, W.D. (1958). `"On Grouping for Maximum Homogeneity" + `_ + Journal of the American Statistical Association, 53, 789-798. -The usage and the parameters of :class:`GradientBoostingClassifier` and -:class:`GradientBoostingRegressor` are described below. The 2 most important -parameters of these estimators are `n_estimators` and `learning_rate`. - -Classification ---------------- - -:class:`GradientBoostingClassifier` supports both binary and multi-class -classification. -The following example shows how to fit a gradient boosting classifier -with 100 decision stumps as weak learners:: - - >>> from sklearn.datasets import make_hastie_10_2 - >>> from sklearn.ensemble import GradientBoostingClassifier - - >>> X, y = make_hastie_10_2(random_state=0) - >>> X_train, X_test = X[:2000], X[2000:] - >>> y_train, y_test = y[:2000], y[2000:] - >>> clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, - ... max_depth=1, random_state=0).fit(X_train, y_train) - >>> clf.score(X_test, y_test) - 0.913... - -The number of weak learners (i.e. regression trees) is controlled by the -parameter ``n_estimators``; :ref:`The size of each tree -` can be controlled either by setting the tree -depth via ``max_depth`` or by setting the number of leaf nodes via -``max_leaf_nodes``. The ``learning_rate`` is a hyper-parameter in the range -(0.0, 1.0] that controls overfitting via :ref:`shrinkage -` . - -.. note:: - - Classification with more than 2 classes requires the induction - of ``n_classes`` regression trees at each iteration, - thus, the total number of induced trees equals - ``n_classes * n_estimators``. For datasets with a large number - of classes we strongly recommend to use - :class:`HistGradientBoostingClassifier` as an alternative to - :class:`GradientBoostingClassifier` . - -Regression ----------- - -:class:`GradientBoostingRegressor` supports a number of -:ref:`different loss functions ` -for regression which can be specified via the argument -``loss``; the default loss function for regression is squared error -(``'squared_error'``). 
+:class:`GradientBoostingClassifier` and :class:`GradientBoostingRegressor` +---------------------------------------------------------------------------- -:: +The usage and the parameters of :class:`GradientBoostingClassifier` and +:class:`GradientBoostingRegressor` are described below. The 2 most important +parameters of these estimators are `n_estimators` and `learning_rate`. - >>> import numpy as np - >>> from sklearn.metrics import mean_squared_error - >>> from sklearn.datasets import make_friedman1 - >>> from sklearn.ensemble import GradientBoostingRegressor - - >>> X, y = make_friedman1(n_samples=1200, random_state=0, noise=1.0) - >>> X_train, X_test = X[:200], X[200:] - >>> y_train, y_test = y[:200], y[200:] - >>> est = GradientBoostingRegressor( - ... n_estimators=100, learning_rate=0.1, max_depth=1, random_state=0, - ... loss='squared_error' - ... ).fit(X_train, y_train) - >>> mean_squared_error(y_test, est.predict(X_test)) - 5.00... - -The figure below shows the results of applying :class:`GradientBoostingRegressor` -with least squares loss and 500 base learners to the diabetes dataset -(:func:`sklearn.datasets.load_diabetes`). -The plot shows the train and test error at each iteration. -The train error at each iteration is stored in the -:attr:`~GradientBoostingRegressor.train_score_` attribute -of the gradient boosting model. The test error at each iterations can be obtained -via the :meth:`~GradientBoostingRegressor.staged_predict` method which returns a -generator that yields the predictions at each stage. Plots like these can be used -to determine the optimal number of trees (i.e. ``n_estimators``) by early stopping. - -.. figure:: ../auto_examples/ensemble/images/sphx_glr_plot_gradient_boosting_regression_001.png - :target: ../auto_examples/ensemble/plot_gradient_boosting_regression.html - :align: center - :scale: 75 +.. dropdown:: Classification + + :class:`GradientBoostingClassifier` supports both binary and multi-class + classification. + The following example shows how to fit a gradient boosting classifier + with 100 decision stumps as weak learners:: + + >>> from sklearn.datasets import make_hastie_10_2 + >>> from sklearn.ensemble import GradientBoostingClassifier + + >>> X, y = make_hastie_10_2(random_state=0) + >>> X_train, X_test = X[:2000], X[2000:] + >>> y_train, y_test = y[:2000], y[2000:] + + >>> clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, + ... max_depth=1, random_state=0).fit(X_train, y_train) + >>> clf.score(X_test, y_test) + 0.913... + + The number of weak learners (i.e. regression trees) is controlled by the + parameter ``n_estimators``; :ref:`The size of each tree + ` can be controlled either by setting the tree + depth via ``max_depth`` or by setting the number of leaf nodes via + ``max_leaf_nodes``. The ``learning_rate`` is a hyper-parameter in the range + (0.0, 1.0] that controls overfitting via :ref:`shrinkage + ` . + + .. note:: + + Classification with more than 2 classes requires the induction + of ``n_classes`` regression trees at each iteration, + thus, the total number of induced trees equals + ``n_classes * n_estimators``. For datasets with a large number + of classes we strongly recommend to use + :class:`HistGradientBoostingClassifier` as an alternative to + :class:`GradientBoostingClassifier` . + +.. 
dropdown:: Regression + + :class:`GradientBoostingRegressor` supports a number of + :ref:`different loss functions ` + for regression which can be specified via the argument + ``loss``; the default loss function for regression is squared error + (``'squared_error'``). + + :: + + >>> import numpy as np + >>> from sklearn.metrics import mean_squared_error + >>> from sklearn.datasets import make_friedman1 + >>> from sklearn.ensemble import GradientBoostingRegressor + + >>> X, y = make_friedman1(n_samples=1200, random_state=0, noise=1.0) + >>> X_train, X_test = X[:200], X[200:] + >>> y_train, y_test = y[:200], y[200:] + >>> est = GradientBoostingRegressor( + ... n_estimators=100, learning_rate=0.1, max_depth=1, random_state=0, + ... loss='squared_error' + ... ).fit(X_train, y_train) + >>> mean_squared_error(y_test, est.predict(X_test)) + 5.00... + + The figure below shows the results of applying :class:`GradientBoostingRegressor` + with least squares loss and 500 base learners to the diabetes dataset + (:func:`sklearn.datasets.load_diabetes`). + The plot shows the train and test error at each iteration. + The train error at each iteration is stored in the + `train_score_` attribute of the gradient boosting model. + The test error at each iterations can be obtained + via the :meth:`~GradientBoostingRegressor.staged_predict` method which returns a + generator that yields the predictions at each stage. Plots like these can be used + to determine the optimal number of trees (i.e. ``n_estimators``) by early stopping. + + .. figure:: ../auto_examples/ensemble/images/sphx_glr_plot_gradient_boosting_regression_001.png + :target: ../auto_examples/ensemble/plot_gradient_boosting_regression.html + :align: center + :scale: 75 -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_regression.py` - * :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_oob.py` +* :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_regression.py` +* :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_oob.py` .. _gradient_boosting_warm_start: Fitting additional weak-learners --------------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Both :class:`GradientBoostingRegressor` and :class:`GradientBoostingClassifier` support ``warm_start=True`` which allows you to add more estimators to an already @@ -623,7 +583,22 @@ fitted model. :: - >>> _ = est.set_params(n_estimators=200, warm_start=True) # set warm_start and new nr of trees + >>> import numpy as np + >>> from sklearn.metrics import mean_squared_error + >>> from sklearn.datasets import make_friedman1 + >>> from sklearn.ensemble import GradientBoostingRegressor + + >>> X, y = make_friedman1(n_samples=1200, random_state=0, noise=1.0) + >>> X_train, X_test = X[:200], X[200:] + >>> y_train, y_test = y[:200], y[200:] + >>> est = GradientBoostingRegressor( + ... n_estimators=100, learning_rate=0.1, max_depth=1, random_state=0, + ... loss='squared_error' + ... ) + >>> est = est.fit(X_train, y_train) # fit with 100 trees + >>> mean_squared_error(y_test, est.predict(X_test)) + 5.00... + >>> _ = est.set_params(n_estimators=200, warm_start=True) # set warm_start and increase num of trees >>> _ = est.fit(X_train, y_train) # fit additional 100 trees to est >>> mean_squared_error(y_test, est.predict(X_test)) 3.84... @@ -631,7 +606,7 @@ fitted model. .. 
_gradient_boosting_tree_size: Controlling the tree size -------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^^^ The size of the regression tree base learners defines the level of variable interactions that can be captured by the gradient boosting model. In general, @@ -657,51 +632,52 @@ The parameter ``max_leaf_nodes`` corresponds to the variable ``J`` in the chapter on gradient boosting in [Friedman2001]_ and is related to the parameter ``interaction.depth`` in R's gbm package where ``max_leaf_nodes == interaction.depth + 1`` . +.. _gradient_boosting_formulation: + Mathematical formulation -------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^ We first present GBRT for regression, and then detail the classification case. -Regression -^^^^^^^^^^ +.. dropdown:: Regression -GBRT regressors are additive models whose prediction :math:`\hat{y}_i` for a -given input :math:`x_i` is of the following form: + GBRT regressors are additive models whose prediction :math:`\hat{y}_i` for a + given input :math:`x_i` is of the following form: .. math:: \hat{y}_i = F_M(x_i) = \sum_{m=1}^{M} h_m(x_i) -where the :math:`h_m` are estimators called *weak learners* in the context -of boosting. Gradient Tree Boosting uses :ref:`decision tree regressors -` of fixed size as weak learners. The constant M corresponds to the -`n_estimators` parameter. + where the :math:`h_m` are estimators called *weak learners* in the context + of boosting. Gradient Tree Boosting uses :ref:`decision tree regressors + ` of fixed size as weak learners. The constant M corresponds to the + `n_estimators` parameter. -Similar to other boosting algorithms, a GBRT is built in a greedy fashion: + Similar to other boosting algorithms, a GBRT is built in a greedy fashion: .. math:: F_m(x) = F_{m-1}(x) + h_m(x), -where the newly added tree :math:`h_m` is fitted in order to minimize a sum -of losses :math:`L_m`, given the previous ensemble :math:`F_{m-1}`: + where the newly added tree :math:`h_m` is fitted in order to minimize a sum + of losses :math:`L_m`, given the previous ensemble :math:`F_{m-1}`: .. math:: h_m = \arg\min_{h} L_m = \arg\min_{h} \sum_{i=1}^{n} l(y_i, F_{m-1}(x_i) + h(x_i)), -where :math:`l(y_i, F(x_i))` is defined by the `loss` parameter, detailed -in the next section. + where :math:`l(y_i, F(x_i))` is defined by the `loss` parameter, detailed + in the next section. -By default, the initial model :math:`F_{0}` is chosen as the constant that -minimizes the loss: for a least-squares loss, this is the empirical mean of -the target values. The initial model can also be specified via the ``init`` -argument. + By default, the initial model :math:`F_{0}` is chosen as the constant that + minimizes the loss: for a least-squares loss, this is the empirical mean of + the target values. The initial model can also be specified via the ``init`` + argument. -Using a first-order Taylor approximation, the value of :math:`l` can be -approximated as follows: + Using a first-order Taylor approximation, the value of :math:`l` can be + approximated as follows: .. math:: @@ -710,111 +686,110 @@ approximated as follows: + h_m(x_i) \left[ \frac{\partial l(y_i, F(x_i))}{\partial F(x_i)} \right]_{F=F_{m - 1}}. -.. note:: + .. note:: - Briefly, a first-order Taylor approximation says that - :math:`l(z) \approx l(a) + (z - a) \frac{\partial l}{\partial z}(a)`. 
- Here, :math:`z` corresponds to :math:`F_{m - 1}(x_i) + h_m(x_i)`, and - :math:`a` corresponds to :math:`F_{m-1}(x_i)` + Briefly, a first-order Taylor approximation says that + :math:`l(z) \approx l(a) + (z - a) \frac{\partial l}{\partial z}(a)`. + Here, :math:`z` corresponds to :math:`F_{m - 1}(x_i) + h_m(x_i)`, and + :math:`a` corresponds to :math:`F_{m-1}(x_i)` -The quantity :math:`\left[ \frac{\partial l(y_i, F(x_i))}{\partial F(x_i)} -\right]_{F=F_{m - 1}}` is the derivative of the loss with respect to its -second parameter, evaluated at :math:`F_{m-1}(x)`. It is easy to compute for -any given :math:`F_{m - 1}(x_i)` in a closed form since the loss is -differentiable. We will denote it by :math:`g_i`. + The quantity :math:`\left[ \frac{\partial l(y_i, F(x_i))}{\partial F(x_i)} + \right]_{F=F_{m - 1}}` is the derivative of the loss with respect to its + second parameter, evaluated at :math:`F_{m-1}(x)`. It is easy to compute for + any given :math:`F_{m - 1}(x_i)` in a closed form since the loss is + differentiable. We will denote it by :math:`g_i`. -Removing the constant terms, we have: + Removing the constant terms, we have: .. math:: h_m \approx \arg\min_{h} \sum_{i=1}^{n} h(x_i) g_i -This is minimized if :math:`h(x_i)` is fitted to predict a value that is -proportional to the negative gradient :math:`-g_i`. Therefore, at each -iteration, **the estimator** :math:`h_m` **is fitted to predict the negative -gradients of the samples**. The gradients are updated at each iteration. -This can be considered as some kind of gradient descent in a functional -space. + This is minimized if :math:`h(x_i)` is fitted to predict a value that is + proportional to the negative gradient :math:`-g_i`. Therefore, at each + iteration, **the estimator** :math:`h_m` **is fitted to predict the negative + gradients of the samples**. The gradients are updated at each iteration. + This can be considered as some kind of gradient descent in a functional + space. -.. note:: + .. note:: - For some losses, e.g. ``'absolute_error'`` where the gradients - are :math:`\pm 1`, the values predicted by a fitted :math:`h_m` are not - accurate enough: the tree can only output integer values. As a result, the - leaves values of the tree :math:`h_m` are modified once the tree is - fitted, such that the leaves values minimize the loss :math:`L_m`. The - update is loss-dependent: for the absolute error loss, the value of - a leaf is updated to the median of the samples in that leaf. + For some losses, e.g. ``'absolute_error'`` where the gradients + are :math:`\pm 1`, the values predicted by a fitted :math:`h_m` are not + accurate enough: the tree can only output integer values. As a result, the + leaves values of the tree :math:`h_m` are modified once the tree is + fitted, such that the leaves values minimize the loss :math:`L_m`. The + update is loss-dependent: for the absolute error loss, the value of + a leaf is updated to the median of the samples in that leaf. -Classification -^^^^^^^^^^^^^^ +.. dropdown:: Classification -Gradient boosting for classification is very similar to the regression case. -However, the sum of the trees :math:`F_M(x_i) = \sum_m h_m(x_i)` is not -homogeneous to a prediction: it cannot be a class, since the trees predict -continuous values. + Gradient boosting for classification is very similar to the regression case. + However, the sum of the trees :math:`F_M(x_i) = \sum_m h_m(x_i)` is not + homogeneous to a prediction: it cannot be a class, since the trees predict + continuous values. 
-The mapping from the value :math:`F_M(x_i)` to a class or a probability is -loss-dependent. For the log-loss, the probability that -:math:`x_i` belongs to the positive class is modeled as :math:`p(y_i = 1 | -x_i) = \sigma(F_M(x_i))` where :math:`\sigma` is the sigmoid or expit function. + The mapping from the value :math:`F_M(x_i)` to a class or a probability is + loss-dependent. For the log-loss, the probability that + :math:`x_i` belongs to the positive class is modeled as :math:`p(y_i = 1 | + x_i) = \sigma(F_M(x_i))` where :math:`\sigma` is the sigmoid or expit function. -For multiclass classification, K trees (for K classes) are built at each of -the :math:`M` iterations. The probability that :math:`x_i` belongs to class -k is modeled as a softmax of the :math:`F_{M,k}(x_i)` values. + For multiclass classification, K trees (for K classes) are built at each of + the :math:`M` iterations. The probability that :math:`x_i` belongs to class + k is modeled as a softmax of the :math:`F_{M,k}(x_i)` values. -Note that even for a classification task, the :math:`h_m` sub-estimator is -still a regressor, not a classifier. This is because the sub-estimators are -trained to predict (negative) *gradients*, which are always continuous -quantities. + Note that even for a classification task, the :math:`h_m` sub-estimator is + still a regressor, not a classifier. This is because the sub-estimators are + trained to predict (negative) *gradients*, which are always continuous + quantities. .. _gradient_boosting_loss: Loss Functions --------------- +^^^^^^^^^^^^^^ The following loss functions are supported and can be specified using the parameter ``loss``: - * Regression - - * Squared error (``'squared_error'``): The natural choice for regression - due to its superior computational properties. The initial model is - given by the mean of the target values. - * Absolute error (``'absolute_error'``): A robust loss function for - regression. The initial model is given by the median of the - target values. - * Huber (``'huber'``): Another robust loss function that combines - least squares and least absolute deviation; use ``alpha`` to - control the sensitivity with regards to outliers (see [Friedman2001]_ for - more details). - * Quantile (``'quantile'``): A loss function for quantile regression. - Use ``0 < alpha < 1`` to specify the quantile. This loss function - can be used to create prediction intervals - (see :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_quantile.py`). - - * Classification - - * Binary log-loss (``'log-loss'``): The binomial - negative log-likelihood loss function for binary classification. It provides - probability estimates. The initial model is given by the - log odds-ratio. - * Multi-class log-loss (``'log-loss'``): The multinomial - negative log-likelihood loss function for multi-class classification with - ``n_classes`` mutually exclusive classes. It provides - probability estimates. The initial model is given by the - prior probability of each class. At each iteration ``n_classes`` - regression trees have to be constructed which makes GBRT rather - inefficient for data sets with a large number of classes. - * Exponential loss (``'exponential'``): The same loss function - as :class:`AdaBoostClassifier`. Less robust to mislabeled - examples than ``'log-loss'``; can only be used for binary - classification. +.. dropdown:: Regression + + * Squared error (``'squared_error'``): The natural choice for regression + due to its superior computational properties. 
The initial model is + given by the mean of the target values. + * Absolute error (``'absolute_error'``): A robust loss function for + regression. The initial model is given by the median of the + target values. + * Huber (``'huber'``): Another robust loss function that combines + least squares and least absolute deviation; use ``alpha`` to + control the sensitivity with regards to outliers (see [Friedman2001]_ for + more details). + * Quantile (``'quantile'``): A loss function for quantile regression. + Use ``0 < alpha < 1`` to specify the quantile. This loss function + can be used to create prediction intervals + (see :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_quantile.py`). + +.. dropdown:: Classification + + * Binary log-loss (``'log-loss'``): The binomial + negative log-likelihood loss function for binary classification. It provides + probability estimates. The initial model is given by the + log odds-ratio. + * Multi-class log-loss (``'log-loss'``): The multinomial + negative log-likelihood loss function for multi-class classification with + ``n_classes`` mutually exclusive classes. It provides + probability estimates. The initial model is given by the + prior probability of each class. At each iteration ``n_classes`` + regression trees have to be constructed which makes GBRT rather + inefficient for data sets with a large number of classes. + * Exponential loss (``'exponential'``): The same loss function + as :class:`AdaBoostClassifier`. Less robust to mislabeled + examples than ``'log-loss'``; can only be used for binary + classification. .. _gradient_boosting_shrinkage: Shrinkage via learning rate ---------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^^^^ [Friedman2001]_ proposed a simple regularization strategy that scales the contribution of each weak learner by a constant factor :math:`\nu`: @@ -833,12 +808,14 @@ of ``learning_rate`` require larger numbers of weak learners to maintain a constant training error. Empirical evidence suggests that small values of ``learning_rate`` favor better test error. [HTF]_ recommend to set the learning rate to a small constant -(e.g. ``learning_rate <= 0.1``) and choose ``n_estimators`` by early -stopping. For a more detailed discussion of the interaction between +(e.g. ``learning_rate <= 0.1``) and choose ``n_estimators`` large enough +that early stopping applies, +see :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_early_stopping.py` +for a more detailed discussion of the interaction between ``learning_rate`` and ``n_estimators`` see [R2007]_. Subsampling ------------ +^^^^^^^^^^^^ [Friedman2002]_ proposed stochastic gradient boosting, which combines gradient boosting with bootstrap averaging (bagging). At each iteration @@ -867,23 +844,22 @@ parameter. Stochastic gradient boosting allows to compute out-of-bag estimates of the test deviance by computing the improvement in deviance on the examples that are not included in the bootstrap sample (i.e. the out-of-bag examples). -The improvements are stored in the attribute -:attr:`~GradientBoostingRegressor.oob_improvement_`. ``oob_improvement_[i]`` holds -the improvement in terms of the loss on the OOB samples if you add the i-th stage -to the current predictions. +The improvements are stored in the attribute `oob_improvement_`. +``oob_improvement_[i]`` holds the improvement in terms of the loss on the OOB samples +if you add the i-th stage to the current predictions. 
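
The out-of-bag improvements are only computed when ``subsample < 1.0``. As a
minimal sketch (on a hypothetical synthetic dataset), their cumulative sum can
serve as a rough out-of-bag estimate of how many boosting iterations are
actually useful::

    >>> import numpy as np
    >>> from sklearn.datasets import make_friedman1
    >>> from sklearn.ensemble import GradientBoostingRegressor

    >>> X, y = make_friedman1(n_samples=1200, random_state=0, noise=1.0)
    >>> est = GradientBoostingRegressor(
    ...     n_estimators=200, subsample=0.5, random_state=0
    ... ).fit(X, y)
    >>> est.oob_improvement_.shape  # one entry per boosting iteration
    (200,)
    >>> # crude OOB-based guess of the useful number of iterations
    >>> n_useful = int(np.argmax(np.cumsum(est.oob_improvement_))) + 1
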
Out-of-bag estimates can be used for model selection, for example to determine the optimal number of iterations. OOB estimates are usually very pessimistic thus we recommend to use cross-validation instead and only use OOB if cross-validation is too time consuming. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_regularization.py` - * :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_oob.py` - * :ref:`sphx_glr_auto_examples_ensemble_plot_ensemble_oob.py` +* :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_regularization.py` +* :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_oob.py` +* :ref:`sphx_glr_auto_examples_ensemble_plot_ensemble_oob.py` Interpretation with feature importance --------------------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Individual decision trees can be interpreted easily by simply visualizing the tree structure. Gradient boosting models, however, @@ -923,392 +899,424 @@ Note that this computation of feature importance is based on entropy, and it is distinct from :func:`sklearn.inspection.permutation_importance` which is based on permutation of the features. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_regression.py` +* :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_regression.py` -.. topic:: References +.. rubric:: References - .. [Friedman2001] Friedman, J.H. (2001). :doi:`Greedy function approximation: A gradient - boosting machine <10.1214/aos/1013203451>`. - Annals of Statistics, 29, 1189-1232. +.. [Friedman2001] Friedman, J.H. (2001). :doi:`Greedy function approximation: A gradient + boosting machine <10.1214/aos/1013203451>`. + Annals of Statistics, 29, 1189-1232. - .. [Friedman2002] Friedman, J.H. (2002). `Stochastic gradient boosting. - `_. - Computational Statistics & Data Analysis, 38, 367-378. +.. [Friedman2002] Friedman, J.H. (2002). `Stochastic gradient boosting. + `_. + Computational Statistics & Data Analysis, 38, 367-378. - .. [R2007] G. Ridgeway (2006). `Generalized Boosted Models: A guide to the gbm - package `_ +.. [R2007] G. Ridgeway (2006). `Generalized Boosted Models: A guide to the gbm + package `_ -.. _histogram_based_gradient_boosting: +.. _forest: -Histogram-Based Gradient Boosting -================================= +Random forests and other randomized tree ensembles +=================================================== -Scikit-learn 0.21 introduced two new implementations of -gradient boosting trees, namely :class:`HistGradientBoostingClassifier` -and :class:`HistGradientBoostingRegressor`, inspired by -`LightGBM `__ (See [LightGBM]_). +The :mod:`sklearn.ensemble` module includes two averaging algorithms based +on randomized :ref:`decision trees `: the RandomForest algorithm +and the Extra-Trees method. Both algorithms are perturb-and-combine +techniques [B1998]_ specifically designed for trees. This means a diverse +set of classifiers is created by introducing randomness in the classifier +construction. The prediction of the ensemble is given as the averaged +prediction of the individual classifiers. -These histogram-based estimators can be **orders of magnitude faster** -than :class:`GradientBoostingClassifier` and -:class:`GradientBoostingRegressor` when the number of samples is larger -than tens of thousands of samples. 
+As other classifiers, forest classifiers have to be fitted with two +arrays: a sparse or dense array X of shape ``(n_samples, n_features)`` +holding the training samples, and an array Y of shape ``(n_samples,)`` +holding the target values (class labels) for the training samples:: -They also have built-in support for missing values, which avoids the need -for an imputer. + >>> from sklearn.ensemble import RandomForestClassifier + >>> X = [[0, 0], [1, 1]] + >>> Y = [0, 1] + >>> clf = RandomForestClassifier(n_estimators=10) + >>> clf = clf.fit(X, Y) -These fast estimators first bin the input samples ``X`` into -integer-valued bins (typically 256 bins) which tremendously reduces the -number of splitting points to consider, and allows the algorithm to -leverage integer-based data structures (histograms) instead of relying on -sorted continuous values when building the trees. The API of these -estimators is slightly different, and some of the features from -:class:`GradientBoostingClassifier` and :class:`GradientBoostingRegressor` -are not yet supported, for instance some loss functions. +Like :ref:`decision trees `, forests of trees also extend to +:ref:`multi-output problems ` (if Y is an array +of shape ``(n_samples, n_outputs)``). -.. topic:: Examples: +Random Forests +-------------- - * :ref:`sphx_glr_auto_examples_inspection_plot_partial_dependence.py` +In random forests (see :class:`RandomForestClassifier` and +:class:`RandomForestRegressor` classes), each tree in the ensemble is built +from a sample drawn with replacement (i.e., a bootstrap sample) from the +training set. -Usage ------ +Furthermore, when splitting each node during the construction of a tree, the +best split is found through an exhaustive search of the features values of +either all input features or a random subset of size ``max_features``. +(See the :ref:`parameter tuning guidelines ` for more details.) -Most of the parameters are unchanged from -:class:`GradientBoostingClassifier` and :class:`GradientBoostingRegressor`. -One exception is the ``max_iter`` parameter that replaces ``n_estimators``, and -controls the number of iterations of the boosting process:: +The purpose of these two sources of randomness is to decrease the variance of +the forest estimator. Indeed, individual decision trees typically exhibit high +variance and tend to overfit. The injected randomness in forests yield decision +trees with somewhat decoupled prediction errors. By taking an average of those +predictions, some errors can cancel out. Random forests achieve a reduced +variance by combining diverse trees, sometimes at the cost of a slight increase +in bias. In practice the variance reduction is often significant hence yielding +an overall better model. - >>> from sklearn.ensemble import HistGradientBoostingClassifier - >>> from sklearn.datasets import make_hastie_10_2 +In contrast to the original publication [B2001]_, the scikit-learn +implementation combines classifiers by averaging their probabilistic +prediction, instead of letting each classifier vote for a single class. 
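
This averaging can be verified directly on a fitted forest. The following is a
small sketch (on a hypothetical synthetic dataset) showing that the forest's
``predict_proba`` output matches the mean of the per-tree probabilities::

    >>> import numpy as np
    >>> from sklearn.datasets import make_classification
    >>> from sklearn.ensemble import RandomForestClassifier

    >>> X, y = make_classification(n_samples=300, random_state=0)
    >>> clf = RandomForestClassifier(n_estimators=25, random_state=0).fit(X, y)
    >>> per_tree_proba = [tree.predict_proba(X) for tree in clf.estimators_]
    >>> bool(np.allclose(np.mean(per_tree_proba, axis=0), clf.predict_proba(X)))
    True
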
- >>> X, y = make_hastie_10_2(random_state=0) - >>> X_train, X_test = X[:2000], X[2000:] - >>> y_train, y_test = y[:2000], y[2000:] +A competitive alternative to random forests are +:ref:`histogram_based_gradient_boosting` (HGBT) models: - >>> clf = HistGradientBoostingClassifier(max_iter=100).fit(X_train, y_train) - >>> clf.score(X_test, y_test) - 0.8965 +- Building trees: Random forests typically rely on deep trees (that overfit + individually) which uses much computational resources, as they require + several splittings and evaluations of candidate splits. Boosting models + build shallow trees (that underfit individually) which are faster to fit + and predict. -Available losses for regression are 'squared_error', -'absolute_error', which is less sensitive to outliers, and -'poisson', which is well suited to model counts and frequencies. For -classification, 'log_loss' is the only option. For binary classification it uses the -binary log loss, also known as binomial deviance or binary cross-entropy. For -`n_classes >= 3`, it uses the multi-class log loss function, with multinomial deviance -and categorical cross-entropy as alternative names. The appropriate loss version is -selected based on :term:`y` passed to :term:`fit`. +- Sequential boosting: In HGBT, the decision trees are built sequentially, + where each tree is trained to correct the errors made by the previous ones. + This allows them to iteratively improve the model's performance using + relatively few trees. In contrast, random forests use a majority vote to + predict the outcome, which can require a larger number of trees to achieve + the same level of accuracy. -The size of the trees can be controlled through the ``max_leaf_nodes``, -``max_depth``, and ``min_samples_leaf`` parameters. +- Efficient binning: HGBT uses an efficient binning algorithm that can handle + large datasets with a high number of features. The binning algorithm can + pre-process the data to speed up the subsequent tree construction (see + :ref:`Why it's faster `). In contrast, the scikit-learn + implementation of random forests does not use binning and relies on exact + splitting, which can be computationally expensive. -The number of bins used to bin the data is controlled with the ``max_bins`` -parameter. Using less bins acts as a form of regularization. It is -generally recommended to use as many bins as possible, which is the default. +Overall, the computational cost of HGBT versus RF depends on the specific +characteristics of the dataset and the modeling task. It's a good idea +to try both models and compare their performance and computational efficiency +on your specific problem to determine which model is the best fit. -The ``l2_regularization`` parameter is a regularizer on the loss function and -corresponds to :math:`\lambda` in equation (2) of [XGBoost]_. +.. rubric:: Examples -Note that **early-stopping is enabled by default if the number of samples is -larger than 10,000**. The early-stopping behaviour is controlled via the -``early_stopping``, ``scoring``, ``validation_fraction``, -``n_iter_no_change``, and ``tol`` parameters. It is possible to early-stop -using an arbitrary :term:`scorer`, or just the training or validation loss. -Note that for technical reasons, using a scorer is significantly slower than -using the loss. By default, early-stopping is performed if there are at least -10,000 samples in the training set, using the validation loss. 
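
A hedged sketch of these early-stopping parameters (dataset and parameter
values are illustrative only) is shown below; the number of iterations actually
performed can be read off the fitted model afterwards::

    >>> from sklearn.datasets import make_classification
    >>> from sklearn.ensemble import HistGradientBoostingClassifier

    >>> X, y = make_classification(n_samples=15_000, random_state=0)
    >>> clf = HistGradientBoostingClassifier(
    ...     max_iter=1000,
    ...     early_stopping=True,     # also enabled automatically above 10,000 samples
    ...     scoring="loss",          # early-stop on the validation loss
    ...     validation_fraction=0.1,
    ...     n_iter_no_change=10,
    ...     tol=1e-7,
    ... ).fit(X, y)
    >>> n_performed = clf.n_iter_    # number of boosting iterations actually run
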
+* :ref:`sphx_glr_auto_examples_ensemble_plot_forest_hist_grad_boosting_comparison.py` -Missing values support ----------------------- +Extremely Randomized Trees +-------------------------- -:class:`HistGradientBoostingClassifier` and -:class:`HistGradientBoostingRegressor` have built-in support for missing -values (NaNs). +In extremely randomized trees (see :class:`ExtraTreesClassifier` +and :class:`ExtraTreesRegressor` classes), randomness goes one step +further in the way splits are computed. As in random forests, a random +subset of candidate features is used, but instead of looking for the +most discriminative thresholds, thresholds are drawn at random for each +candidate feature and the best of these randomly-generated thresholds is +picked as the splitting rule. This usually allows to reduce the variance +of the model a bit more, at the expense of a slightly greater increase +in bias:: -During training, the tree grower learns at each split point whether samples -with missing values should go to the left or right child, based on the -potential gain. When predicting, samples with missing values are assigned to -the left or right child consequently:: + >>> from sklearn.model_selection import cross_val_score + >>> from sklearn.datasets import make_blobs + >>> from sklearn.ensemble import RandomForestClassifier + >>> from sklearn.ensemble import ExtraTreesClassifier + >>> from sklearn.tree import DecisionTreeClassifier - >>> from sklearn.ensemble import HistGradientBoostingClassifier - >>> import numpy as np + >>> X, y = make_blobs(n_samples=10000, n_features=10, centers=100, + ... random_state=0) - >>> X = np.array([0, 1, 2, np.nan]).reshape(-1, 1) - >>> y = [0, 0, 1, 1] + >>> clf = DecisionTreeClassifier(max_depth=None, min_samples_split=2, + ... random_state=0) + >>> scores = cross_val_score(clf, X, y, cv=5) + >>> scores.mean() + 0.98... - >>> gbdt = HistGradientBoostingClassifier(min_samples_leaf=1).fit(X, y) - >>> gbdt.predict(X) - array([0, 0, 1, 1]) + >>> clf = RandomForestClassifier(n_estimators=10, max_depth=None, + ... min_samples_split=2, random_state=0) + >>> scores = cross_val_score(clf, X, y, cv=5) + >>> scores.mean() + 0.999... -When the missingness pattern is predictive, the splits can be done on -whether the feature value is missing or not:: + >>> clf = ExtraTreesClassifier(n_estimators=10, max_depth=None, + ... min_samples_split=2, random_state=0) + >>> scores = cross_val_score(clf, X, y, cv=5) + >>> scores.mean() > 0.999 + True - >>> X = np.array([0, np.nan, 1, 2, np.nan]).reshape(-1, 1) - >>> y = [0, 1, 0, 0, 1] - >>> gbdt = HistGradientBoostingClassifier(min_samples_leaf=1, - ... max_depth=2, - ... learning_rate=1, - ... max_iter=1).fit(X, y) - >>> gbdt.predict(X) - array([0, 1, 0, 0, 1]) +.. figure:: ../auto_examples/ensemble/images/sphx_glr_plot_forest_iris_001.png + :target: ../auto_examples/ensemble/plot_forest_iris.html + :align: center + :scale: 75% -If no missing values were encountered for a given feature during training, -then samples with missing values are mapped to whichever child has the most -samples. +.. _random_forest_parameters: -.. _sw_hgbdt: +Parameters +---------- -Sample weight support ---------------------- +The main parameters to adjust when using these methods is ``n_estimators`` and +``max_features``. The former is the number of trees in the forest. The larger +the better, but also the longer it will take to compute. In addition, note that +results will stop getting significantly better beyond a critical number of +trees. 
The latter is the size of the random subsets of features to consider +when splitting a node. The lower the greater the reduction of variance, but +also the greater the increase in bias. Empirical good default values are +``max_features=1.0`` or equivalently ``max_features=None`` (always considering +all features instead of a random subset) for regression problems, and +``max_features="sqrt"`` (using a random subset of size ``sqrt(n_features)``) +for classification tasks (where ``n_features`` is the number of features in +the data). The default value of ``max_features=1.0`` is equivalent to bagged +trees and more randomness can be achieved by setting smaller values (e.g. 0.3 +is a typical default in the literature). Good results are often achieved when +setting ``max_depth=None`` in combination with ``min_samples_split=2`` (i.e., +when fully developing the trees). Bear in mind though that these values are +usually not optimal, and might result in models that consume a lot of RAM. +The best parameter values should always be cross-validated. In addition, note +that in random forests, bootstrap samples are used by default +(``bootstrap=True``) while the default strategy for extra-trees is to use the +whole dataset (``bootstrap=False``). When using bootstrap sampling the +generalization error can be estimated on the left out or out-of-bag samples. +This can be enabled by setting ``oob_score=True``. -:class:`HistGradientBoostingClassifier` and -:class:`HistGradientBoostingRegressor` sample support weights during -:term:`fit`. +.. note:: -The following toy example demonstrates how the model ignores the samples with -zero sample weights: + The size of the model with the default parameters is :math:`O( M * N * log (N) )`, + where :math:`M` is the number of trees and :math:`N` is the number of samples. + In order to reduce the size of the model, you can change these parameters: + ``min_samples_split``, ``max_leaf_nodes``, ``max_depth`` and ``min_samples_leaf``. - >>> X = [[1, 0], - ... [1, 0], - ... [1, 0], - ... [0, 1]] - >>> y = [0, 0, 1, 0] - >>> # ignore the first 2 training samples by setting their weight to 0 - >>> sample_weight = [0, 0, 1, 1] - >>> gb = HistGradientBoostingClassifier(min_samples_leaf=1) - >>> gb.fit(X, y, sample_weight=sample_weight) - HistGradientBoostingClassifier(...) - >>> gb.predict([[1, 0]]) - array([1]) - >>> gb.predict_proba([[1, 0]])[0, 1] - 0.99... +Parallelization +--------------- -As you can see, the `[1, 0]` is comfortably classified as `1` since the first -two samples are ignored due to their sample weights. +Finally, this module also features the parallel construction of the trees +and the parallel computation of the predictions through the ``n_jobs`` +parameter. If ``n_jobs=k`` then computations are partitioned into +``k`` jobs, and run on ``k`` cores of the machine. If ``n_jobs=-1`` +then all cores available on the machine are used. Note that because of +inter-process communication overhead, the speedup might not be linear +(i.e., using ``k`` jobs will unfortunately not be ``k`` times as +fast). Significant speedup can still be achieved though when building +a large number of trees, or when building a single tree requires a fair +amount of time (e.g., on large datasets). -Implementation detail: taking sample weights into account amounts to -multiplying the gradients (and the hessians) by the sample weights. Note that -the binning stage (specifically the quantiles computation) does not take the -weights into account. +.. rubric:: Examples -.. 
_categorical_support_gbdt: +* :ref:`sphx_glr_auto_examples_ensemble_plot_forest_iris.py` +* :ref:`sphx_glr_auto_examples_ensemble_plot_forest_importances_faces.py` +* :ref:`sphx_glr_auto_examples_miscellaneous_plot_multioutput_face_completion.py` -Categorical Features Support ----------------------------- +.. rubric:: References -:class:`HistGradientBoostingClassifier` and -:class:`HistGradientBoostingRegressor` have native support for categorical -features: they can consider splits on non-ordered, categorical data. +.. [B2001] L. Breiman, "Random Forests", Machine Learning, 45(1), 5-32, 2001. -For datasets with categorical features, using the native categorical support -is often better than relying on one-hot encoding -(:class:`~sklearn.preprocessing.OneHotEncoder`), because one-hot encoding -requires more tree depth to achieve equivalent splits. It is also usually -better to rely on the native categorical support rather than to treat -categorical features as continuous (ordinal), which happens for ordinal-encoded -categorical data, since categories are nominal quantities where order does not -matter. +.. [B1998] L. Breiman, "Arcing Classifiers", Annals of Statistics 1998. -To enable categorical support, a boolean mask can be passed to the -`categorical_features` parameter, indicating which feature is categorical. In -the following, the first feature will be treated as categorical and the -second feature as numerical:: +* P. Geurts, D. Ernst., and L. Wehenkel, "Extremely randomized + trees", Machine Learning, 63(1), 3-42, 2006. - >>> gbdt = HistGradientBoostingClassifier(categorical_features=[True, False]) +.. _random_forest_feature_importance: + +Feature importance evaluation +----------------------------- + +The relative rank (i.e. depth) of a feature used as a decision node in a +tree can be used to assess the relative importance of that feature with +respect to the predictability of the target variable. Features used at +the top of the tree contribute to the final prediction decision of a +larger fraction of the input samples. The **expected fraction of the +samples** they contribute to can thus be used as an estimate of the +**relative importance of the features**. In scikit-learn, the fraction of +samples a feature contributes to is combined with the decrease in impurity +from splitting them to create a normalized estimate of the predictive power +of that feature. + +By **averaging** the estimates of predictive ability over several randomized +trees one can **reduce the variance** of such an estimate and use it +for feature selection. This is known as the mean decrease in impurity, or MDI. +Refer to [L2014]_ for more information on MDI and feature importance +evaluation with Random Forests. + +.. warning:: + + The impurity-based feature importances computed on tree-based models suffer + from two flaws that can lead to misleading conclusions. First they are + computed on statistics derived from the training dataset and therefore **do + not necessarily inform us on which features are most important to make good + predictions on held-out dataset**. Secondly, **they favor high cardinality + features**, that is features with many unique values. + :ref:`permutation_importance` is an alternative to impurity-based feature + importance that does not suffer from these flaws. These two methods of + obtaining feature importance are explored in: + :ref:`sphx_glr_auto_examples_inspection_plot_permutation_importance.py`. 
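
A minimal sketch of how the two can be compared in practice (synthetic data and
parameter values are illustrative): the impurity-based importances come from the
fitted model itself, while permutation importances are computed on held-out
data::

    >>> from sklearn.datasets import make_classification
    >>> from sklearn.ensemble import RandomForestClassifier
    >>> from sklearn.inspection import permutation_importance
    >>> from sklearn.model_selection import train_test_split

    >>> X, y = make_classification(n_samples=1000, n_informative=3, random_state=0)
    >>> X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    >>> forest = RandomForestClassifier(random_state=0).fit(X_train, y_train)
    >>> mdi_importances = forest.feature_importances_   # impurity-based (MDI)
    >>> result = permutation_importance(
    ...     forest, X_test, y_test, n_repeats=10, random_state=0
    ... )
    >>> perm_importances = result.importances_mean      # held-out, permutation-based
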
-Equivalently, one can pass a list of integers indicating the indices of the -categorical features:: +The following example shows a color-coded representation of the relative +importances of each individual pixel for a face recognition task using +a :class:`ExtraTreesClassifier` model. - >>> gbdt = HistGradientBoostingClassifier(categorical_features=[0]) +.. figure:: ../auto_examples/ensemble/images/sphx_glr_plot_forest_importances_faces_001.png + :target: ../auto_examples/ensemble/plot_forest_importances_faces.html + :align: center + :scale: 75 -The cardinality of each categorical feature should be less than the `max_bins` -parameter, and each categorical feature is expected to be encoded in -`[0, max_bins - 1]`. To that end, it might be useful to pre-process the data -with an :class:`~sklearn.preprocessing.OrdinalEncoder` as done in -:ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_categorical.py`. +In practice those estimates are stored as an attribute named +``feature_importances_`` on the fitted model. This is an array with shape +``(n_features,)`` whose values are positive and sum to 1.0. The higher +the value, the more important is the contribution of the matching feature +to the prediction function. -If there are missing values during training, the missing values will be -treated as a proper category. If there are no missing values during training, -then at prediction time, missing values are mapped to the child node that has -the most samples (just like for continuous features). When predicting, -categories that were not seen during fit time will be treated as missing -values. +.. rubric:: Examples -**Split finding with categorical features**: The canonical way of considering -categorical splits in a tree is to consider -all of the :math:`2^{K - 1} - 1` partitions, where :math:`K` is the number of -categories. This can quickly become prohibitive when :math:`K` is large. -Fortunately, since gradient boosting trees are always regression trees (even -for classification problems), there exist a faster strategy that can yield -equivalent splits. First, the categories of a feature are sorted according to -the variance of the target, for each category `k`. Once the categories are -sorted, one can consider *continuous partitions*, i.e. treat the categories -as if they were ordered continuous values (see Fisher [Fisher1958]_ for a -formal proof). As a result, only :math:`K - 1` splits need to be considered -instead of :math:`2^{K - 1} - 1`. The initial sorting is a -:math:`\mathcal{O}(K \log(K))` operation, leading to a total complexity of -:math:`\mathcal{O}(K \log(K) + K)`, instead of :math:`\mathcal{O}(2^K)`. - -.. topic:: Examples: - - * :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_categorical.py` +* :ref:`sphx_glr_auto_examples_ensemble_plot_forest_importances_faces.py` +* :ref:`sphx_glr_auto_examples_ensemble_plot_forest_importances.py` -.. _monotonic_cst_gbdt: +.. rubric:: References -Monotonic Constraints ---------------------- +.. [L2014] G. Louppe, :arxiv:`"Understanding Random Forests: From Theory to + Practice" <1407.7502>`, + PhD Thesis, U. of Liege, 2014. -Depending on the problem at hand, you may have prior knowledge indicating -that a given feature should in general have a positive (or negative) effect -on the target value. For example, all else being equal, a higher credit -score should increase the probability of getting approved for a loan. -Monotonic constraints allow you to incorporate such prior knowledge into the -model. +.. 
_random_trees_embedding: -For a predictor :math:`F` with two features: +Totally Random Trees Embedding +------------------------------ - - a **monotonic increase constraint** is a constraint of the form: - .. math:: - x_1 \leq x_1' \implies F(x_1, x_2) \leq F(x_1', x_2) +:class:`RandomTreesEmbedding` implements an unsupervised transformation of the +data. Using a forest of completely random trees, :class:`RandomTreesEmbedding` +encodes the data by the indices of the leaves a data point ends up in. This +index is then encoded in a one-of-K manner, leading to a high dimensional, +sparse binary coding. +This coding can be computed very efficiently and can then be used as a basis +for other learning tasks. +The size and sparsity of the code can be influenced by choosing the number of +trees and the maximum depth per tree. For each tree in the ensemble, the coding +contains one entry of one. The size of the coding is at most ``n_estimators * 2 +** max_depth``, the maximum number of leaves in the forest. - - a **monotonic decrease constraint** is a constraint of the form: - .. math:: - x_1 \leq x_1' \implies F(x_1, x_2) \geq F(x_1', x_2) +As neighboring data points are more likely to lie within the same leaf of a +tree, the transformation performs an implicit, non-parametric density +estimation. -You can specify a monotonic constraint on each feature using the -`monotonic_cst` parameter. For each feature, a value of 0 indicates no -constraint, while 1 and -1 indicate a monotonic increase and -monotonic decrease constraint, respectively:: +.. rubric:: Examples - >>> from sklearn.ensemble import HistGradientBoostingRegressor +* :ref:`sphx_glr_auto_examples_ensemble_plot_random_forest_embedding.py` - ... # monotonic increase, monotonic decrease, and no constraint on the 3 features - >>> gbdt = HistGradientBoostingRegressor(monotonic_cst=[1, -1, 0]) +* :ref:`sphx_glr_auto_examples_manifold_plot_lle_digits.py` compares non-linear + dimensionality reduction techniques on handwritten digits. -In a binary classification context, imposing a monotonic increase (decrease) constraint means that higher values of the feature are supposed -to have a positive (negative) effect on the probability of samples -to belong to the positive class. +* :ref:`sphx_glr_auto_examples_ensemble_plot_feature_transformation.py` compares + supervised and unsupervised tree based feature transformations. -Nevertheless, monotonic constraints only marginally constrain feature effects on the output. -For instance, monotonic increase and decrease constraints cannot be used to enforce the -following modelling constraint: +.. seealso:: - .. math:: - x_1 \leq x_1' \implies F(x_1, x_2) \leq F(x_1', x_2') + :ref:`manifold` techniques can also be useful to derive non-linear + representations of feature space, also these approaches focus also on + dimensionality reduction. -Also, monotonic constraints are not supported for multiclass classification. +.. _tree_ensemble_warm_start: -.. note:: - Since categories are unordered quantities, it is not possible to enforce - monotonic constraints on categorical features. +Fitting additional trees +------------------------ -.. topic:: Examples: +RandomForest, Extra-Trees and :class:`RandomTreesEmbedding` estimators all support +``warm_start=True`` which allows you to add more trees to an already fitted model. - * :ref:`sphx_glr_auto_examples_ensemble_plot_monotonic_constraints.py` +:: -.. 
_interaction_cst_hgbt: + >>> from sklearn.datasets import make_classification + >>> from sklearn.ensemble import RandomForestClassifier + + >>> X, y = make_classification(n_samples=100, random_state=1) + >>> clf = RandomForestClassifier(n_estimators=10) + >>> clf = clf.fit(X, y) # fit with 10 trees + >>> len(clf.estimators_) + 10 + >>> # set warm_start and increase num of estimators + >>> _ = clf.set_params(n_estimators=20, warm_start=True) + >>> _ = clf.fit(X, y) # fit additional 10 trees + >>> len(clf.estimators_) + 20 + +When ``random_state`` is also set, the internal random state is also preserved +between ``fit`` calls. This means that training a model once with ``n`` estimators is +the same as building the model iteratively via multiple ``fit`` calls, where the +final number of estimators is equal to ``n``. -Interaction constraints ------------------------ +:: -A priori, the histogram gradient boosting trees are allowed to use any feature -to split a node into child nodes. This creates so called interactions between -features, i.e. usage of different features as split along a branch. Sometimes, -one wants to restrict the possible interactions, see [Mayer2022]_. This can be -done by the parameter ``interaction_cst``, where one can specify the indices -of features that are allowed to interact. -For instance, with 3 features in total, ``interaction_cst=[{0}, {1}, {2}]`` -forbids all interactions. -The constraints ``[{0, 1}, {1, 2}]`` specifies two groups of possibly -interacting features. Features 0 and 1 may interact with each other, as well -as features 1 and 2. But note that features 0 and 2 are forbidden to interact. -The following depicts a tree and the possible splits of the tree: + >>> clf = RandomForestClassifier(n_estimators=20) # set `n_estimators` to 10 + 10 + >>> _ = clf.fit(X, y) # fit `estimators_` will be the same as `clf` above -.. code-block:: none +Note that this differs from the usual behavior of :term:`random_state` in that it does +*not* result in the same result across different calls. - 1 <- Both constraint groups could be applied from now on - / \ - 1 2 <- Left split still fulfills both constraint groups. - / \ / \ Right split at feature 2 has only group {1, 2} from now on. +.. _bagging: -LightGBM uses the same logic for overlapping groups. +Bagging meta-estimator +====================== -Note that features not listed in ``interaction_cst`` are automatically -assigned an interaction group for themselves. With again 3 features, this -means that ``[{0}]`` is equivalent to ``[{0}, {1, 2}]``. +In ensemble algorithms, bagging methods form a class of algorithms which build +several instances of a black-box estimator on random subsets of the original +training set and then aggregate their individual predictions to form a final +prediction. These methods are used as a way to reduce the variance of a base +estimator (e.g., a decision tree), by introducing randomization into its +construction procedure and then making an ensemble out of it. In many cases, +bagging methods constitute a very simple way to improve with respect to a +single model, without making it necessary to adapt the underlying base +algorithm. As they provide a way to reduce overfitting, bagging methods work +best with strong and complex models (e.g., fully developed decision trees), in +contrast with boosting methods which usually work best with weak models (e.g., +shallow decision trees). -.. 
topic:: References +Bagging methods come in many flavours but mostly differ from each other by the +way they draw random subsets of the training set: - .. [Mayer2022] M. Mayer, S.C. Bourassa, M. Hoesli, and D.F. Scognamiglio. - 2022. :doi:`Machine Learning Applications to Land and Structure Valuation - <10.3390/jrfm15050193>`. - Journal of Risk and Financial Management 15, no. 5: 193 +* When random subsets of the dataset are drawn as random subsets of the + samples, then this algorithm is known as Pasting [B1999]_. -Low-level parallelism ---------------------- +* When samples are drawn with replacement, then the method is known as + Bagging [B1996]_. -:class:`HistGradientBoostingClassifier` and -:class:`HistGradientBoostingRegressor` have implementations that use OpenMP -for parallelization through Cython. For more details on how to control the -number of threads, please refer to our :ref:`parallelism` notes. +* When random subsets of the dataset are drawn as random subsets of + the features, then the method is known as Random Subspaces [H1998]_. -The following parts are parallelized: +* Finally, when base estimators are built on subsets of both samples and + features, then the method is known as Random Patches [LG2012]_. -- mapping samples from real values to integer-valued bins (finding the bin - thresholds is however sequential) -- building histograms is parallelized over features -- finding the best split point at a node is parallelized over features -- during fit, mapping samples into the left and right children is - parallelized over samples -- gradient and hessians computations are parallelized over samples -- predicting is parallelized over samples +In scikit-learn, bagging methods are offered as a unified +:class:`BaggingClassifier` meta-estimator (resp. :class:`BaggingRegressor`), +taking as input a user-specified estimator along with parameters +specifying the strategy to draw random subsets. In particular, ``max_samples`` +and ``max_features`` control the size of the subsets (in terms of samples and +features), while ``bootstrap`` and ``bootstrap_features`` control whether +samples and features are drawn with or without replacement. When using a subset +of the available samples the generalization accuracy can be estimated with the +out-of-bag samples by setting ``oob_score=True``. As an example, the +snippet below illustrates how to instantiate a bagging ensemble of +:class:`~sklearn.neighbors.KNeighborsClassifier` estimators, each built on random +subsets of 50% of the samples and 50% of the features. -.. _Why_it's_faster: + >>> from sklearn.ensemble import BaggingClassifier + >>> from sklearn.neighbors import KNeighborsClassifier + >>> bagging = BaggingClassifier(KNeighborsClassifier(), + ... max_samples=0.5, max_features=0.5) -Why it's faster ---------------- +.. rubric:: Examples -The bottleneck of a gradient boosting procedure is building the decision -trees. Building a traditional decision tree (as in the other GBDTs -:class:`GradientBoostingClassifier` and :class:`GradientBoostingRegressor`) -requires sorting the samples at each node (for -each feature). Sorting is needed so that the potential gain of a split point -can be computed efficiently. Splitting a single node has thus a complexity -of :math:`\mathcal{O}(n_\text{features} \times n \log(n))` where :math:`n` -is the number of samples at the node. 
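
The practical impact of this difference is easiest to see with a rough timing
sketch (a sketch only: absolute numbers depend entirely on the machine and
dataset, so none are asserted here)::

    >>> from time import perf_counter
    >>> from sklearn.datasets import make_classification
    >>> from sklearn.ensemble import GradientBoostingClassifier
    >>> from sklearn.ensemble import HistGradientBoostingClassifier

    >>> X, y = make_classification(n_samples=20_000, n_features=20, random_state=0)

    >>> tic = perf_counter()
    >>> _ = GradientBoostingClassifier(n_estimators=100).fit(X, y)
    >>> exact_time = perf_counter() - tic    # sorts feature values at every node

    >>> tic = perf_counter()
    >>> _ = HistGradientBoostingClassifier(max_iter=100).fit(X, y)
    >>> hist_time = perf_counter() - tic     # bins once, then works on histograms
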
+* :ref:`sphx_glr_auto_examples_ensemble_plot_bias_variance.py` -:class:`HistGradientBoostingClassifier` and -:class:`HistGradientBoostingRegressor`, in contrast, do not require sorting the -feature values and instead use a data-structure called a histogram, where the -samples are implicitly ordered. Building a histogram has a -:math:`\mathcal{O}(n)` complexity, so the node splitting procedure has a -:math:`\mathcal{O}(n_\text{features} \times n)` complexity, much smaller -than the previous one. In addition, instead of considering :math:`n` split -points, we here consider only ``max_bins`` split points, which is much -smaller. +.. rubric:: References -In order to build histograms, the input data `X` needs to be binned into -integer-valued bins. This binning procedure does require sorting the feature -values, but it only happens once at the very beginning of the boosting process -(not at each node, like in :class:`GradientBoostingClassifier` and -:class:`GradientBoostingRegressor`). +.. [B1999] L. Breiman, "Pasting small votes for classification in large + databases and on-line", Machine Learning, 36(1), 85-103, 1999. -Finally, many parts of the implementation of -:class:`HistGradientBoostingClassifier` and -:class:`HistGradientBoostingRegressor` are parallelized. +.. [B1996] L. Breiman, "Bagging predictors", Machine Learning, 24(2), + 123-140, 1996. -.. topic:: References +.. [H1998] T. Ho, "The random subspace method for constructing decision + forests", Pattern Analysis and Machine Intelligence, 20(8), 832-844, 1998. - .. [XGBoost] Tianqi Chen, Carlos Guestrin, :arxiv:`"XGBoost: A Scalable Tree - Boosting System" <1603.02754>` +.. [LG2012] G. Louppe and P. Geurts, "Ensembles on Random Patches", + Machine Learning and Knowledge Discovery in Databases, 346-361, 2012. - .. [LightGBM] Ke et. al. `"LightGBM: A Highly Efficient Gradient - BoostingDecision Tree" `_ - .. [Fisher1958] Fisher, W.D. (1958). `"On Grouping for Maximum Homogeneity" - `_ - Journal of the American Statistical Association, 53, 789-798. .. _voting_classifier: @@ -1442,27 +1450,6 @@ Vector Machine, a Decision Tree, and a K-nearest neighbor classifier:: :align: center :scale: 75% -Using the `VotingClassifier` with `GridSearchCV` ------------------------------------------------- - -The :class:`VotingClassifier` can also be used together with -:class:`~sklearn.model_selection.GridSearchCV` in order to tune the -hyperparameters of the individual estimators:: - - >>> from sklearn.model_selection import GridSearchCV - >>> clf1 = LogisticRegression(random_state=1) - >>> clf2 = RandomForestClassifier(random_state=1) - >>> clf3 = GaussianNB() - >>> eclf = VotingClassifier( - ... estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)], - ... voting='soft' - ... ) - - >>> params = {'lr__C': [1.0, 100.0], 'rf__n_estimators': [20, 200]} - - >>> grid = GridSearchCV(estimator=eclf, param_grid=params, cv=5) - >>> grid = grid.fit(iris.data, iris.target) - Usage ----- @@ -1482,6 +1469,26 @@ Optionally, weights can be provided for the individual classifiers:: ... voting='soft', weights=[2,5,1] ... ) +.. 
dropdown:: Using the :class:`VotingClassifier` with :class:`~sklearn.model_selection.GridSearchCV` + + The :class:`VotingClassifier` can also be used together with + :class:`~sklearn.model_selection.GridSearchCV` in order to tune the + hyperparameters of the individual estimators:: + + >>> from sklearn.model_selection import GridSearchCV + >>> clf1 = LogisticRegression(random_state=1) + >>> clf2 = RandomForestClassifier(random_state=1) + >>> clf3 = GaussianNB() + >>> eclf = VotingClassifier( + ... estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)], + ... voting='soft' + ... ) + + >>> params = {'lr__C': [1.0, 100.0], 'rf__n_estimators': [20, 200]} + + >>> grid = GridSearchCV(estimator=eclf, param_grid=params, cv=5) + >>> grid = grid.fit(iris.data, iris.target) + .. _voting_regressor: Voting Regressor @@ -1518,9 +1525,9 @@ The following example shows how to fit the VotingRegressor:: :align: center :scale: 75% -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_ensemble_plot_voting_regressor.py` +* :ref:`sphx_glr_auto_examples_ensemble_plot_voting_regressor.py` .. _stacking: @@ -1639,7 +1646,95 @@ computationally expensive. ... .format(multi_layer_regressor.score(X_test, y_test))) R2 score: 0.53 -.. topic:: References +.. rubric:: References + +.. [W1992] Wolpert, David H. "Stacked generalization." Neural networks 5.2 + (1992): 241-259. + + + +.. _adaboost: + +AdaBoost +======== + +The module :mod:`sklearn.ensemble` includes the popular boosting algorithm +AdaBoost, introduced in 1995 by Freund and Schapire [FS1995]_. + +The core principle of AdaBoost is to fit a sequence of weak learners (i.e., +models that are only slightly better than random guessing, such as small +decision trees) on repeatedly modified versions of the data. The predictions +from all of them are then combined through a weighted majority vote (or sum) to +produce the final prediction. The data modifications at each so-called boosting +iteration consists of applying weights :math:`w_1`, :math:`w_2`, ..., :math:`w_N` +to each of the training samples. Initially, those weights are all set to +:math:`w_i = 1/N`, so that the first step simply trains a weak learner on the +original data. For each successive iteration, the sample weights are +individually modified and the learning algorithm is reapplied to the reweighted +data. At a given step, those training examples that were incorrectly predicted +by the boosted model induced at the previous step have their weights increased, +whereas the weights are decreased for those that were predicted correctly. As +iterations proceed, examples that are difficult to predict receive +ever-increasing influence. Each subsequent weak learner is thereby forced to +concentrate on the examples that are missed by the previous ones in the sequence +[HTF]_. + +.. figure:: ../auto_examples/ensemble/images/sphx_glr_plot_adaboost_multiclass_001.png + :target: ../auto_examples/ensemble/plot_adaboost_multiclass.html + :align: center + :scale: 75 + +AdaBoost can be used both for classification and regression problems: + +- For multi-class classification, :class:`AdaBoostClassifier` implements + AdaBoost.SAMME [ZZRH2009]_. + +- For regression, :class:`AdaBoostRegressor` implements AdaBoost.R2 [D1997]_. 
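
The Usage section below fits the classifier; for regression, a minimal sketch
with :class:`AdaBoostRegressor` (on a hypothetical synthetic dataset) looks
very similar::

    >>> from sklearn.datasets import make_regression
    >>> from sklearn.ensemble import AdaBoostRegressor

    >>> X, y = make_regression(n_features=4, n_informative=2, random_state=0)
    >>> regr = AdaBoostRegressor(n_estimators=100, random_state=0).fit(X, y)
    >>> y_pred = regr.predict(X[:5])   # predictions from the boosted ensemble
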
+ +Usage +----- + +The following example shows how to fit an AdaBoost classifier with 100 weak +learners:: + + >>> from sklearn.model_selection import cross_val_score + >>> from sklearn.datasets import load_iris + >>> from sklearn.ensemble import AdaBoostClassifier + + >>> X, y = load_iris(return_X_y=True) + >>> clf = AdaBoostClassifier(n_estimators=100, algorithm="SAMME",) + >>> scores = cross_val_score(clf, X, y, cv=5) + >>> scores.mean() + 0.9... + +The number of weak learners is controlled by the parameter ``n_estimators``. The +``learning_rate`` parameter controls the contribution of the weak learners in +the final combination. By default, weak learners are decision stumps. Different +weak learners can be specified through the ``estimator`` parameter. +The main parameters to tune to obtain good results are ``n_estimators`` and +the complexity of the base estimators (e.g., its depth ``max_depth`` or +minimum required number of samples to consider a split ``min_samples_split``). + +.. rubric:: Examples + +* :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_multiclass.py` shows the performance + of AdaBoost on a multi-class problem. + +* :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_twoclass.py` shows the decision boundary + and decision function values for a non-linearly separable two-class problem + using AdaBoost-SAMME. + +* :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_regression.py` demonstrates regression + with the AdaBoost.R2 algorithm. + +.. rubric:: References + +.. [FS1995] Y. Freund, and R. Schapire, "A Decision-Theoretic Generalization of + On-Line Learning and an Application to Boosting", 1997. + +.. [ZZRH2009] J. Zhu, H. Zou, S. Rosset, T. Hastie. "Multi-class AdaBoost", 2009. + +.. [D1997] H. Drucker. "Improving Regressors using Boosting Techniques", 1997. - .. [W1992] Wolpert, David H. "Stacked generalization." Neural networks 5.2 - (1992): 241-259. +.. [HTF] T. Hastie, R. Tibshirani and J. Friedman, "Elements of Statistical Learning + Ed. 2", Springer, 2009. diff --git a/doc/modules/feature_extraction.rst b/doc/modules/feature_extraction.rst index 1f4e974d6c087..2181014644e15 100644 --- a/doc/modules/feature_extraction.rst +++ b/doc/modules/feature_extraction.rst @@ -206,32 +206,32 @@ Note the use of a generator comprehension, which introduces laziness into the feature extraction: tokens are only processed on demand from the hasher. -Implementation details ----------------------- +.. dropdown:: Implementation details -:class:`FeatureHasher` uses the signed 32-bit variant of MurmurHash3. -As a result (and because of limitations in ``scipy.sparse``), -the maximum number of features supported is currently :math:`2^{31} - 1`. + :class:`FeatureHasher` uses the signed 32-bit variant of MurmurHash3. + As a result (and because of limitations in ``scipy.sparse``), + the maximum number of features supported is currently :math:`2^{31} - 1`. -The original formulation of the hashing trick by Weinberger et al. -used two separate hash functions :math:`h` and :math:`\xi` -to determine the column index and sign of a feature, respectively. -The present implementation works under the assumption -that the sign bit of MurmurHash3 is independent of its other bits. + The original formulation of the hashing trick by Weinberger et al. + used two separate hash functions :math:`h` and :math:`\xi` + to determine the column index and sign of a feature, respectively. 
+ The present implementation works under the assumption + that the sign bit of MurmurHash3 is independent of its other bits. -Since a simple modulo is used to transform the hash function to a column index, -it is advisable to use a power of two as the ``n_features`` parameter; -otherwise the features will not be mapped evenly to the columns. + Since a simple modulo is used to transform the hash function to a column index, + it is advisable to use a power of two as the ``n_features`` parameter; + otherwise the features will not be mapped evenly to the columns. + .. rubric:: References -.. topic:: References: + * `MurmurHash3 `_. - * Kilian Weinberger, Anirban Dasgupta, John Langford, Alex Smola and - Josh Attenberg (2009). `Feature hashing for large scale multitask learning - `_. Proc. ICML. - * `MurmurHash3 `_. +.. rubric:: References +* Kilian Weinberger, Anirban Dasgupta, John Langford, Alex Smola and + Josh Attenberg (2009). `Feature hashing for large scale multitask learning + `_. Proc. ICML. .. _text_feature_extraction: @@ -307,7 +307,7 @@ counting in a single class:: This model has many parameters, however the default values are quite reasonable (please see the :ref:`reference documentation -` for the details):: +` for the details):: >>> vectorizer = CountVectorizer() >>> vectorizer @@ -396,7 +396,7 @@ last document:: .. _stop_words: Using stop words -................ +---------------- Stop words are words like "and", "the", "him", which are presumed to be uninformative in representing the content of a text, and which may be @@ -419,12 +419,13 @@ tokenizer, so if *we've* is in ``stop_words``, but *ve* is not, *ve* will be retained from *we've* in transformed text. Our vectorizers will try to identify and warn about some kinds of inconsistencies. -.. topic:: References +.. rubric:: References + +.. [NQY18] J. Nothman, H. Qin and R. Yurchak (2018). + `"Stop Word Lists in Free Open-source Software Packages" + `__. + In *Proc. Workshop for NLP Open Source Software*. - .. [NQY18] J. Nothman, H. Qin and R. Yurchak (2018). - `"Stop Word Lists in Free Open-source Software Packages" - `__. - In *Proc. Workshop for NLP Open Source Software*. .. _tfidf: @@ -488,126 +489,125 @@ class:: TfidfTransformer(smooth_idf=False) Again please see the :ref:`reference documentation -` for the details on all the parameters. - -Let's take an example with the following counts. The first term is present -100% of the time hence not very interesting. The two other features only -in less than 50% of the time hence probably more representative of the -content of the documents:: - - >>> counts = [[3, 0, 1], - ... [2, 0, 0], - ... [3, 0, 0], - ... [4, 0, 0], - ... [3, 2, 0], - ... [3, 0, 2]] - ... - >>> tfidf = transformer.fit_transform(counts) - >>> tfidf - <6x3 sparse matrix of type '<... 'numpy.float64'>' - with 9 stored elements in Compressed Sparse ... format> +` for the details on all the parameters. - >>> tfidf.toarray() - array([[0.81940995, 0. , 0.57320793], - [1. , 0. , 0. ], - [1. , 0. , 0. ], - [1. , 0. , 0. ], - [0.47330339, 0.88089948, 0. ], - [0.58149261, 0. , 0.81355169]]) +.. dropdown:: Numeric example of a tf-idf matrix -Each row is normalized to have unit Euclidean norm: + Let's take an example with the following counts. The first term is present + 100% of the time hence not very interesting. 
The two other features only + in less than 50% of the time hence probably more representative of the + content of the documents:: -:math:`v_{norm} = \frac{v}{||v||_2} = \frac{v}{\sqrt{v{_1}^2 + -v{_2}^2 + \dots + v{_n}^2}}` + >>> counts = [[3, 0, 1], + ... [2, 0, 0], + ... [3, 0, 0], + ... [4, 0, 0], + ... [3, 2, 0], + ... [3, 0, 2]] + ... + >>> tfidf = transformer.fit_transform(counts) + >>> tfidf + <6x3 sparse matrix of type '<... 'numpy.float64'>' + with 9 stored elements in Compressed Sparse ... format> -For example, we can compute the tf-idf of the first term in the first -document in the `counts` array as follows: + >>> tfidf.toarray() + array([[0.81940995, 0. , 0.57320793], + [1. , 0. , 0. ], + [1. , 0. , 0. ], + [1. , 0. , 0. ], + [0.47330339, 0.88089948, 0. ], + [0.58149261, 0. , 0.81355169]]) -:math:`n = 6` + Each row is normalized to have unit Euclidean norm: -:math:`\text{df}(t)_{\text{term1}} = 6` + :math:`v_{norm} = \frac{v}{||v||_2} = \frac{v}{\sqrt{v{_1}^2 + + v{_2}^2 + \dots + v{_n}^2}}` -:math:`\text{idf}(t)_{\text{term1}} = -\log \frac{n}{\text{df}(t)} + 1 = \log(1)+1 = 1` + For example, we can compute the tf-idf of the first term in the first + document in the `counts` array as follows: -:math:`\text{tf-idf}_{\text{term1}} = \text{tf} \times \text{idf} = 3 \times 1 = 3` + :math:`n = 6` -Now, if we repeat this computation for the remaining 2 terms in the document, -we get + :math:`\text{df}(t)_{\text{term1}} = 6` -:math:`\text{tf-idf}_{\text{term2}} = 0 \times (\log(6/1)+1) = 0` + :math:`\text{idf}(t)_{\text{term1}} = + \log \frac{n}{\text{df}(t)} + 1 = \log(1)+1 = 1` -:math:`\text{tf-idf}_{\text{term3}} = 1 \times (\log(6/2)+1) \approx 2.0986` + :math:`\text{tf-idf}_{\text{term1}} = \text{tf} \times \text{idf} = 3 \times 1 = 3` -and the vector of raw tf-idfs: + Now, if we repeat this computation for the remaining 2 terms in the document, + we get -:math:`\text{tf-idf}_{\text{raw}} = [3, 0, 2.0986].` + :math:`\text{tf-idf}_{\text{term2}} = 0 \times (\log(6/1)+1) = 0` + :math:`\text{tf-idf}_{\text{term3}} = 1 \times (\log(6/2)+1) \approx 2.0986` -Then, applying the Euclidean (L2) norm, we obtain the following tf-idfs -for document 1: + and the vector of raw tf-idfs: -:math:`\frac{[3, 0, 2.0986]}{\sqrt{\big(3^2 + 0^2 + 2.0986^2\big)}} -= [ 0.819, 0, 0.573].` + :math:`\text{tf-idf}_{\text{raw}} = [3, 0, 2.0986].` -Furthermore, the default parameter ``smooth_idf=True`` adds "1" to the numerator -and denominator as if an extra document was seen containing every term in the -collection exactly once, which prevents zero divisions: -:math:`\text{idf}(t) = \log{\frac{1 + n}{1+\text{df}(t)}} + 1` + Then, applying the Euclidean (L2) norm, we obtain the following tf-idfs + for document 1: -Using this modification, the tf-idf of the third term in document 1 changes to -1.8473: + :math:`\frac{[3, 0, 2.0986]}{\sqrt{\big(3^2 + 0^2 + 2.0986^2\big)}} + = [ 0.819, 0, 0.573].` -:math:`\text{tf-idf}_{\text{term3}} = 1 \times \log(7/3)+1 \approx 1.8473` + Furthermore, the default parameter ``smooth_idf=True`` adds "1" to the numerator + and denominator as if an extra document was seen containing every term in the + collection exactly once, which prevents zero divisions: -And the L2-normalized tf-idf changes to + :math:`\text{idf}(t) = \log{\frac{1 + n}{1+\text{df}(t)}} + 1` -:math:`\frac{[3, 0, 1.8473]}{\sqrt{\big(3^2 + 0^2 + 1.8473^2\big)}} -= [0.8515, 0, 0.5243]`:: + Using this modification, the tf-idf of the third term in document 1 changes to + 1.8473: - >>> transformer = 
TfidfTransformer() - >>> transformer.fit_transform(counts).toarray() - array([[0.85151335, 0. , 0.52433293], - [1. , 0. , 0. ], - [1. , 0. , 0. ], - [1. , 0. , 0. ], - [0.55422893, 0.83236428, 0. ], - [0.63035731, 0. , 0.77630514]]) + :math:`\text{tf-idf}_{\text{term3}} = 1 \times \log(7/3)+1 \approx 1.8473` -The weights of each -feature computed by the ``fit`` method call are stored in a model -attribute:: + And the L2-normalized tf-idf changes to - >>> transformer.idf_ - array([1. ..., 2.25..., 1.84...]) + :math:`\frac{[3, 0, 1.8473]}{\sqrt{\big(3^2 + 0^2 + 1.8473^2\big)}} + = [0.8515, 0, 0.5243]`:: + >>> transformer = TfidfTransformer() + >>> transformer.fit_transform(counts).toarray() + array([[0.85151335, 0. , 0.52433293], + [1. , 0. , 0. ], + [1. , 0. , 0. ], + [1. , 0. , 0. ], + [0.55422893, 0.83236428, 0. ], + [0.63035731, 0. , 0.77630514]]) + The weights of each + feature computed by the ``fit`` method call are stored in a model + attribute:: + >>> transformer.idf_ + array([1. ..., 2.25..., 1.84...]) -As tf–idf is very often used for text features, there is also another -class called :class:`TfidfVectorizer` that combines all the options of -:class:`CountVectorizer` and :class:`TfidfTransformer` in a single model:: + As tf-idf is very often used for text features, there is also another + class called :class:`TfidfVectorizer` that combines all the options of + :class:`CountVectorizer` and :class:`TfidfTransformer` in a single model:: - >>> from sklearn.feature_extraction.text import TfidfVectorizer - >>> vectorizer = TfidfVectorizer() - >>> vectorizer.fit_transform(corpus) - <4x9 sparse matrix of type '<... 'numpy.float64'>' - with 19 stored elements in Compressed Sparse ... format> + >>> from sklearn.feature_extraction.text import TfidfVectorizer + >>> vectorizer = TfidfVectorizer() + >>> vectorizer.fit_transform(corpus) + <4x9 sparse matrix of type '<... 'numpy.float64'>' + with 19 stored elements in Compressed Sparse ... format> -While the tf–idf normalization is often very useful, there might -be cases where the binary occurrence markers might offer better -features. This can be achieved by using the ``binary`` parameter -of :class:`CountVectorizer`. In particular, some estimators such as -:ref:`bernoulli_naive_bayes` explicitly model discrete boolean random -variables. Also, very short texts are likely to have noisy tf–idf values -while the binary occurrence info is more stable. + While the tf-idf normalization is often very useful, there might + be cases where the binary occurrence markers might offer better + features. This can be achieved by using the ``binary`` parameter + of :class:`CountVectorizer`. In particular, some estimators such as + :ref:`bernoulli_naive_bayes` explicitly model discrete boolean random + variables. Also, very short texts are likely to have noisy tf-idf values + while the binary occurrence info is more stable. -As usual the best way to adjust the feature extraction parameters -is to use a cross-validated grid search, for instance by pipelining the -feature extractor with a classifier: + As usual the best way to adjust the feature extraction parameters + is to use a cross-validated grid search, for instance by pipelining the + feature extractor with a classifier: - * :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_text_feature_extraction.py` + * :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_text_feature_extraction.py` Decoding text files @@ -637,58 +637,59 @@ or ``"replace"``. 
See the documentation for the Python function ``bytes.decode`` for more details (type ``help(bytes.decode)`` at the Python prompt). -If you are having trouble decoding text, here are some things to try: - -- Find out what the actual encoding of the text is. The file might come - with a header or README that tells you the encoding, or there might be some - standard encoding you can assume based on where the text comes from. - -- You may be able to find out what kind of encoding it is in general - using the UNIX command ``file``. The Python ``chardet`` module comes with - a script called ``chardetect.py`` that will guess the specific encoding, - though you cannot rely on its guess being correct. - -- You could try UTF-8 and disregard the errors. You can decode byte - strings with ``bytes.decode(errors='replace')`` to replace all - decoding errors with a meaningless character, or set - ``decode_error='replace'`` in the vectorizer. This may damage the - usefulness of your features. - -- Real text may come from a variety of sources that may have used different - encodings, or even be sloppily decoded in a different encoding than the - one it was encoded with. This is common in text retrieved from the Web. - The Python package `ftfy`_ can automatically sort out some classes of - decoding errors, so you could try decoding the unknown text as ``latin-1`` - and then using ``ftfy`` to fix errors. - -- If the text is in a mish-mash of encodings that is simply too hard to sort - out (which is the case for the 20 Newsgroups dataset), you can fall back on - a simple single-byte encoding such as ``latin-1``. Some text may display - incorrectly, but at least the same sequence of bytes will always represent - the same feature. - -For example, the following snippet uses ``chardet`` -(not shipped with scikit-learn, must be installed separately) -to figure out the encoding of three texts. -It then vectorizes the texts and prints the learned vocabulary. -The output is not shown here. - - >>> import chardet # doctest: +SKIP - >>> text1 = b"Sei mir gegr\xc3\xbc\xc3\x9ft mein Sauerkraut" - >>> text2 = b"holdselig sind deine Ger\xfcche" - >>> text3 = b"\xff\xfeA\x00u\x00f\x00 \x00F\x00l\x00\xfc\x00g\x00e\x00l\x00n\x00 \x00d\x00e\x00s\x00 \x00G\x00e\x00s\x00a\x00n\x00g\x00e\x00s\x00,\x00 \x00H\x00e\x00r\x00z\x00l\x00i\x00e\x00b\x00c\x00h\x00e\x00n\x00,\x00 \x00t\x00r\x00a\x00g\x00 \x00i\x00c\x00h\x00 \x00d\x00i\x00c\x00h\x00 \x00f\x00o\x00r\x00t\x00" - >>> decoded = [x.decode(chardet.detect(x)['encoding']) - ... for x in (text1, text2, text3)] # doctest: +SKIP - >>> v = CountVectorizer().fit(decoded).vocabulary_ # doctest: +SKIP - >>> for term in v: print(v) # doctest: +SKIP - -(Depending on the version of ``chardet``, it might get the first one wrong.) - -For an introduction to Unicode and character encodings in general, -see Joel Spolsky's `Absolute Minimum Every Software Developer Must Know -About Unicode `_. - -.. _`ftfy`: https://github.com/LuminosoInsight/python-ftfy +.. dropdown:: Troubleshooting decoding text + + If you are having trouble decoding text, here are some things to try: + + - Find out what the actual encoding of the text is. The file might come + with a header or README that tells you the encoding, or there might be some + standard encoding you can assume based on where the text comes from. + + - You may be able to find out what kind of encoding it is in general + using the UNIX command ``file``. 
The Python ``chardet`` module comes with + a script called ``chardetect.py`` that will guess the specific encoding, + though you cannot rely on its guess being correct. + + - You could try UTF-8 and disregard the errors. You can decode byte + strings with ``bytes.decode(errors='replace')`` to replace all + decoding errors with a meaningless character, or set + ``decode_error='replace'`` in the vectorizer. This may damage the + usefulness of your features. + + - Real text may come from a variety of sources that may have used different + encodings, or even be sloppily decoded in a different encoding than the + one it was encoded with. This is common in text retrieved from the Web. + The Python package `ftfy `__ + can automatically sort out some classes of + decoding errors, so you could try decoding the unknown text as ``latin-1`` + and then using ``ftfy`` to fix errors. + + - If the text is in a mish-mash of encodings that is simply too hard to sort + out (which is the case for the 20 Newsgroups dataset), you can fall back on + a simple single-byte encoding such as ``latin-1``. Some text may display + incorrectly, but at least the same sequence of bytes will always represent + the same feature. + + For example, the following snippet uses ``chardet`` + (not shipped with scikit-learn, must be installed separately) + to figure out the encoding of three texts. + It then vectorizes the texts and prints the learned vocabulary. + The output is not shown here. + + >>> import chardet # doctest: +SKIP + >>> text1 = b"Sei mir gegr\xc3\xbc\xc3\x9ft mein Sauerkraut" + >>> text2 = b"holdselig sind deine Ger\xfcche" + >>> text3 = b"\xff\xfeA\x00u\x00f\x00 \x00F\x00l\x00\xfc\x00g\x00e\x00l\x00n\x00 \x00d\x00e\x00s\x00 \x00G\x00e\x00s\x00a\x00n\x00g\x00e\x00s\x00,\x00 \x00H\x00e\x00r\x00z\x00l\x00i\x00e\x00b\x00c\x00h\x00e\x00n\x00,\x00 \x00t\x00r\x00a\x00g\x00 \x00i\x00c\x00h\x00 \x00d\x00i\x00c\x00h\x00 \x00f\x00o\x00r\x00t\x00" + >>> decoded = [x.decode(chardet.detect(x)['encoding']) + ... for x in (text1, text2, text3)] # doctest: +SKIP + >>> v = CountVectorizer().fit(decoded).vocabulary_ # doctest: +SKIP + >>> for term in v: print(v) # doctest: +SKIP + + (Depending on the version of ``chardet``, it might get the first one wrong.) + + For an introduction to Unicode and character encodings in general, + see Joel Spolsky's `Absolute Minimum Every Software Developer Must Know + About Unicode `_. Applications and examples @@ -701,18 +702,18 @@ In particular in a **supervised setting** it can be successfully combined with fast and scalable linear models to train **document classifiers**, for instance: - * :ref:`sphx_glr_auto_examples_text_plot_document_classification_20newsgroups.py` +* :ref:`sphx_glr_auto_examples_text_plot_document_classification_20newsgroups.py` In an **unsupervised setting** it can be used to group similar documents together by applying clustering algorithms such as :ref:`k_means`: - * :ref:`sphx_glr_auto_examples_text_plot_document_clustering.py` +* :ref:`sphx_glr_auto_examples_text_plot_document_clustering.py` Finally it is possible to discover the main topics of a corpus by relaxing the hard assignment constraint of clustering, for instance by using :ref:`NMF`: - * :ref:`sphx_glr_auto_examples_applications_plot_topics_extraction_with_nmf_lda.py` +* :ref:`sphx_glr_auto_examples_applications_plot_topics_extraction_with_nmf_lda.py` Limitations of the Bag of Words representation @@ -870,25 +871,25 @@ The :class:`HashingVectorizer` also comes with the following limitations: model. 
A :class:`TfidfTransformer` can be appended to it in a pipeline if required. -Performing out-of-core scaling with HashingVectorizer ------------------------------------------------------- +.. dropdown:: Performing out-of-core scaling with HashingVectorizer + + An interesting development of using a :class:`HashingVectorizer` is the ability + to perform `out-of-core`_ scaling. This means that we can learn from data that + does not fit into the computer's main memory. -An interesting development of using a :class:`HashingVectorizer` is the ability -to perform `out-of-core`_ scaling. This means that we can learn from data that -does not fit into the computer's main memory. + .. _out-of-core: https://en.wikipedia.org/wiki/Out-of-core_algorithm -.. _out-of-core: https://en.wikipedia.org/wiki/Out-of-core_algorithm + A strategy to implement out-of-core scaling is to stream data to the estimator + in mini-batches. Each mini-batch is vectorized using :class:`HashingVectorizer` + so as to guarantee that the input space of the estimator has always the same + dimensionality. The amount of memory used at any time is thus bounded by the + size of a mini-batch. Although there is no limit to the amount of data that can + be ingested using such an approach, from a practical point of view the learning + time is often limited by the CPU time one wants to spend on the task. -A strategy to implement out-of-core scaling is to stream data to the estimator -in mini-batches. Each mini-batch is vectorized using :class:`HashingVectorizer` -so as to guarantee that the input space of the estimator has always the same -dimensionality. The amount of memory used at any time is thus bounded by the -size of a mini-batch. Although there is no limit to the amount of data that can -be ingested using such an approach, from a practical point of view the learning -time is often limited by the CPU time one wants to spend on the task. + For a full-fledged example of out-of-core scaling in a text classification + task see :ref:`sphx_glr_auto_examples_applications_plot_out_of_core_classification.py`. -For a full-fledged example of out-of-core scaling in a text classification -task see :ref:`sphx_glr_auto_examples_applications_plot_out_of_core_classification.py`. Customizing the vectorizer classes ---------------------------------- @@ -906,19 +907,19 @@ to the vectorizer constructor:: In particular we name: - * ``preprocessor``: a callable that takes an entire document as input (as a - single string), and returns a possibly transformed version of the document, - still as an entire string. This can be used to remove HTML tags, lowercase - the entire document, etc. +* ``preprocessor``: a callable that takes an entire document as input (as a + single string), and returns a possibly transformed version of the document, + still as an entire string. This can be used to remove HTML tags, lowercase + the entire document, etc. - * ``tokenizer``: a callable that takes the output from the preprocessor - and splits it into tokens, then returns a list of these. +* ``tokenizer``: a callable that takes the output from the preprocessor + and splits it into tokens, then returns a list of these. - * ``analyzer``: a callable that replaces the preprocessor and tokenizer. - The default analyzers all call the preprocessor and tokenizer, but custom - analyzers will skip this. N-gram extraction and stop word filtering take - place at the analyzer level, so a custom analyzer may have to reproduce - these steps. 
+* ``analyzer``: a callable that replaces the preprocessor and tokenizer. + The default analyzers all call the preprocessor and tokenizer, but custom + analyzers will skip this. N-gram extraction and stop word filtering take + place at the analyzer level, so a custom analyzer may have to reproduce + these steps. (Lucene users might recognize these names, but be aware that scikit-learn concepts may not map one-to-one onto Lucene concepts.) @@ -928,7 +929,8 @@ parameters it is possible to derive from the class and override the ``build_preprocessor``, ``build_tokenizer`` and ``build_analyzer`` factory methods instead of passing custom functions. -Some tips and tricks: +.. dropdown:: Tips and tricks + :color: success * If documents are pre-tokenized by an external package, then store them in files (or strings) with the tokens separated by whitespace and pass @@ -952,7 +954,6 @@ Some tips and tricks: (Note that this will not filter out punctuation.) - The following example will, for instance, transform some British spelling to American spelling:: @@ -976,11 +977,10 @@ Some tips and tricks: for other styles of preprocessing; examples include stemming, lemmatization, or normalizing numerical tokens, with the latter illustrated in: - * :ref:`sphx_glr_auto_examples_bicluster_plot_bicluster_newsgroups.py` - + * :ref:`sphx_glr_auto_examples_bicluster_plot_bicluster_newsgroups.py` -Customizing the vectorizer can also be useful when handling Asian languages -that do not use an explicit word separator such as whitespace. + Customizing the vectorizer can also be useful when handling Asian languages + that do not use an explicit word separator such as whitespace. .. _image_feature_extraction: diff --git a/doc/modules/feature_selection.rst b/doc/modules/feature_selection.rst index ec902979d5600..6746f2f65da00 100644 --- a/doc/modules/feature_selection.rst +++ b/doc/modules/feature_selection.rst @@ -57,18 +57,18 @@ univariate statistical tests. It can be seen as a preprocessing step to an estimator. Scikit-learn exposes feature selection routines as objects that implement the ``transform`` method: - * :class:`SelectKBest` removes all but the :math:`k` highest scoring features +* :class:`SelectKBest` removes all but the :math:`k` highest scoring features - * :class:`SelectPercentile` removes all but a user-specified highest scoring - percentage of features +* :class:`SelectPercentile` removes all but a user-specified highest scoring + percentage of features - * using common univariate statistical tests for each feature: - false positive rate :class:`SelectFpr`, false discovery rate - :class:`SelectFdr`, or family wise error :class:`SelectFwe`. +* using common univariate statistical tests for each feature: + false positive rate :class:`SelectFpr`, false discovery rate + :class:`SelectFdr`, or family wise error :class:`SelectFwe`. - * :class:`GenericUnivariateSelect` allows to perform univariate feature - selection with a configurable strategy. This allows to select the best - univariate selection strategy with hyper-parameter search estimator. +* :class:`GenericUnivariateSelect` allows to perform univariate feature + selection with a configurable strategy. This allows to select the best + univariate selection strategy with hyper-parameter search estimator. 
For instance, we can use a F-test to retrieve the two best features for a dataset as follows: @@ -87,9 +87,9 @@ These objects take as input a scoring function that returns univariate scores and p-values (or only scores for :class:`SelectKBest` and :class:`SelectPercentile`): - * For regression: :func:`r_regression`, :func:`f_regression`, :func:`mutual_info_regression` +* For regression: :func:`r_regression`, :func:`f_regression`, :func:`mutual_info_regression` - * For classification: :func:`chi2`, :func:`f_classif`, :func:`mutual_info_classif` +* For classification: :func:`chi2`, :func:`f_classif`, :func:`mutual_info_classif` The methods based on F-test estimate the degree of linear dependency between two random variables. On the other hand, mutual information methods can capture @@ -108,11 +108,17 @@ applied to non-negative features, such as frequencies. Beware not to use a regression scoring function with a classification problem, you will get useless results. -.. topic:: Examples: +.. note:: - * :ref:`sphx_glr_auto_examples_feature_selection_plot_feature_selection.py` + The :class:`SelectPercentile` and :class:`SelectKBest` support unsupervised + feature selection as well. One needs to provide a `score_func` where `y=None`. + The `score_func` should use internally `X` to compute the scores. - * :ref:`sphx_glr_auto_examples_feature_selection_plot_f_test_vs_mi.py` +.. rubric:: Examples + +* :ref:`sphx_glr_auto_examples_feature_selection_plot_feature_selection.py` + +* :ref:`sphx_glr_auto_examples_feature_selection_plot_f_test_vs_mi.py` .. _rfe: @@ -130,16 +136,22 @@ repeated on the pruned set until the desired number of features to select is eventually reached. :class:`RFECV` performs RFE in a cross-validation loop to find the optimal -number of features. +number of features. In more details, the number of features selected is tuned +automatically by fitting an :class:`RFE` selector on the different +cross-validation splits (provided by the `cv` parameter). The performance +of the :class:`RFE` selector are evaluated using `scorer` for different number +of selected features and aggregated together. Finally, the scores are averaged +across folds and the number of features selected is set to the number of +features that maximize the cross-validation score. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_feature_selection_plot_rfe_digits.py`: A recursive feature elimination example - showing the relevance of pixels in a digit classification task. +* :ref:`sphx_glr_auto_examples_feature_selection_plot_rfe_digits.py`: A recursive feature elimination example + showing the relevance of pixels in a digit classification task. - * :ref:`sphx_glr_auto_examples_feature_selection_plot_rfe_with_cross_validation.py`: A recursive feature - elimination example with automatic tuning of the number of features - selected with cross-validation. +* :ref:`sphx_glr_auto_examples_feature_selection_plot_rfe_with_cross_validation.py`: A recursive feature + elimination example with automatic tuning of the number of features + selected with cross-validation. .. _select_from_model: @@ -159,9 +171,9 @@ Available heuristics are "mean", "median" and float multiples of these like For examples on how it is to be used refer to the sections below. -.. topic:: Examples +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_feature_selection_plot_select_from_model_diabetes.py` +* :ref:`sphx_glr_auto_examples_feature_selection_plot_select_from_model_diabetes.py` .. 
_l1_feature_selection: @@ -195,35 +207,38 @@ With SVMs and logistic-regression, the parameter C controls the sparsity: the smaller C the fewer features selected. With Lasso, the higher the alpha parameter, the fewer features selected. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_dense_vs_sparse_data.py`. +* :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_dense_vs_sparse_data.py`. .. _compressive_sensing: -.. topic:: **L1-recovery and compressive sensing** +.. dropdown:: L1-recovery and compressive sensing - For a good choice of alpha, the :ref:`lasso` can fully recover the - exact set of non-zero variables using only few observations, provided - certain specific conditions are met. In particular, the number of - samples should be "sufficiently large", or L1 models will perform at - random, where "sufficiently large" depends on the number of non-zero - coefficients, the logarithm of the number of features, the amount of - noise, the smallest absolute value of non-zero coefficients, and the - structure of the design matrix X. In addition, the design matrix must - display certain specific properties, such as not being too correlated. + For a good choice of alpha, the :ref:`lasso` can fully recover the + exact set of non-zero variables using only few observations, provided + certain specific conditions are met. In particular, the number of + samples should be "sufficiently large", or L1 models will perform at + random, where "sufficiently large" depends on the number of non-zero + coefficients, the logarithm of the number of features, the amount of + noise, the smallest absolute value of non-zero coefficients, and the + structure of the design matrix X. In addition, the design matrix must + display certain specific properties, such as not being too correlated. - There is no general rule to select an alpha parameter for recovery of - non-zero coefficients. It can by set by cross-validation - (:class:`LassoCV` or :class:`LassoLarsCV`), though this may lead to - under-penalized models: including a small number of non-relevant - variables is not detrimental to prediction score. BIC - (:class:`LassoLarsIC`) tends, on the opposite, to set high values of - alpha. + There is no general rule to select an alpha parameter for recovery of + non-zero coefficients. It can by set by cross-validation + (:class:`~sklearn.linear_model.LassoCV` or + :class:`~sklearn.linear_model.LassoLarsCV`), though this may lead to + under-penalized models: including a small number of non-relevant variables + is not detrimental to prediction score. BIC + (:class:`~sklearn.linear_model.LassoLarsIC`) tends, on the opposite, to set + high values of alpha. - **Reference** Richard G. Baraniuk "Compressive Sensing", IEEE Signal - Processing Magazine [120] July 2007 - http://users.isr.ist.utl.pt/~aguiar/CS_notes.pdf + .. rubric:: References + + Richard G. Baraniuk "Compressive Sensing", IEEE Signal + Processing Magazine [120] July 2007 + http://users.isr.ist.utl.pt/~aguiar/CS_notes.pdf Tree-based feature selection @@ -250,14 +265,13 @@ meta-transformer):: >>> X_new.shape # doctest: +SKIP (150, 2) -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_ensemble_plot_forest_importances.py`: example on - synthetic data showing the recovery of the actually meaningful - features. +* :ref:`sphx_glr_auto_examples_ensemble_plot_forest_importances.py`: example on + synthetic data showing the recovery of the actually meaningful features. 
- * :ref:`sphx_glr_auto_examples_ensemble_plot_forest_importances_faces.py`: example - on face recognition data. +* :ref:`sphx_glr_auto_examples_ensemble_plot_forest_importances_faces.py`: example + on face recognition data. .. _sequential_feature_selection: @@ -281,33 +295,36 @@ instead of starting with no features and greedily adding features, we start with *all* the features and greedily *remove* features from the set. The `direction` parameter controls whether forward or backward SFS is used. -In general, forward and backward selection do not yield equivalent results. -Also, one may be much faster than the other depending on the requested number -of selected features: if we have 10 features and ask for 7 selected features, -forward selection would need to perform 7 iterations while backward selection -would only need to perform 3. - -SFS differs from :class:`~sklearn.feature_selection.RFE` and -:class:`~sklearn.feature_selection.SelectFromModel` in that it does not -require the underlying model to expose a `coef_` or `feature_importances_` -attribute. It may however be slower considering that more models need to be -evaluated, compared to the other approaches. For example in backward -selection, the iteration going from `m` features to `m - 1` features using k-fold -cross-validation requires fitting `m * k` models, while -:class:`~sklearn.feature_selection.RFE` would require only a single fit, and -:class:`~sklearn.feature_selection.SelectFromModel` always just does a single -fit and requires no iterations. +.. dropdown:: Details on Sequential Feature Selection -.. topic:: Examples + In general, forward and backward selection do not yield equivalent results. + Also, one may be much faster than the other depending on the requested number + of selected features: if we have 10 features and ask for 7 selected features, + forward selection would need to perform 7 iterations while backward selection + would only need to perform 3. - * :ref:`sphx_glr_auto_examples_feature_selection_plot_select_from_model_diabetes.py` + SFS differs from :class:`~sklearn.feature_selection.RFE` and + :class:`~sklearn.feature_selection.SelectFromModel` in that it does not + require the underlying model to expose a `coef_` or `feature_importances_` + attribute. It may however be slower considering that more models need to be + evaluated, compared to the other approaches. For example in backward + selection, the iteration going from `m` features to `m - 1` features using k-fold + cross-validation requires fitting `m * k` models, while + :class:`~sklearn.feature_selection.RFE` would require only a single fit, and + :class:`~sklearn.feature_selection.SelectFromModel` always just does a single + fit and requires no iterations. -.. topic:: References: + .. rubric:: References - .. [sfs] Ferri et al, `Comparative study of techniques for + .. [sfs] Ferri et al, `Comparative study of techniques for large-scale feature selection `_. + +.. rubric:: Examples + +* :ref:`sphx_glr_auto_examples_feature_selection_plot_select_from_model_diabetes.py` + Feature selection as part of a pipeline ======================================= @@ -316,7 +333,7 @@ the actual learning. 
The recommended way to do this in scikit-learn is to use a :class:`~pipeline.Pipeline`:: clf = Pipeline([ - ('feature_selection', SelectFromModel(LinearSVC(dual="auto", penalty="l1"))), + ('feature_selection', SelectFromModel(LinearSVC(penalty="l1"))), ('classification', RandomForestClassifier()) ]) clf.fit(X, y) diff --git a/doc/modules/gaussian_process.rst b/doc/modules/gaussian_process.rst index db490bc1309d3..fb87120205f96 100644 --- a/doc/modules/gaussian_process.rst +++ b/doc/modules/gaussian_process.rst @@ -1,5 +1,3 @@ - - .. _gaussian_process: ================== @@ -8,30 +6,30 @@ Gaussian Processes .. currentmodule:: sklearn.gaussian_process -**Gaussian Processes (GP)** are a generic supervised learning method designed +**Gaussian Processes (GP)** are a nonparametric supervised learning method used to solve *regression* and *probabilistic classification* problems. The advantages of Gaussian processes are: - - The prediction interpolates the observations (at least for regular - kernels). +- The prediction interpolates the observations (at least for regular + kernels). - - The prediction is probabilistic (Gaussian) so that one can compute - empirical confidence intervals and decide based on those if one should - refit (online fitting, adaptive fitting) the prediction in some - region of interest. +- The prediction is probabilistic (Gaussian) so that one can compute + empirical confidence intervals and decide based on those if one should + refit (online fitting, adaptive fitting) the prediction in some + region of interest. - - Versatile: different :ref:`kernels - ` can be specified. Common kernels are provided, but - it is also possible to specify custom kernels. +- Versatile: different :ref:`kernels + ` can be specified. Common kernels are provided, but + it is also possible to specify custom kernels. The disadvantages of Gaussian processes include: - - They are not sparse, i.e., they use the whole samples/features information to - perform the prediction. +- Our implementation is not sparse, i.e., they use the whole samples/features + information to perform the prediction. - - They lose efficiency in high dimensional spaces -- namely when the number - of features exceeds a few dozens. +- They lose efficiency in high dimensional spaces -- namely when the number + of features exceeds a few dozens. .. _gpr: @@ -42,31 +40,44 @@ Gaussian Process Regression (GPR) .. currentmodule:: sklearn.gaussian_process The :class:`GaussianProcessRegressor` implements Gaussian processes (GP) for -regression purposes. For this, the prior of the GP needs to be specified. The -prior mean is assumed to be constant and zero (for ``normalize_y=False``) or the -training data's mean (for ``normalize_y=True``). The prior's -covariance is specified by passing a :ref:`kernel ` object. The -hyperparameters of the kernel are optimized during fitting of -GaussianProcessRegressor by maximizing the log-marginal-likelihood (LML) based -on the passed ``optimizer``. As the LML may have multiple local optima, the -optimizer can be started repeatedly by specifying ``n_restarts_optimizer``. The -first run is always conducted starting from the initial hyperparameter values -of the kernel; subsequent runs are conducted from hyperparameter values -that have been chosen randomly from the range of allowed values. -If the initial hyperparameters should be kept fixed, `None` can be passed as -optimizer. +regression purposes. For this, the prior of the GP needs to be specified. 
GP +will combine this prior and the likelihood function based on training samples. +It allows to give a probabilistic approach to prediction by giving the mean and +standard deviation as output when predicting. -The noise level in the targets can be specified by passing it via the -parameter ``alpha``, either globally as a scalar or per datapoint. -Note that a moderate noise level can also be helpful for dealing with numeric -issues during fitting as it is effectively implemented as Tikhonov -regularization, i.e., by adding it to the diagonal of the kernel matrix. An -alternative to specifying the noise level explicitly is to include a -WhiteKernel component into the kernel, which can estimate the global noise -level from the data (see example below). +.. figure:: ../auto_examples/gaussian_process/images/sphx_glr_plot_gpr_noisy_targets_002.png + :target: ../auto_examples/gaussian_process/plot_gpr_noisy_targets.html + :align: center + +The prior mean is assumed to be constant and zero (for `normalize_y=False`) or +the training data's mean (for `normalize_y=True`). The prior's covariance is +specified by passing a :ref:`kernel ` object. The hyperparameters +of the kernel are optimized when fitting the :class:`GaussianProcessRegressor` +by maximizing the log-marginal-likelihood (LML) based on the passed +`optimizer`. As the LML may have multiple local optima, the optimizer can be +started repeatedly by specifying `n_restarts_optimizer`. The first run is +always conducted starting from the initial hyperparameter values of the kernel; +subsequent runs are conducted from hyperparameter values that have been chosen +randomly from the range of allowed values. If the initial hyperparameters +should be kept fixed, `None` can be passed as optimizer. + +The noise level in the targets can be specified by passing it via the parameter +`alpha`, either globally as a scalar or per datapoint. Note that a moderate +noise level can also be helpful for dealing with numeric instabilities during +fitting as it is effectively implemented as Tikhonov regularization, i.e., by +adding it to the diagonal of the kernel matrix. An alternative to specifying +the noise level explicitly is to include a +:class:`~sklearn.gaussian_process.kernels.WhiteKernel` component into the +kernel, which can estimate the global noise level from the data (see example +below). The figure below shows the effect of noisy target handled by setting +the parameter `alpha`. + +.. figure:: ../auto_examples/gaussian_process/images/sphx_glr_plot_gpr_noisy_targets_003.png + :target: ../auto_examples/gaussian_process/plot_gpr_noisy_targets.html + :align: center The implementation is based on Algorithm 2.1 of [RW2006]_. In addition to -the API of standard scikit-learn estimators, GaussianProcessRegressor: +the API of standard scikit-learn estimators, :class:`GaussianProcessRegressor`: * allows prediction without prior fitting (based on the GP prior) @@ -77,149 +88,12 @@ the API of standard scikit-learn estimators, GaussianProcessRegressor: externally for other ways of selecting hyperparameters, e.g., via Markov chain Monte Carlo. +.. rubric:: Examples -GPR examples -============ - -GPR with noise-level estimation -------------------------------- -This example illustrates that GPR with a sum-kernel including a WhiteKernel can -estimate the noise level of data. An illustration of the -log-marginal-likelihood (LML) landscape shows that there exist two local -maxima of LML. - -.. 
figure:: ../auto_examples/gaussian_process/images/sphx_glr_plot_gpr_noisy_003.png - :target: ../auto_examples/gaussian_process/plot_gpr_noisy.html - :align: center - -The first corresponds to a model with a high noise level and a -large length scale, which explains all variations in the data by noise. - -.. figure:: ../auto_examples/gaussian_process/images/sphx_glr_plot_gpr_noisy_004.png - :target: ../auto_examples/gaussian_process/plot_gpr_noisy.html - :align: center - -The second one has a smaller noise level and shorter length scale, which explains -most of the variation by the noise-free functional relationship. The second -model has a higher likelihood; however, depending on the initial value for the -hyperparameters, the gradient-based optimization might also converge to the -high-noise solution. It is thus important to repeat the optimization several -times for different initializations. - -.. figure:: ../auto_examples/gaussian_process/images/sphx_glr_plot_gpr_noisy_005.png - :target: ../auto_examples/gaussian_process/plot_gpr_noisy.html - :align: center - - -Comparison of GPR and Kernel Ridge Regression ---------------------------------------------- - -Both kernel ridge regression (KRR) and GPR learn -a target function by employing internally the "kernel trick". KRR learns a -linear function in the space induced by the respective kernel which corresponds -to a non-linear function in the original space. The linear function in the -kernel space is chosen based on the mean-squared error loss with -ridge regularization. GPR uses the kernel to define the covariance of -a prior distribution over the target functions and uses the observed training -data to define a likelihood function. Based on Bayes theorem, a (Gaussian) -posterior distribution over target functions is defined, whose mean is used -for prediction. - -A major difference is that GPR can choose the kernel's hyperparameters based -on gradient-ascent on the marginal likelihood function while KRR needs to -perform a grid search on a cross-validated loss function (mean-squared error -loss). A further difference is that GPR learns a generative, probabilistic -model of the target function and can thus provide meaningful confidence -intervals and posterior samples along with the predictions while KRR only -provides predictions. - -The following figure illustrates both methods on an artificial dataset, which -consists of a sinusoidal target function and strong noise. The figure compares -the learned model of KRR and GPR based on a ExpSineSquared kernel, which is -suited for learning periodic functions. The kernel's hyperparameters control -the smoothness (length_scale) and periodicity of the kernel (periodicity). -Moreover, the noise level -of the data is learned explicitly by GPR by an additional WhiteKernel component -in the kernel and by the regularization parameter alpha of KRR. - -.. figure:: ../auto_examples/gaussian_process/images/sphx_glr_plot_compare_gpr_krr_005.png - :target: ../auto_examples/gaussian_process/plot_compare_gpr_krr.html - :align: center - -The figure shows that both methods learn reasonable models of the target -function. GPR provides reasonable confidence bounds on the prediction which are not -available for KRR. A major difference between the two methods is the time -required for fitting and predicting: while fitting KRR is fast in principle, -the grid-search for hyperparameter optimization scales exponentially with the -number of hyperparameters ("curse of dimensionality"). 
The gradient-based -optimization of the parameters in GPR does not suffer from this exponential -scaling and is thus considerably faster on this example with 3-dimensional -hyperparameter space. The time for predicting is similar; however, generating -the variance of the predictive distribution of GPR takes considerably longer -than just predicting the mean. - -GPR on Mauna Loa CO2 data -------------------------- - -This example is based on Section 5.4.3 of [RW2006]_. -It illustrates an example of complex kernel engineering and -hyperparameter optimization using gradient ascent on the -log-marginal-likelihood. The data consists of the monthly average atmospheric -CO2 concentrations (in parts per million by volume (ppmv)) collected at the -Mauna Loa Observatory in Hawaii, between 1958 and 1997. The objective is to -model the CO2 concentration as a function of the time t. - -The kernel is composed of several terms that are responsible for explaining -different properties of the signal: - -- a long term, smooth rising trend is to be explained by an RBF kernel. The - RBF kernel with a large length-scale enforces this component to be smooth; - it is not enforced that the trend is rising which leaves this choice to the - GP. The specific length-scale and the amplitude are free hyperparameters. - -- a seasonal component, which is to be explained by the periodic - ExpSineSquared kernel with a fixed periodicity of 1 year. The length-scale - of this periodic component, controlling its smoothness, is a free parameter. - In order to allow decaying away from exact periodicity, the product with an - RBF kernel is taken. The length-scale of this RBF component controls the - decay time and is a further free parameter. - -- smaller, medium term irregularities are to be explained by a - RationalQuadratic kernel component, whose length-scale and alpha parameter, - which determines the diffuseness of the length-scales, are to be determined. - According to [RW2006]_, these irregularities can better be explained by - a RationalQuadratic than an RBF kernel component, probably because it can - accommodate several length-scales. - -- a "noise" term, consisting of an RBF kernel contribution, which shall - explain the correlated noise components such as local weather phenomena, - and a WhiteKernel contribution for the white noise. The relative amplitudes - and the RBF's length scale are further free parameters. - -Maximizing the log-marginal-likelihood after subtracting the target's mean -yields the following kernel with an LML of -83.214: - -:: - - 34.4**2 * RBF(length_scale=41.8) - + 3.27**2 * RBF(length_scale=180) * ExpSineSquared(length_scale=1.44, - periodicity=1) - + 0.446**2 * RationalQuadratic(alpha=17.7, length_scale=0.957) - + 0.197**2 * RBF(length_scale=0.138) + WhiteKernel(noise_level=0.0336) - -Thus, most of the target signal (34.4ppm) is explained by a long-term rising -trend (length-scale 41.8 years). The periodic component has an amplitude of -3.27ppm, a decay time of 180 years and a length-scale of 1.44. The long decay -time indicates that we have a locally very close to periodic seasonal -component. The correlated noise has an amplitude of 0.197ppm with a length -scale of 0.138 years and a white-noise contribution of 0.197ppm. Thus, the -overall noise level is very small, indicating that the data can be very well -explained by the model. The figure shows also that the model makes very -confident predictions until around 2015 - -.. 
figure:: ../auto_examples/gaussian_process/images/sphx_glr_plot_gpr_co2_003.png - :target: ../auto_examples/gaussian_process/plot_gpr_co2.html - :align: center +* :ref:`sphx_glr_auto_examples_gaussian_process_plot_gpr_noisy_targets.py` +* :ref:`sphx_glr_auto_examples_gaussian_process_plot_gpr_noisy.py` +* :ref:`sphx_glr_auto_examples_gaussian_process_plot_compare_gpr_krr.py` +* :ref:`sphx_glr_auto_examples_gaussian_process_plot_gpr_co2.py` .. _gpc: @@ -365,88 +239,88 @@ also invariant to rotations in the input space. For more details, we refer to Chapter 4 of [RW2006]_. For guidance on how to best combine different kernels, we refer to [Duv2014]_. -Gaussian Process Kernel API ---------------------------- -The main usage of a :class:`Kernel` is to compute the GP's covariance between -datapoints. For this, the method ``__call__`` of the kernel can be called. This -method can either be used to compute the "auto-covariance" of all pairs of -datapoints in a 2d array X, or the "cross-covariance" of all combinations -of datapoints of a 2d array X with datapoints in a 2d array Y. The following -identity holds true for all kernels k (except for the :class:`WhiteKernel`): -``k(X) == K(X, Y=X)`` - -If only the diagonal of the auto-covariance is being used, the method ``diag()`` -of a kernel can be called, which is more computationally efficient than the -equivalent call to ``__call__``: ``np.diag(k(X, X)) == k.diag(X)`` - -Kernels are parameterized by a vector :math:`\theta` of hyperparameters. These -hyperparameters can for instance control length-scales or periodicity of a -kernel (see below). All kernels support computing analytic gradients -of the kernel's auto-covariance with respect to :math:`log(\theta)` via setting -``eval_gradient=True`` in the ``__call__`` method. -That is, a ``(len(X), len(X), len(theta))`` array is returned where the entry -``[i, j, l]`` contains :math:`\frac{\partial k_\theta(x_i, x_j)}{\partial log(\theta_l)}`. -This gradient is used by the Gaussian process (both regressor and classifier) -in computing the gradient of the log-marginal-likelihood, which in turn is used -to determine the value of :math:`\theta`, which maximizes the log-marginal-likelihood, -via gradient ascent. For each hyperparameter, the initial value and the -bounds need to be specified when creating an instance of the kernel. The -current value of :math:`\theta` can be get and set via the property -``theta`` of the kernel object. Moreover, the bounds of the hyperparameters can be -accessed by the property ``bounds`` of the kernel. Note that both properties -(theta and bounds) return log-transformed values of the internally used values -since those are typically more amenable to gradient-based optimization. -The specification of each hyperparameter is stored in the form of an instance of -:class:`Hyperparameter` in the respective kernel. Note that a kernel using a -hyperparameter with name "x" must have the attributes self.x and self.x_bounds. - -The abstract base class for all kernels is :class:`Kernel`. Kernel implements a -similar interface as :class:`Estimator`, providing the methods ``get_params()``, -``set_params()``, and ``clone()``. This allows setting kernel values also via -meta-estimators such as :class:`Pipeline` or :class:`GridSearch`. Note that due to the nested -structure of kernels (by applying kernel operators, see below), the names of -kernel parameters might become relatively complicated. 
In general, for a -binary kernel operator, parameters of the left operand are prefixed with ``k1__`` -and parameters of the right operand with ``k2__``. An additional convenience -method is ``clone_with_theta(theta)``, which returns a cloned version of the -kernel but with the hyperparameters set to ``theta``. An illustrative example: - - >>> from sklearn.gaussian_process.kernels import ConstantKernel, RBF - >>> kernel = ConstantKernel(constant_value=1.0, constant_value_bounds=(0.0, 10.0)) * RBF(length_scale=0.5, length_scale_bounds=(0.0, 10.0)) + RBF(length_scale=2.0, length_scale_bounds=(0.0, 10.0)) - >>> for hyperparameter in kernel.hyperparameters: print(hyperparameter) - Hyperparameter(name='k1__k1__constant_value', value_type='numeric', bounds=array([[ 0., 10.]]), n_elements=1, fixed=False) - Hyperparameter(name='k1__k2__length_scale', value_type='numeric', bounds=array([[ 0., 10.]]), n_elements=1, fixed=False) - Hyperparameter(name='k2__length_scale', value_type='numeric', bounds=array([[ 0., 10.]]), n_elements=1, fixed=False) - >>> params = kernel.get_params() - >>> for key in sorted(params): print("%s : %s" % (key, params[key])) - k1 : 1**2 * RBF(length_scale=0.5) - k1__k1 : 1**2 - k1__k1__constant_value : 1.0 - k1__k1__constant_value_bounds : (0.0, 10.0) - k1__k2 : RBF(length_scale=0.5) - k1__k2__length_scale : 0.5 - k1__k2__length_scale_bounds : (0.0, 10.0) - k2 : RBF(length_scale=2) - k2__length_scale : 2.0 - k2__length_scale_bounds : (0.0, 10.0) - >>> print(kernel.theta) # Note: log-transformed - [ 0. -0.69314718 0.69314718] - >>> print(kernel.bounds) # Note: log-transformed - [[ -inf 2.30258509] - [ -inf 2.30258509] - [ -inf 2.30258509]] - - -All Gaussian process kernels are interoperable with :mod:`sklearn.metrics.pairwise` -and vice versa: instances of subclasses of :class:`Kernel` can be passed as -``metric`` to ``pairwise_kernels`` from :mod:`sklearn.metrics.pairwise`. Moreover, -kernel functions from pairwise can be used as GP kernels by using the wrapper -class :class:`PairwiseKernel`. The only caveat is that the gradient of -the hyperparameters is not analytic but numeric and all those kernels support -only isotropic distances. The parameter ``gamma`` is considered to be a -hyperparameter and may be optimized. The other kernel parameters are set -directly at initialization and are kept fixed. - +.. dropdown:: Gaussian Process Kernel API + + The main usage of a :class:`Kernel` is to compute the GP's covariance between + datapoints. For this, the method ``__call__`` of the kernel can be called. This + method can either be used to compute the "auto-covariance" of all pairs of + datapoints in a 2d array X, or the "cross-covariance" of all combinations + of datapoints of a 2d array X with datapoints in a 2d array Y. The following + identity holds true for all kernels k (except for the :class:`WhiteKernel`): + ``k(X) == K(X, Y=X)`` + + If only the diagonal of the auto-covariance is being used, the method ``diag()`` + of a kernel can be called, which is more computationally efficient than the + equivalent call to ``__call__``: ``np.diag(k(X, X)) == k.diag(X)`` + + Kernels are parameterized by a vector :math:`\theta` of hyperparameters. These + hyperparameters can for instance control length-scales or periodicity of a + kernel (see below). All kernels support computing analytic gradients + of the kernel's auto-covariance with respect to :math:`log(\theta)` via setting + ``eval_gradient=True`` in the ``__call__`` method. 
+ That is, a ``(len(X), len(X), len(theta))`` array is returned where the entry + ``[i, j, l]`` contains :math:`\frac{\partial k_\theta(x_i, x_j)}{\partial log(\theta_l)}`. + This gradient is used by the Gaussian process (both regressor and classifier) + in computing the gradient of the log-marginal-likelihood, which in turn is used + to determine the value of :math:`\theta`, which maximizes the log-marginal-likelihood, + via gradient ascent. For each hyperparameter, the initial value and the + bounds need to be specified when creating an instance of the kernel. The + current value of :math:`\theta` can be get and set via the property + ``theta`` of the kernel object. Moreover, the bounds of the hyperparameters can be + accessed by the property ``bounds`` of the kernel. Note that both properties + (theta and bounds) return log-transformed values of the internally used values + since those are typically more amenable to gradient-based optimization. + The specification of each hyperparameter is stored in the form of an instance of + :class:`Hyperparameter` in the respective kernel. Note that a kernel using a + hyperparameter with name "x" must have the attributes self.x and self.x_bounds. + + The abstract base class for all kernels is :class:`Kernel`. Kernel implements a + similar interface as :class:`~sklearn.base.BaseEstimator`, providing the + methods ``get_params()``, ``set_params()``, and ``clone()``. This allows + setting kernel values also via meta-estimators such as + :class:`~sklearn.pipeline.Pipeline` or + :class:`~sklearn.model_selection.GridSearchCV`. Note that due to the nested + structure of kernels (by applying kernel operators, see below), the names of + kernel parameters might become relatively complicated. In general, for a binary + kernel operator, parameters of the left operand are prefixed with ``k1__`` and + parameters of the right operand with ``k2__``. An additional convenience method + is ``clone_with_theta(theta)``, which returns a cloned version of the kernel + but with the hyperparameters set to ``theta``. An illustrative example: + + >>> from sklearn.gaussian_process.kernels import ConstantKernel, RBF + >>> kernel = ConstantKernel(constant_value=1.0, constant_value_bounds=(0.0, 10.0)) * RBF(length_scale=0.5, length_scale_bounds=(0.0, 10.0)) + RBF(length_scale=2.0, length_scale_bounds=(0.0, 10.0)) + >>> for hyperparameter in kernel.hyperparameters: print(hyperparameter) + Hyperparameter(name='k1__k1__constant_value', value_type='numeric', bounds=array([[ 0., 10.]]), n_elements=1, fixed=False) + Hyperparameter(name='k1__k2__length_scale', value_type='numeric', bounds=array([[ 0., 10.]]), n_elements=1, fixed=False) + Hyperparameter(name='k2__length_scale', value_type='numeric', bounds=array([[ 0., 10.]]), n_elements=1, fixed=False) + >>> params = kernel.get_params() + >>> for key in sorted(params): print("%s : %s" % (key, params[key])) + k1 : 1**2 * RBF(length_scale=0.5) + k1__k1 : 1**2 + k1__k1__constant_value : 1.0 + k1__k1__constant_value_bounds : (0.0, 10.0) + k1__k2 : RBF(length_scale=0.5) + k1__k2__length_scale : 0.5 + k1__k2__length_scale_bounds : (0.0, 10.0) + k2 : RBF(length_scale=2) + k2__length_scale : 2.0 + k2__length_scale_bounds : (0.0, 10.0) + >>> print(kernel.theta) # Note: log-transformed + [ 0. 
-0.69314718 0.69314718] + >>> print(kernel.bounds) # Note: log-transformed + [[ -inf 2.30258509] + [ -inf 2.30258509] + [ -inf 2.30258509]] + + All Gaussian process kernels are interoperable with :mod:`sklearn.metrics.pairwise` + and vice versa: instances of subclasses of :class:`Kernel` can be passed as + ``metric`` to ``pairwise_kernels`` from :mod:`sklearn.metrics.pairwise`. Moreover, + kernel functions from pairwise can be used as GP kernels by using the wrapper + class :class:`PairwiseKernel`. The only caveat is that the gradient of + the hyperparameters is not analytic but numeric and all those kernels support + only isotropic distances. The parameter ``gamma`` is considered to be a + hyperparameter and may be optimized. The other kernel parameters are set + directly at initialization and are kept fixed. Basic kernels ------------- @@ -507,36 +381,41 @@ MatÊrn kernel ------------- The :class:`Matern` kernel is a stationary kernel and a generalization of the :class:`RBF` kernel. It has an additional parameter :math:`\nu` which controls -the smoothness of the resulting function. It is parameterized by a length-scale parameter :math:`l>0`, which can either be a scalar (isotropic variant of the kernel) or a vector with the same number of dimensions as the inputs :math:`x` (anisotropic variant of the kernel). The kernel is given by: +the smoothness of the resulting function. It is parameterized by a length-scale parameter :math:`l>0`, which can either be a scalar (isotropic variant of the kernel) or a vector with the same number of dimensions as the inputs :math:`x` (anisotropic variant of the kernel). -.. math:: +.. dropdown:: Mathematical implementation of MatÊrn kernel - k(x_i, x_j) = \frac{1}{\Gamma(\nu)2^{\nu-1}}\Bigg(\frac{\sqrt{2\nu}}{l} d(x_i , x_j )\Bigg)^\nu K_\nu\Bigg(\frac{\sqrt{2\nu}}{l} d(x_i , x_j )\Bigg), + The kernel is given by: -where :math:`d(\cdot,\cdot)` is the Euclidean distance, :math:`K_\nu(\cdot)` is a modified Bessel function and :math:`\Gamma(\cdot)` is the gamma function. -As :math:`\nu\rightarrow\infty`, the MatÊrn kernel converges to the RBF kernel. -When :math:`\nu = 1/2`, the MatÊrn kernel becomes identical to the absolute -exponential kernel, i.e., + .. math:: -.. math:: - k(x_i, x_j) = \exp \Bigg(- \frac{1}{l} d(x_i , x_j ) \Bigg) \quad \quad \nu= \tfrac{1}{2} + k(x_i, x_j) = \frac{1}{\Gamma(\nu)2^{\nu-1}}\Bigg(\frac{\sqrt{2\nu}}{l} d(x_i , x_j )\Bigg)^\nu K_\nu\Bigg(\frac{\sqrt{2\nu}}{l} d(x_i , x_j )\Bigg), -In particular, :math:`\nu = 3/2`: + where :math:`d(\cdot,\cdot)` is the Euclidean distance, :math:`K_\nu(\cdot)` is a modified Bessel function and :math:`\Gamma(\cdot)` is the gamma function. + As :math:`\nu\rightarrow\infty`, the MatÊrn kernel converges to the RBF kernel. + When :math:`\nu = 1/2`, the MatÊrn kernel becomes identical to the absolute + exponential kernel, i.e., -.. math:: - k(x_i, x_j) = \Bigg(1 + \frac{\sqrt{3}}{l} d(x_i , x_j )\Bigg) \exp \Bigg(-\frac{\sqrt{3}}{l} d(x_i , x_j ) \Bigg) \quad \quad \nu= \tfrac{3}{2} + .. math:: + k(x_i, x_j) = \exp \Bigg(- \frac{1}{l} d(x_i , x_j ) \Bigg) \quad \quad \nu= \tfrac{1}{2} -and :math:`\nu = 5/2`: + In particular, :math:`\nu = 3/2`: -.. math:: - k(x_i, x_j) = \Bigg(1 + \frac{\sqrt{5}}{l} d(x_i , x_j ) +\frac{5}{3l} d(x_i , x_j )^2 \Bigg) \exp \Bigg(-\frac{\sqrt{5}}{l} d(x_i , x_j ) \Bigg) \quad \quad \nu= \tfrac{5}{2} + .. 
math:: + k(x_i, x_j) = \Bigg(1 + \frac{\sqrt{3}}{l} d(x_i , x_j )\Bigg) \exp \Bigg(-\frac{\sqrt{3}}{l} d(x_i , x_j ) \Bigg) \quad \quad \nu= \tfrac{3}{2} + + and :math:`\nu = 5/2`: + + .. math:: + k(x_i, x_j) = \Bigg(1 + \frac{\sqrt{5}}{l} d(x_i , x_j ) +\frac{5}{3l} d(x_i , x_j )^2 \Bigg) \exp \Bigg(-\frac{\sqrt{5}}{l} d(x_i , x_j ) \Bigg) \quad \quad \nu= \tfrac{5}{2} + + are popular choices for learning functions that are not infinitely + differentiable (as assumed by the RBF kernel) but at least once (:math:`\nu = + 3/2`) or twice differentiable (:math:`\nu = 5/2`). -are popular choices for learning functions that are not infinitely -differentiable (as assumed by the RBF kernel) but at least once (:math:`\nu = -3/2`) or twice differentiable (:math:`\nu = 5/2`). + The flexibility of controlling the smoothness of the learned function via :math:`\nu` + allows adapting to the properties of the true underlying functional relation. -The flexibility of controlling the smoothness of the learned function via :math:`\nu` -allows adapting to the properties of the true underlying functional relation. The prior and posterior of a GP resulting from a MatÊrn kernel are shown in the following figure: diff --git a/doc/modules/grid_search.rst b/doc/modules/grid_search.rst index 851f9a202fa2f..12ee76d8e4d39 100644 --- a/doc/modules/grid_search.rst +++ b/doc/modules/grid_search.rst @@ -72,35 +72,35 @@ evaluated and the best combination is retained. .. currentmodule:: sklearn.model_selection -.. topic:: Examples: +.. rubric:: Examples - - See :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_digits.py` for an example of - Grid Search computation on the digits dataset. +- See :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_digits.py` for an example of + Grid Search computation on the digits dataset. - - See :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_text_feature_extraction.py` for an example - of Grid Search coupling parameters from a text documents feature - extractor (n-gram count vectorizer and TF-IDF transformer) with a - classifier (here a linear SVM trained with SGD with either elastic - net or L2 penalty) using a :class:`pipeline.Pipeline` instance. +- See :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_text_feature_extraction.py` for an example + of Grid Search coupling parameters from a text documents feature + extractor (n-gram count vectorizer and TF-IDF transformer) with a + classifier (here a linear SVM trained with SGD with either elastic + net or L2 penalty) using a :class:`~sklearn.pipeline.Pipeline` instance. - - See :ref:`sphx_glr_auto_examples_model_selection_plot_nested_cross_validation_iris.py` - for an example of Grid Search within a cross validation loop on the iris - dataset. This is the best practice for evaluating the performance of a - model with grid search. +- See :ref:`sphx_glr_auto_examples_model_selection_plot_nested_cross_validation_iris.py` + for an example of Grid Search within a cross validation loop on the iris + dataset. This is the best practice for evaluating the performance of a + model with grid search. - - See :ref:`sphx_glr_auto_examples_model_selection_plot_multi_metric_evaluation.py` - for an example of :class:`GridSearchCV` being used to evaluate multiple - metrics simultaneously. +- See :ref:`sphx_glr_auto_examples_model_selection_plot_multi_metric_evaluation.py` + for an example of :class:`GridSearchCV` being used to evaluate multiple + metrics simultaneously. 
- - See :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_refit_callable.py` - for an example of using ``refit=callable`` interface in - :class:`GridSearchCV`. The example shows how this interface adds certain - amount of flexibility in identifying the "best" estimator. This interface - can also be used in multiple metrics evaluation. +- See :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_refit_callable.py` + for an example of using ``refit=callable`` interface in + :class:`GridSearchCV`. The example shows how this interface adds certain + amount of flexibility in identifying the "best" estimator. This interface + can also be used in multiple metrics evaluation. - - See :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_stats.py` - for an example of how to do a statistical comparison on the outputs of - :class:`GridSearchCV`. +- See :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_stats.py` + for an example of how to do a statistical comparison on the outputs of + :class:`GridSearchCV`. .. _randomized_parameter_search: @@ -135,14 +135,14 @@ variate sample) method to sample a value. A call to the ``rvs`` function should provide independent random samples from possible parameter values on consecutive calls. - .. warning:: +.. warning:: - The distributions in ``scipy.stats`` prior to version scipy 0.16 - do not allow specifying a random state. Instead, they use the global - numpy random state, that can be seeded via ``np.random.seed`` or set - using ``np.random.set_state``. However, beginning scikit-learn 0.18, - the :mod:`sklearn.model_selection` module sets the random state provided - by the user if scipy >= 0.16 is also available. + The distributions in ``scipy.stats`` prior to version scipy 0.16 + do not allow specifying a random state. Instead, they use the global + numpy random state, that can be seeded via ``np.random.seed`` or set + using ``np.random.set_state``. However, beginning scikit-learn 0.18, + the :mod:`sklearn.model_selection` module sets the random state provided + by the user if scipy >= 0.16 is also available. For continuous parameters, such as ``C`` above, it is important to specify a continuous distribution to take full advantage of the randomization. This way, @@ -161,16 +161,16 @@ variable that is log-uniformly distributed between ``1e0`` and ``1e3``:: 'kernel': ['rbf'], 'class_weight':['balanced', None]} -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_model_selection_plot_randomized_search.py` compares the usage and efficiency - of randomized search and grid search. +* :ref:`sphx_glr_auto_examples_model_selection_plot_randomized_search.py` compares the usage and efficiency + of randomized search and grid search. -.. topic:: References: +.. rubric:: References - * Bergstra, J. and Bengio, Y., - Random search for hyper-parameter optimization, - The Journal of Machine Learning Research (2012) +* Bergstra, J. and Bengio, Y., + Random search for hyper-parameter optimization, + The Journal of Machine Learning Research (2012) .. _successive_halving_user_guide: @@ -222,10 +222,10 @@ need to explicitly import ``enable_halving_search_cv``:: >>> from sklearn.model_selection import HalvingGridSearchCV >>> from sklearn.model_selection import HalvingRandomSearchCV -.. topic:: Examples: +.. 
rubric:: Examples - * :ref:`sphx_glr_auto_examples_model_selection_plot_successive_halving_heatmap.py` - * :ref:`sphx_glr_auto_examples_model_selection_plot_successive_halving_iterations.py` +* :ref:`sphx_glr_auto_examples_model_selection_plot_successive_halving_heatmap.py` +* :ref:`sphx_glr_auto_examples_model_selection_plot_successive_halving_iterations.py` Choosing ``min_resources`` and the number of candidates ------------------------------------------------------- @@ -528,15 +528,16 @@ In the example above, the best parameter combination is ``{'criterion': since it has reached the last iteration (3) with the highest score: 0.96. -.. topic:: References: +.. rubric:: References - .. [1] K. Jamieson, A. Talwalkar, - `Non-stochastic Best Arm Identification and Hyperparameter - Optimization `_, in - proc. of Machine Learning Research, 2016. - .. [2] L. Li, K. Jamieson, G. DeSalvo, A. Rostamizadeh, A. Talwalkar, - :arxiv:`Hyperband: A Novel Bandit-Based Approach to Hyperparameter Optimization - <1603.06560>`, in Machine Learning Research 18, 2018. +.. [1] K. Jamieson, A. Talwalkar, + `Non-stochastic Best Arm Identification and Hyperparameter + Optimization `_, in + proc. of Machine Learning Research, 2016. + +.. [2] L. Li, K. Jamieson, G. DeSalvo, A. Rostamizadeh, A. Talwalkar, + :arxiv:`Hyperband: A Novel Bandit-Based Approach to Hyperparameter Optimization + <1603.06560>`, in Machine Learning Research 18, 2018. .. _grid_search_tips: @@ -612,7 +613,7 @@ Here, ```` is the parameter name of the nested estimator, in this case ``estimator``. If the meta-estimator is constructed as a collection of estimators as in `pipeline.Pipeline`, then ```` refers to the name of the estimator, -see :ref:`pipeline_nested_parameters`. In practice, there can be several +see :ref:`pipeline_nested_parameters`. In practice, there can be several levels of nesting:: >>> from sklearn.pipeline import Pipeline diff --git a/doc/modules/impute.rst b/doc/modules/impute.rst index 6314b2ea71737..1431f26132338 100644 --- a/doc/modules/impute.rst +++ b/doc/modules/impute.rst @@ -22,9 +22,9 @@ Univariate vs. Multivariate Imputation One type of imputation algorithm is univariate, which imputes values in the i-th feature dimension using only non-missing values in that feature dimension -(e.g. :class:`impute.SimpleImputer`). By contrast, multivariate imputation +(e.g. :class:`SimpleImputer`). By contrast, multivariate imputation algorithms use the entire set of available feature dimensions to estimate the -missing values (e.g. :class:`impute.IterativeImputer`). +missing values (e.g. :class:`IterativeImputer`). .. _single_imputer: @@ -87,6 +87,8 @@ string values or pandas categoricals when using the ``'most_frequent'`` or ['a' 'y'] ['b' 'y']] +For another example on usage, see :ref:`sphx_glr_auto_examples_impute_plot_missing_values.py`. + .. _iterative_imputer: @@ -176,9 +178,9 @@ cannot be achieved by a single call to ``transform``. References ---------- -.. [1] Stef van Buuren, Karin Groothuis-Oudshoorn (2011). "mice: Multivariate +.. [1] `Stef van Buuren, Karin Groothuis-Oudshoorn (2011). "mice: Multivariate Imputation by Chained Equations in R". Journal of Statistical Software 45: - 1-67. + 1-67. `_ .. [2] Roderick J A Little and Donald B Rubin (1986). "Statistical Analysis with Missing Data". John Wiley & Sons, Inc., New York, NY, USA. @@ -190,19 +192,20 @@ Nearest neighbors imputation The :class:`KNNImputer` class provides imputation for filling in missing values using the k-Nearest Neighbors approach. 
By default, a euclidean distance metric -that supports missing values, :func:`~sklearn.metrics.nan_euclidean_distances`, -is used to find the nearest neighbors. Each missing feature is imputed using -values from ``n_neighbors`` nearest neighbors that have a value for the -feature. The feature of the neighbors are averaged uniformly or weighted by -distance to each neighbor. If a sample has more than one feature missing, then -the neighbors for that sample can be different depending on the particular -feature being imputed. When the number of available neighbors is less than -`n_neighbors` and there are no defined distances to the training set, the -training set average for that feature is used during imputation. If there is at -least one neighbor with a defined distance, the weighted or unweighted average -of the remaining neighbors will be used during imputation. If a feature is -always missing in training, it is removed during `transform`. For more -information on the methodology, see ref. [OL2001]_. +that supports missing values, +:func:`~sklearn.metrics.pairwise.nan_euclidean_distances`, is used to find the +nearest neighbors. Each missing feature is imputed using values from +``n_neighbors`` nearest neighbors that have a value for the feature. The +feature of the neighbors are averaged uniformly or weighted by distance to each +neighbor. If a sample has more than one feature missing, then the neighbors for +that sample can be different depending on the particular feature being imputed. +When the number of available neighbors is less than `n_neighbors` and there are +no defined distances to the training set, the training set average for that +feature is used during imputation. If there is at least one neighbor with a +defined distance, the weighted or unweighted average of the remaining neighbors +will be used during imputation. If a feature is always missing in training, it +is removed during `transform`. For more information on the methodology, see +ref. [OL2001]_. The following snippet demonstrates how to replace missing values, encoded as ``np.nan``, using the mean feature value of the two nearest @@ -219,12 +222,15 @@ neighbors of samples with missing values:: [5.5, 6. , 5. ], [8. , 8. , 7. ]]) -.. topic:: References +For another example on usage, see :ref:`sphx_glr_auto_examples_impute_plot_missing_values.py`. + +.. rubric:: References - .. [OL2001] Olga Troyanskaya, Michael Cantor, Gavin Sherlock, Pat Brown, - Trevor Hastie, Robert Tibshirani, David Botstein and Russ B. Altman, - Missing value estimation methods for DNA microarrays, BIOINFORMATICS - Vol. 17 no. 6, 2001 Pages 520-525. +.. [OL2001] `Olga Troyanskaya, Michael Cantor, Gavin Sherlock, Pat Brown, + Trevor Hastie, Robert Tibshirani, David Botstein and Russ B. Altman, + Missing value estimation methods for DNA microarrays, BIOINFORMATICS + Vol. 17 no. 6, 2001 Pages 520-525. + `_ Keeping the number of features constant ======================================= @@ -303,10 +309,12 @@ whether or not they contain missing values:: >>> indicator.features_ array([0, 1, 2, 3]) -When using the :class:`MissingIndicator` in a :class:`Pipeline`, be sure to use -the :class:`FeatureUnion` or :class:`ColumnTransformer` to add the indicator -features to the regular features. First we obtain the `iris` dataset, and add -some missing values to it. 
+When using the :class:`MissingIndicator` in a +:class:`~sklearn.pipeline.Pipeline`, be sure to use the +:class:`~sklearn.pipeline.FeatureUnion` or +:class:`~sklearn.compose.ColumnTransformer` to add the indicator features to +the regular features. First we obtain the `iris` dataset, and add some missing +values to it. >>> from sklearn.datasets import load_iris >>> from sklearn.impute import SimpleImputer, MissingIndicator @@ -319,9 +327,9 @@ some missing values to it. >>> X_train, X_test, y_train, _ = train_test_split(X, y, test_size=100, ... random_state=0) -Now we create a :class:`FeatureUnion`. All features will be imputed using -:class:`SimpleImputer`, in order to enable classifiers to work with this data. -Additionally, it adds the indicator variables from +Now we create a :class:`~sklearn.pipeline.FeatureUnion`. All features will be +imputed using :class:`SimpleImputer`, in order to enable classifiers to work +with this data. Additionally, it adds the indicator variables from :class:`MissingIndicator`. >>> transformer = FeatureUnion( @@ -334,8 +342,8 @@ Additionally, it adds the indicator variables from (100, 8) Of course, we cannot use the transformer to make any predictions. We should -wrap this in a :class:`Pipeline` with a classifier (e.g., a -:class:`DecisionTreeClassifier`) to be able to make predictions. +wrap this in a :class:`~sklearn.pipeline.Pipeline` with a classifier (e.g., a +:class:`~sklearn.tree.DecisionTreeClassifier`) to be able to make predictions. >>> clf = make_pipeline(transformer, DecisionTreeClassifier()) >>> clf = clf.fit(X_train, y_train) diff --git a/doc/modules/isotonic.rst b/doc/modules/isotonic.rst index 8967ef18afcb3..50fbdb24e72c7 100644 --- a/doc/modules/isotonic.rst +++ b/doc/modules/isotonic.rst @@ -9,10 +9,10 @@ Isotonic regression The class :class:`IsotonicRegression` fits a non-decreasing real function to 1-dimensional data. It solves the following problem: - minimize :math:`\sum_i w_i (y_i - \hat{y}_i)^2` - - subject to :math:`\hat{y}_i \le \hat{y}_j` whenever :math:`X_i \le X_j`, +.. math:: + \min \sum_i w_i (y_i - \hat{y}_i)^2 +subject to :math:`\hat{y}_i \le \hat{y}_j` whenever :math:`X_i \le X_j`, where the weights :math:`w_i` are strictly positive, and both `X` and `y` are arbitrary real quantities. @@ -31,3 +31,7 @@ thus form a function that is piecewise linear: .. figure:: ../auto_examples/miscellaneous/images/sphx_glr_plot_isotonic_regression_001.png :target: ../auto_examples/miscellaneous/plot_isotonic_regression.html :align: center + +.. rubric:: Examples + +* :ref:`sphx_glr_auto_examples_miscellaneous_plot_isotonic_regression.py` diff --git a/doc/modules/kernel_approximation.rst b/doc/modules/kernel_approximation.rst index 40e8e8b526d1e..305c3cc6601fb 100644 --- a/doc/modules/kernel_approximation.rst +++ b/doc/modules/kernel_approximation.rst @@ -35,13 +35,65 @@ is advisable to compare results against exact kernel methods when possible. Nystroem Method for Kernel Approximation ---------------------------------------- -The Nystroem method, as implemented in :class:`Nystroem` is a general method -for low-rank approximations of kernels. It achieves this by essentially subsampling -the data on which the kernel is evaluated. -By default :class:`Nystroem` uses the ``rbf`` kernel, but it can use any -kernel function or a precomputed kernel matrix. -The number of samples used - which is also the dimensionality of the features computed - -is given by the parameter ``n_components``. 
+The Nystroem method, as implemented in :class:`Nystroem` is a general method for +reduced rank approximations of kernels. It achieves this by subsampling without +replacement rows/columns of the data on which the kernel is evaluated. While the +computational complexity of the exact method is +:math:`\mathcal{O}(n^3_{\text{samples}})`, the complexity of the approximation +is :math:`\mathcal{O}(n^2_{\text{components}} \cdot n_{\text{samples}})`, where +one can set :math:`n_{\text{components}} \ll n_{\text{samples}}` without a +significative decrease in performance [WS2001]_. + +We can construct the eigendecomposition of the kernel matrix :math:`K`, based +on the features of the data, and then split it into sampled and unsampled data +points. + +.. math:: + + K = U \Lambda U^T + = \begin{bmatrix} U_1 \\ U_2\end{bmatrix} \Lambda \begin{bmatrix} U_1 \\ U_2 \end{bmatrix}^T + = \begin{bmatrix} U_1 \Lambda U_1^T & U_1 \Lambda U_2^T \\ U_2 \Lambda U_1^T & U_2 \Lambda U_2^T \end{bmatrix} + \equiv \begin{bmatrix} K_{11} & K_{12} \\ K_{21} & K_{22} \end{bmatrix} + +where: + +* :math:`U` is orthonormal +* :math:`\Lambda` is diagonal matrix of eigenvalues +* :math:`U_1` is orthonormal matrix of samples that were chosen +* :math:`U_2` is orthonormal matrix of samples that were not chosen + +Given that :math:`U_1 \Lambda U_1^T` can be obtained by orthonormalization of +the matrix :math:`K_{11}`, and :math:`U_2 \Lambda U_1^T` can be evaluated (as +well as its transpose), the only remaining term to elucidate is +:math:`U_2 \Lambda U_2^T`. To do this we can express it in terms of the already +evaluated matrices: + +.. math:: + + \begin{align} U_2 \Lambda U_2^T &= \left(K_{21} U_1 \Lambda^{-1}\right) \Lambda \left(K_{21} U_1 \Lambda^{-1}\right)^T + \\&= K_{21} U_1 (\Lambda^{-1} \Lambda) \Lambda^{-1} U_1^T K_{21}^T + \\&= K_{21} U_1 \Lambda^{-1} U_1^T K_{21}^T + \\&= K_{21} K_{11}^{-1} K_{21}^T + \\&= \left( K_{21} K_{11}^{-\frac12} \right) \left( K_{21} K_{11}^{-\frac12} \right)^T + .\end{align} + +During ``fit``, the class :class:`Nystroem` evaluates the basis :math:`U_1`, and +computes the normalization constant, :math:`K_{11}^{-\frac12}`. Later, during +``transform``, the kernel matrix is determined between the basis (given by the +`components_` attribute) and the new data points, ``X``. This matrix is then +multiplied by the ``normalization_`` matrix for the final result. + +By default :class:`Nystroem` uses the ``rbf`` kernel, but it can use any kernel +function or a precomputed kernel matrix. The number of samples used - which is +also the dimensionality of the features computed - is given by the parameter +``n_components``. + +.. rubric:: Examples + +* See the example entitled + :ref:`sphx_glr_auto_examples_applications_plot_cyclical_feature_engineering.py`, + that shows an efficient machine learning pipeline that uses a + :class:`Nystroem` kernel. .. _rbf_kernel_approx: @@ -91,9 +143,9 @@ use of larger feature spaces more efficient. Comparing an exact RBF kernel (left) with the approximation (right) -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_miscellaneous_plot_kernel_approximation.py` +* :ref:`sphx_glr_auto_examples_miscellaneous_plot_kernel_approximation.py` .. _additive_chi_kernel_approx: @@ -108,7 +160,7 @@ The additive chi squared kernel as used here is given by k(x, y) = \sum_i \frac{2x_iy_i}{x_i+y_i} -This is not exactly the same as :func:`sklearn.metrics.additive_chi2_kernel`. 
+This is not exactly the same as :func:`sklearn.metrics.pairwise.additive_chi2_kernel`. The authors of [VZ2010]_ prefer the version above as it is always positive definite. Since the kernel is additive, it is possible to treat all components @@ -163,8 +215,8 @@ function given by: where: - * ``x``, ``y`` are the input vectors - * ``d`` is the kernel degree +* ``x``, ``y`` are the input vectors +* ``d`` is the kernel degree Intuitively, the feature space of the polynomial kernel of degree `d` consists of all possible degree-`d` products among input features, which enables @@ -189,9 +241,9 @@ In addition, this method can transform samples in time, where :math:`n_{\text{components}}` is the desired output dimension, determined by ``n_components``. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_kernel_approximation_plot_scalable_poly_kernels.py` +* :ref:`sphx_glr_auto_examples_kernel_approximation_plot_scalable_poly_kernels.py` .. _tensor_sketch_kernel_approx: @@ -231,26 +283,29 @@ The classes in this submodule allow to approximate the embedding or store training examples. -.. topic:: References: - - .. [RR2007] `"Random features for large-scale kernel machines" - `_ - Rahimi, A. and Recht, B. - Advances in neural information processing 2007, - .. [LS2010] `"Random Fourier approximations for skewed multiplicative histogram kernels" - `_ - Li, F., Ionescu, C., and Sminchisescu, C. - - Pattern Recognition, DAGM 2010, Lecture Notes in Computer Science. - .. [VZ2010] `"Efficient additive kernels via explicit feature maps" - `_ - Vedaldi, A. and Zisserman, A. - Computer Vision and Pattern Recognition 2010 - .. [VVZ2010] `"Generalized RBF feature maps for Efficient Detection" - `_ - Vempati, S. and Vedaldi, A. and Zisserman, A. and Jawahar, CV - 2010 - .. [PP2013] :doi:`"Fast and scalable polynomial kernels via explicit feature maps" - <10.1145/2487575.2487591>` - Pham, N., & Pagh, R. - 2013 - .. [CCF2002] `"Finding frequent items in data streams" - `_ - Charikar, M., Chen, K., & Farach-Colton - 2002 - .. [WIKICS] `"Wikipedia: Count sketch" - `_ +.. rubric:: References + +.. [WS2001] `"Using the NystrÃļm method to speed up kernel machines" + `_ + Williams, C.K.I.; Seeger, M. - 2001. +.. [RR2007] `"Random features for large-scale kernel machines" + `_ + Rahimi, A. and Recht, B. - Advances in neural information processing 2007, +.. [LS2010] `"Random Fourier approximations for skewed multiplicative histogram kernels" + `_ + Li, F., Ionescu, C., and Sminchisescu, C. + - Pattern Recognition, DAGM 2010, Lecture Notes in Computer Science. +.. [VZ2010] `"Efficient additive kernels via explicit feature maps" + `_ + Vedaldi, A. and Zisserman, A. - Computer Vision and Pattern Recognition 2010 +.. [VVZ2010] `"Generalized RBF feature maps for Efficient Detection" + `_ + Vempati, S. and Vedaldi, A. and Zisserman, A. and Jawahar, CV - 2010 +.. [PP2013] :doi:`"Fast and scalable polynomial kernels via explicit feature maps" + <10.1145/2487575.2487591>` + Pham, N., & Pagh, R. - 2013 +.. [CCF2002] `"Finding frequent items in data streams" + `_ + Charikar, M., Chen, K., & Farach-Colton - 2002 +.. [WIKICS] `"Wikipedia: Count sketch" + `_ diff --git a/doc/modules/kernel_ridge.rst b/doc/modules/kernel_ridge.rst index 286e9d4ac5322..fcc19a49628c4 100644 --- a/doc/modules/kernel_ridge.rst +++ b/doc/modules/kernel_ridge.rst @@ -55,8 +55,11 @@ dense model. :target: ../auto_examples/miscellaneous/plot_kernel_ridge_regression.html :align: center +.. rubric:: Examples -.. 
topic:: References: +* :ref:`sphx_glr_auto_examples_miscellaneous_plot_kernel_ridge_regression.py` - .. [M2012] "Machine Learning: A Probabilistic Perspective" - Murphy, K. P. - chapter 14.4.3, pp. 492-493, The MIT Press, 2012 +.. rubric:: References + +.. [M2012] "Machine Learning: A Probabilistic Perspective" + Murphy, K. P. - chapter 14.4.3, pp. 492-493, The MIT Press, 2012 diff --git a/doc/modules/lda_qda.rst b/doc/modules/lda_qda.rst index f8c6064ebedf0..0d264ec662a9f 100644 --- a/doc/modules/lda_qda.rst +++ b/doc/modules/lda_qda.rst @@ -29,10 +29,10 @@ Discriminant Analysis can only learn linear boundaries, while Quadratic Discriminant Analysis can learn quadratic boundaries and is therefore more flexible. -.. topic:: Examples: +.. rubric:: Examples - :ref:`sphx_glr_auto_examples_classification_plot_lda_qda.py`: Comparison of LDA and QDA - on synthetic data. +* :ref:`sphx_glr_auto_examples_classification_plot_lda_qda.py`: Comparison of LDA and + QDA on synthetic data. Dimensionality reduction using Linear Discriminant Analysis =========================================================== @@ -49,10 +49,10 @@ This is implemented in the `transform` method. The desired dimensionality can be set using the ``n_components`` parameter. This parameter has no influence on the `fit` and `predict` methods. -.. topic:: Examples: +.. rubric:: Examples - :ref:`sphx_glr_auto_examples_decomposition_plot_pca_vs_lda.py`: Comparison of LDA and PCA - for dimensionality reduction of the Iris dataset +* :ref:`sphx_glr_auto_examples_decomposition_plot_pca_vs_lda.py`: Comparison of LDA and + PCA for dimensionality reduction of the Iris dataset .. _lda_qda_math: @@ -190,11 +190,11 @@ matrix. The shrunk Ledoit and Wolf estimator of covariance may not always be the best choice. For example if the distribution of the data is normally distributed, the -Oracle Shrinkage Approximating estimator :class:`sklearn.covariance.OAS` +Oracle Approximating Shrinkage estimator :class:`sklearn.covariance.OAS` yields a smaller Mean Squared Error than the one given by Ledoit and Wolf's formula used with shrinkage="auto". In LDA, the data are assumed to be gaussian conditionally to the class. If these assumptions hold, using LDA with -the OAS estimator of covariance will yield a better classification +the OAS estimator of covariance will yield a better classification accuracy than if Ledoit and Wolf or the empirical covariance estimator is used. The covariance estimator can be chosen using with the ``covariance_estimator`` @@ -210,10 +210,10 @@ class. A covariance estimator should have a :term:`fit` method and a .. centered:: |shrinkage| -.. topic:: Examples: +.. rubric:: Examples - :ref:`sphx_glr_auto_examples_classification_plot_lda.py`: Comparison of LDA classifiers - with Empirical, Ledoit Wolf and OAS covariance estimator. +* :ref:`sphx_glr_auto_examples_classification_plot_lda.py`: Comparison of LDA classifiers + with Empirical, Ledoit Wolf and OAS covariance estimator. Estimation algorithms ===================== @@ -253,13 +253,13 @@ transform, and it supports shrinkage. However, the 'eigen' solver needs to compute the covariance matrix, so it might not be suitable for situations with a high number of features. -.. topic:: References: +.. rubric:: References - .. [1] "The Elements of Statistical Learning", Hastie T., Tibshirani R., - Friedman J., Section 4.3, p.106-119, 2008. +.. [1] "The Elements of Statistical Learning", Hastie T., Tibshirani R., + Friedman J., Section 4.3, p.106-119, 2008. - .. [2] Ledoit O, Wolf M. 
Honey, I Shrunk the Sample Covariance Matrix. - The Journal of Portfolio Management 30(4), 110-119, 2004. +.. [2] Ledoit O, Wolf M. Honey, I Shrunk the Sample Covariance Matrix. + The Journal of Portfolio Management 30(4), 110-119, 2004. - .. [3] R. O. Duda, P. E. Hart, D. G. Stork. Pattern Classification - (Second Edition), section 2.6.2. +.. [3] R. O. Duda, P. E. Hart, D. G. Stork. Pattern Classification + (Second Edition), section 2.6.2. diff --git a/doc/modules/learning_curve.rst b/doc/modules/learning_curve.rst index 3d458a1a67416..f5af5a748500a 100644 --- a/doc/modules/learning_curve.rst +++ b/doc/modules/learning_curve.rst @@ -39,11 +39,11 @@ easy to see whether the estimator suffers from bias or variance. However, in high-dimensional spaces, models can become very difficult to visualize. For this reason, it is often helpful to use the tools described below. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_model_selection_plot_underfitting_overfitting.py` - * :ref:`sphx_glr_auto_examples_model_selection_plot_validation_curve.py` - * :ref:`sphx_glr_auto_examples_model_selection_plot_learning_curve.py` +* :ref:`sphx_glr_auto_examples_model_selection_plot_underfitting_overfitting.py` +* :ref:`sphx_glr_auto_examples_model_selection_plot_validation_curve.py` +* :ref:`sphx_glr_auto_examples_model_selection_plot_learning_curve.py` .. _validation_curve: diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index 154bbe5ee5cd7..d06101adabdb5 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -37,7 +37,7 @@ solves a problem of the form: :align: center :scale: 50% -:class:`LinearRegression` will take in its ``fit`` method arrays X, y +:class:`LinearRegression` will take in its ``fit`` method arrays ``X``, ``y`` and will store the coefficients :math:`w` of the linear model in its ``coef_`` member:: @@ -57,9 +57,9 @@ to random errors in the observed target, producing a large variance. This situation of *multicollinearity* can arise, for example, when data are collected without an experimental design. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_linear_model_plot_ols.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_ols.py` Non-Negative Least Squares -------------------------- @@ -71,9 +71,9 @@ quantities (e.g., frequency counts or prices of goods). parameter: when set to `True` `Non-Negative Least Squares `_ are then applied. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_linear_model_plot_nnls.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_nnls.py` Ordinary Least Squares Complexity --------------------------------- @@ -114,7 +114,7 @@ of shrinkage and thus the coefficients become more robust to collinearity. As with other linear models, :class:`Ridge` will take in its ``fit`` method -arrays X, y and will store the coefficients :math:`w` of the linear model in +arrays ``X``, ``y`` and will store the coefficients :math:`w` of the linear model in its ``coef_`` member:: >>> from sklearn import linear_model @@ -172,11 +172,11 @@ Machines `_ with a linear kernel. -.. topic:: Examples: +.. 
rubric:: Examples - * :ref:`sphx_glr_auto_examples_linear_model_plot_ridge_path.py` - * :ref:`sphx_glr_auto_examples_text_plot_document_classification_20newsgroups.py` - * :ref:`sphx_glr_auto_examples_inspection_plot_linear_model_coefficient_interpretation.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_ridge_path.py` +* :ref:`sphx_glr_auto_examples_text_plot_document_classification_20newsgroups.py` +* :ref:`sphx_glr_auto_examples_inspection_plot_linear_model_coefficient_interpretation.py` Ridge Complexity ---------------- @@ -193,9 +193,14 @@ This method has the same order of complexity as Setting the regularization parameter: leave-one-out Cross-Validation -------------------------------------------------------------------- -:class:`RidgeCV` implements ridge regression with built-in -cross-validation of the alpha parameter. The object works in the same way -as GridSearchCV except that it defaults to Leave-One-Out Cross-Validation:: +:class:`RidgeCV` and :class:`RidgeClassifierCV` implement ridge +regression/classification with built-in cross-validation of the alpha parameter. +They work in the same way as :class:`~sklearn.model_selection.GridSearchCV` except +that it defaults to efficient Leave-One-Out :term:`cross-validation`. +When using the default :term:`cross-validation`, alpha cannot be 0 due to the +formulation used to calculate Leave-One-Out error. See [RL2007]_ for details. + +Usage example:: >>> import numpy as np >>> from sklearn import linear_model @@ -211,13 +216,11 @@ cross-validation with :class:`~sklearn.model_selection.GridSearchCV`, for example `cv=10` for 10-fold cross-validation, rather than Leave-One-Out Cross-Validation. -.. topic:: References - - * "Notes on Regularized Least Squares", Rifkin & Lippert (`technical report - `_, - `course slides - `_). +.. dropdown:: References + .. [RL2007] "Notes on Regularized Least Squares", Rifkin & Lippert (`technical report + `_, + `course slides `_). .. _lasso: @@ -257,11 +260,11 @@ for another implementation:: The function :func:`lasso_path` is useful for lower-level tasks, as it computes the coefficients along the full path of possible values. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_and_elasticnet.py` - * :ref:`sphx_glr_auto_examples_applications_plot_tomography_l1_reconstruction.py` - * :ref:`sphx_glr_auto_examples_inspection_plot_linear_model_coefficient_interpretation.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_and_elasticnet.py` +* :ref:`sphx_glr_auto_examples_applications_plot_tomography_l1_reconstruction.py` +* :ref:`sphx_glr_auto_examples_inspection_plot_linear_model_coefficient_interpretation.py` .. note:: **Feature selection with Lasso** @@ -270,20 +273,19 @@ computes the coefficients along the full path of possible values. thus be used to perform feature selection, as detailed in :ref:`l1_feature_selection`. -The following two references explain the iterations -used in the coordinate descent solver of scikit-learn, as well as -the duality gap computation used for convergence control. +.. dropdown:: References -.. topic:: References - - * "Regularization Path For Generalized linear Models by Coordinate Descent", - Friedman, Hastie & Tibshirani, J Stat Softw, 2010 (`Paper - `__). - * "An Interior-Point Method for Large-Scale L1-Regularized Least Squares," - S. J. Kim, K. Koh, M. Lustig, S. Boyd and D. 
Gorinevsky, - in IEEE Journal of Selected Topics in Signal Processing, 2007 - (`Paper `__) + The following two references explain the iterations + used in the coordinate descent solver of scikit-learn, as well as + the duality gap computation used for convergence control. + * "Regularization Path For Generalized linear Models by Coordinate Descent", + Friedman, Hastie & Tibshirani, J Stat Softw, 2010 (`Paper + `__). + * "An Interior-Point Method for Large-Scale L1-Regularized Least Squares," + S. J. Kim, K. Koh, M. Lustig, S. Boyd and D. Gorinevsky, + in IEEE Journal of Selected Topics in Signal Processing, 2007 + (`Paper `__) Setting regularization parameter -------------------------------- @@ -340,76 +342,81 @@ the problem is badly conditioned (e.g. more features than samples). :align: center :scale: 50% +.. rubric:: Examples + +* :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_model_selection.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_lars_ic.py` + .. _aic_bic: -**Mathematical details** +AIC and BIC criteria +^^^^^^^^^^^^^^^^^^^^ The definition of AIC (and thus BIC) might differ in the literature. In this section, we give more information regarding the criterion computed in -scikit-learn. The AIC criterion is defined as: +scikit-learn. -.. math:: - AIC = -2 \log(\hat{L}) + 2 d +.. dropdown:: Mathematical details -where :math:`\hat{L}` is the maximum likelihood of the model and -:math:`d` is the number of parameters (as well referred to as degrees of -freedom in the previous section). + The AIC criterion is defined as: -The definition of BIC replace the constant :math:`2` by :math:`\log(N)`: + .. math:: + AIC = -2 \log(\hat{L}) + 2 d -.. math:: - BIC = -2 \log(\hat{L}) + \log(N) d + where :math:`\hat{L}` is the maximum likelihood of the model and + :math:`d` is the number of parameters (as well referred to as degrees of + freedom in the previous section). -where :math:`N` is the number of samples. + The definition of BIC replace the constant :math:`2` by :math:`\log(N)`: -For a linear Gaussian model, the maximum log-likelihood is defined as: + .. math:: + BIC = -2 \log(\hat{L}) + \log(N) d -.. math:: - \log(\hat{L}) = - \frac{n}{2} \log(2 \pi) - \frac{n}{2} \ln(\sigma^2) - \frac{\sum_{i=1}^{n} (y_i - \hat{y}_i)^2}{2\sigma^2} + where :math:`N` is the number of samples. -where :math:`\sigma^2` is an estimate of the noise variance, -:math:`y_i` and :math:`\hat{y}_i` are respectively the true and predicted -targets, and :math:`n` is the number of samples. + For a linear Gaussian model, the maximum log-likelihood is defined as: -Plugging the maximum log-likelihood in the AIC formula yields: + .. math:: + \log(\hat{L}) = - \frac{n}{2} \log(2 \pi) - \frac{n}{2} \ln(\sigma^2) - \frac{\sum_{i=1}^{n} (y_i - \hat{y}_i)^2}{2\sigma^2} -.. math:: - AIC = n \log(2 \pi \sigma^2) + \frac{\sum_{i=1}^{n} (y_i - \hat{y}_i)^2}{\sigma^2} + 2 d + where :math:`\sigma^2` is an estimate of the noise variance, + :math:`y_i` and :math:`\hat{y}_i` are respectively the true and predicted + targets, and :math:`n` is the number of samples. -The first term of the above expression is sometimes discarded since it is a -constant when :math:`\sigma^2` is provided. In addition, -it is sometimes stated that the AIC is equivalent to the :math:`C_p` statistic -[12]_. In a strict sense, however, it is equivalent only up to some constant -and a multiplicative factor. 
+ Plugging the maximum log-likelihood in the AIC formula yields: -At last, we mentioned above that :math:`\sigma^2` is an estimate of the -noise variance. In :class:`LassoLarsIC` when the parameter `noise_variance` is -not provided (default), the noise variance is estimated via the unbiased -estimator [13]_ defined as: + .. math:: + AIC = n \log(2 \pi \sigma^2) + \frac{\sum_{i=1}^{n} (y_i - \hat{y}_i)^2}{\sigma^2} + 2 d -.. math:: - \sigma^2 = \frac{\sum_{i=1}^{n} (y_i - \hat{y}_i)^2}{n - p} + The first term of the above expression is sometimes discarded since it is a + constant when :math:`\sigma^2` is provided. In addition, + it is sometimes stated that the AIC is equivalent to the :math:`C_p` statistic + [12]_. In a strict sense, however, it is equivalent only up to some constant + and a multiplicative factor. -where :math:`p` is the number of features and :math:`\hat{y}_i` is the -predicted target using an ordinary least squares regression. Note, that this -formula is valid only when `n_samples > n_features`. + At last, we mentioned above that :math:`\sigma^2` is an estimate of the + noise variance. In :class:`LassoLarsIC` when the parameter `noise_variance` is + not provided (default), the noise variance is estimated via the unbiased + estimator [13]_ defined as: -.. topic:: Examples: + .. math:: + \sigma^2 = \frac{\sum_{i=1}^{n} (y_i - \hat{y}_i)^2}{n - p} - * :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_model_selection.py` - * :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_lars_ic.py` + where :math:`p` is the number of features and :math:`\hat{y}_i` is the + predicted target using an ordinary least squares regression. Note, that this + formula is valid only when `n_samples > n_features`. -.. topic:: References + .. rubric:: References .. [12] :arxiv:`Zou, Hui, Trevor Hastie, and Robert Tibshirani. - "On the degrees of freedom of the lasso." - The Annals of Statistics 35.5 (2007): 2173-2192. - <0712.0881.pdf>` + "On the degrees of freedom of the lasso." + The Annals of Statistics 35.5 (2007): 2173-2192. + <0712.0881.pdf>` .. [13] :doi:`Cherkassky, Vladimir, and Yunqian Ma. - "Comparison of model selection for regression." - Neural computation 15.7 (2003): 1691-1714. - <10.1162/089976603321891864>` + "Comparison of model selection for regression." + Neural computation 15.7 (2003): 1691-1714. + <10.1162/089976603321891864>` Comparison with the regularization parameter of SVM ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -446,28 +453,29 @@ the MultiTaskLasso are full columns. .. centered:: Fitting a time-series model, imposing that any active feature be active at all times. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_linear_model_plot_multi_task_lasso_support.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_multi_task_lasso_support.py` -Mathematically, it consists of a linear model trained with a mixed -:math:`\ell_1` :math:`\ell_2`-norm for regularization. -The objective function to minimize is: +.. dropdown:: Mathematical details -.. math:: \min_{W} { \frac{1}{2n_{\text{samples}}} ||X W - Y||_{\text{Fro}} ^ 2 + \alpha ||W||_{21}} + Mathematically, it consists of a linear model trained with a mixed + :math:`\ell_1` :math:`\ell_2`-norm for regularization. + The objective function to minimize is: -where :math:`\text{Fro}` indicates the Frobenius norm + .. math:: \min_{W} { \frac{1}{2n_{\text{samples}}} ||X W - Y||_{\text{Fro}} ^ 2 + \alpha ||W||_{21}} -.. 
math:: ||A||_{\text{Fro}} = \sqrt{\sum_{ij} a_{ij}^2} + where :math:`\text{Fro}` indicates the Frobenius norm -and :math:`\ell_1` :math:`\ell_2` reads + .. math:: ||A||_{\text{Fro}} = \sqrt{\sum_{ij} a_{ij}^2} -.. math:: ||A||_{2 1} = \sum_i \sqrt{\sum_j a_{ij}^2}. + and :math:`\ell_1` :math:`\ell_2` reads -The implementation in the class :class:`MultiTaskLasso` uses -coordinate descent as the algorithm to fit the coefficients. + .. math:: ||A||_{2 1} = \sum_i \sqrt{\sum_j a_{ij}^2}. + The implementation in the class :class:`MultiTaskLasso` uses + coordinate descent as the algorithm to fit the coefficients. .. _elastic_net: @@ -504,24 +512,25 @@ The objective function to minimize is in this case The class :class:`ElasticNetCV` can be used to set the parameters ``alpha`` (:math:`\alpha`) and ``l1_ratio`` (:math:`\rho`) by cross-validation. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_and_elasticnet.py` - * :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_coordinate_descent_path.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_and_elasticnet.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_coordinate_descent_path.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_elastic_net_precomputed_gram_matrix_with_weighted_samples.py` -The following two references explain the iterations -used in the coordinate descent solver of scikit-learn, as well as -the duality gap computation used for convergence control. +.. dropdown:: References -.. topic:: References + The following two references explain the iterations + used in the coordinate descent solver of scikit-learn, as well as + the duality gap computation used for convergence control. - * "Regularization Path For Generalized linear Models by Coordinate Descent", - Friedman, Hastie & Tibshirani, J Stat Softw, 2010 (`Paper - `__). - * "An Interior-Point Method for Large-Scale L1-Regularized Least Squares," - S. J. Kim, K. Koh, M. Lustig, S. Boyd and D. Gorinevsky, - in IEEE Journal of Selected Topics in Signal Processing, 2007 - (`Paper `__) + * "Regularization Path For Generalized linear Models by Coordinate Descent", + Friedman, Hastie & Tibshirani, J Stat Softw, 2010 (`Paper + `__). + * "An Interior-Point Method for Large-Scale L1-Regularized Least Squares," + S. J. Kim, K. Koh, M. Lustig, S. Boyd and D. Gorinevsky, + in IEEE Journal of Selected Topics in Signal Processing, 2007 + (`Paper `__) .. _multi_task_elastic_net: @@ -563,30 +572,30 @@ between the features. The advantages of LARS are: - - It is numerically efficient in contexts where the number of features - is significantly greater than the number of samples. +- It is numerically efficient in contexts where the number of features + is significantly greater than the number of samples. - - It is computationally just as fast as forward selection and has - the same order of complexity as ordinary least squares. +- It is computationally just as fast as forward selection and has + the same order of complexity as ordinary least squares. - - It produces a full piecewise linear solution path, which is - useful in cross-validation or similar attempts to tune the model. +- It produces a full piecewise linear solution path, which is + useful in cross-validation or similar attempts to tune the model. - - If two features are almost equally correlated with the target, - then their coefficients should increase at approximately the same - rate. The algorithm thus behaves as intuition would expect, and - also is more stable. 
+- If two features are almost equally correlated with the target, + then their coefficients should increase at approximately the same + rate. The algorithm thus behaves as intuition would expect, and + also is more stable. - - It is easily modified to produce solutions for other estimators, - like the Lasso. +- It is easily modified to produce solutions for other estimators, + like the Lasso. The disadvantages of the LARS method include: - - Because LARS is based upon an iterative refitting of the - residuals, it would appear to be especially sensitive to the - effects of noise. This problem is discussed in detail by Weisberg - in the discussion section of the Efron et al. (2004) Annals of - Statistics article. +- Because LARS is based upon an iterative refitting of the + residuals, it would appear to be especially sensitive to the + effects of noise. This problem is discussed in detail by Weisberg + in the discussion section of the Efron et al. (2004) Annals of + Statistics article. The LARS model can be used via the estimator :class:`Lars`, or its low-level implementation :func:`lars_path` or :func:`lars_path_gram`. @@ -614,35 +623,33 @@ function of the norm of its coefficients. >>> reg.coef_ array([0.6..., 0. ]) -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_lars.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_lars.py` The Lars algorithm provides the full path of the coefficients along the regularization parameter almost for free, thus a common operation is to retrieve the path with one of the functions :func:`lars_path` or :func:`lars_path_gram`. -Mathematical formulation ------------------------- - -The algorithm is similar to forward stepwise regression, but instead -of including features at each step, the estimated coefficients are -increased in a direction equiangular to each one's correlations with -the residual. +.. dropdown:: Mathematical formulation -Instead of giving a vector result, the LARS solution consists of a -curve denoting the solution for each value of the :math:`\ell_1` norm of the -parameter vector. The full coefficients path is stored in the array -``coef_path_`` of shape `(n_features, max_features + 1)`. The first -column is always zero. + The algorithm is similar to forward stepwise regression, but instead + of including features at each step, the estimated coefficients are + increased in a direction equiangular to each one's correlations with + the residual. -.. topic:: References: + Instead of giving a vector result, the LARS solution consists of a + curve denoting the solution for each value of the :math:`\ell_1` norm of the + parameter vector. The full coefficients path is stored in the array + ``coef_path_`` of shape `(n_features, max_features + 1)`. The first + column is always zero. - * Original Algorithm is detailed in the paper `Least Angle Regression - `_ - by Hastie et al. + .. rubric:: References + * Original Algorithm is detailed in the paper `Least Angle Regression + `_ + by Hastie et al. .. _omp: @@ -657,7 +664,7 @@ orthogonal matching pursuit can approximate the optimum solution vector with a fixed number of non-zero elements: .. 
math:: - \underset{w}{\operatorname{arg\,min\,}} ||y - Xw||_2^2 \text{ subject to } ||w||_0 \leq n_{\text{nonzero\_coefs}} + \underset{w}{\operatorname{arg\,min\,}} ||y - Xw||_2^2 \text{ subject to } ||w||_0 \leq n_{\text{nonzero_coefs}} Alternatively, orthogonal matching pursuit can target a specific error instead of a specific number of non-zero coefficients. This can be expressed as: @@ -673,18 +680,17 @@ residual is recomputed using an orthogonal projection on the space of the previously chosen dictionary elements. -.. topic:: Examples: - - * :ref:`sphx_glr_auto_examples_linear_model_plot_omp.py` +.. rubric:: Examples -.. topic:: References: +* :ref:`sphx_glr_auto_examples_linear_model_plot_omp.py` - * https://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf +.. dropdown:: References - * `Matching pursuits with time-frequency dictionaries - `_, - S. G. Mallat, Z. Zhang, + * https://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf + * `Matching pursuits with time-frequency dictionaries + `_, + S. G. Mallat, Z. Zhang, .. _bayesian_regression: @@ -707,29 +713,29 @@ variable to be estimated from the data. To obtain a fully probabilistic model, the output :math:`y` is assumed to be Gaussian distributed around :math:`X w`: -.. math:: p(y|X,w,\alpha) = \mathcal{N}(y|X w,\alpha) +.. math:: p(y|X,w,\alpha) = \mathcal{N}(y|X w,\alpha^{-1}) where :math:`\alpha` is again treated as a random variable that is to be estimated from the data. The advantages of Bayesian Regression are: - - It adapts to the data at hand. +- It adapts to the data at hand. - - It can be used to include regularization parameters in the - estimation procedure. +- It can be used to include regularization parameters in the + estimation procedure. The disadvantages of Bayesian regression include: - - Inference of the model can be time consuming. +- Inference of the model can be time consuming. -.. topic:: References +.. dropdown:: References - * A good introduction to Bayesian methods is given in C. Bishop: Pattern - Recognition and Machine learning + * A good introduction to Bayesian methods is given in C. Bishop: Pattern + Recognition and Machine learning - * Original Algorithm is detailed in the book `Bayesian learning for neural - networks` by Radford M. Neal + * Original Algorithm is detailed in the book `Bayesian learning for neural + networks` by Radford M. Neal .. _bayesian_ridge_regression: @@ -786,17 +792,17 @@ Due to the Bayesian framework, the weights found are slightly different to the ones found by :ref:`ordinary_least_squares`. However, Bayesian Ridge Regression is more robust to ill-posed problems. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_linear_model_plot_bayesian_ridge_curvefit.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_bayesian_ridge_curvefit.py` -.. topic:: References: +.. dropdown:: References - * Section 3.3 in Christopher M. Bishop: Pattern Recognition and Machine Learning, 2006 + * Section 3.3 in Christopher M. Bishop: Pattern Recognition and Machine Learning, 2006 - * David J. C. MacKay, `Bayesian Interpolation `_, 1992. + * David J. C. MacKay, `Bayesian Interpolation `_, 1992. - * Michael E. Tipping, `Sparse Bayesian Learning and the Relevance Vector Machine `_, 2001. + * Michael E. Tipping, `Sparse Bayesian Learning and the Relevance Vector Machine `_, 2001. .. _automatic_relevance_determination: @@ -828,20 +834,20 @@ ARD is also known in the literature as *Sparse Bayesian Learning* and *Relevance Vector Machine* [3]_ [4]_. 
For a worked-out comparison between ARD and `Bayesian Ridge Regression`_, see the example below. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_linear_model_plot_ard.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_ard.py` -.. topic:: References: - .. [1] Christopher M. Bishop: Pattern Recognition and Machine Learning, Chapter 7.2.1 +.. rubric:: References - .. [2] David Wipf and Srikantan Nagarajan: `A New View of Automatic Relevance Determination `_ +.. [1] Christopher M. Bishop: Pattern Recognition and Machine Learning, Chapter 7.2.1 - .. [3] Michael E. Tipping: `Sparse Bayesian Learning and the Relevance Vector Machine `_ +.. [2] David Wipf and Srikantan Nagarajan: `A New View of Automatic Relevance Determination `_ - .. [4] Tristan Fletcher: `Relevance Vector Machines Explained `_ +.. [3] Michael E. Tipping: `Sparse Bayesian Learning and the Relevance Vector Machine `_ +.. [4] Tristan Fletcher: `Relevance Vector Machines Explained `_ .. _Logistic_regression: @@ -878,6 +884,14 @@ regularization. implemented in scikit-learn, so it expects a categorical target, making the Logistic Regression a classifier. +.. rubric:: Examples + +* :ref:`sphx_glr_auto_examples_linear_model_plot_logistic_l1_l2_sparsity.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_logistic_path.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_logistic_multinomial.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_sparse_logistic_regression_20newsgroups.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_sparse_logistic_regression_mnist.py` + Binary Case ----------- @@ -889,14 +903,24 @@ the probability of the positive class :math:`P(y_i=1|X_i)` as .. math:: \hat{p}(X_i) = \operatorname{expit}(X_i w + w_0) = \frac{1}{1 + \exp(-X_i w - w_0)}. + As an optimization problem, binary class logistic regression with regularization term :math:`r(w)` minimizes the following cost function: -.. math:: \min_{w} C \sum_{i=1}^n \left(-y_i \log(\hat{p}(X_i)) - (1 - y_i) \log(1 - \hat{p}(X_i))\right) + r(w). +.. math:: + :name: regularized-logistic-loss + + \min_{w} \frac{1}{S}\sum_{i=1}^n s_i + \left(-y_i \log(\hat{p}(X_i)) - (1 - y_i) \log(1 - \hat{p}(X_i))\right) + + \frac{r(w)}{S C}\,, +where :math:`{s_i}` corresponds to the weights assigned by the user to a +specific training sample (the vector :math:`s` is formed by element-wise +multiplication of the class weights and sample weights), +and the sum :math:`S = \sum_{i=1}^n s_i`. -We currently provide four choices for the regularization term :math:`r(w)` via +We currently provide four choices for the regularization term :math:`r(w)` via the `penalty` argument: +----------------+-------------------------------------------------+ @@ -916,6 +940,11 @@ controls the strength of :math:`\ell_1` regularization vs. :math:`\ell_2` regularization. Elastic-Net is equivalent to :math:`\ell_1` when :math:`\rho = 1` and equivalent to :math:`\ell_2` when :math:`\rho=0`. +Note that the scale of the class weights and the sample weights will influence +the optimization problem. For instance, multiplying the sample weights by a +constant :math:`b>0` is equivalent to multiplying the (inverse) regularization +strength `C` by :math:`b`. + Multinomial Case ---------------- @@ -933,33 +962,43 @@ logistic regression, see also `log-linear model especially important when using regularization. The choice of overparameterization can be detrimental for unpenalized models since then the solution may not be unique, as shown in [16]_. 
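Returning briefly to the note above on sample weights in the binary case, the following is a minimal sketch, on a synthetic toy dataset and with a tightened solver tolerance, of the stated equivalence between scaling all sample weights by a constant :math:`b > 0` and scaling `C` by :math:`b`; the two fits agree only up to the solver's tolerance::

    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression

    X, y = make_classification(n_samples=200, random_state=0)
    w = np.random.default_rng(0).uniform(0.5, 1.5, size=len(y))

    b = 3.0
    lr_weights = LogisticRegression(C=1.0, tol=1e-8, max_iter=10_000)
    lr_weights.fit(X, y, sample_weight=b * w)   # weights scaled by b

    lr_C = LogisticRegression(C=b * 1.0, tol=1e-8, max_iter=10_000)
    lr_C.fit(X, y, sample_weight=w)             # C scaled by b instead

    # the two solutions coincide up to numerical/solver tolerance
    assert np.allclose(lr_weights.coef_, lr_C.coef_, atol=1e-3)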
-Let :math:`y_i \in {1, \ldots, K}` be the label (ordinal) encoded target variable for observation :math:`i`. -Instead of a single coefficient vector, we now have -a matrix of coefficients :math:`W` where each row vector :math:`W_k` corresponds to class -:math:`k`. We aim at predicting the class probabilities :math:`P(y_i=k|X_i)` via -:meth:`~sklearn.linear_model.LogisticRegression.predict_proba` as: +.. dropdown:: Mathematical details + + Let :math:`y_i \in {1, \ldots, K}` be the label (ordinal) encoded target variable for observation :math:`i`. + Instead of a single coefficient vector, we now have + a matrix of coefficients :math:`W` where each row vector :math:`W_k` corresponds to class + :math:`k`. We aim at predicting the class probabilities :math:`P(y_i=k|X_i)` via + :meth:`~sklearn.linear_model.LogisticRegression.predict_proba` as: + + .. math:: \hat{p}_k(X_i) = \frac{\exp(X_i W_k + W_{0, k})}{\sum_{l=0}^{K-1} \exp(X_i W_l + W_{0, l})}. -.. math:: \hat{p}_k(X_i) = \frac{\exp(X_i W_k + W_{0, k})}{\sum_{l=0}^{K-1} \exp(X_i W_l + W_{0, l})}. + The objective for the optimization becomes -The objective for the optimization becomes + .. math:: + \min_W -\frac{1}{S}\sum_{i=1}^n \sum_{k=0}^{K-1} s_{ik} [y_i = k] \log(\hat{p}_k(X_i)) + + \frac{r(W)}{S C}\,, -.. math:: \min_W -C \sum_{i=1}^n \sum_{k=0}^{K-1} [y_i = k] \log(\hat{p}_k(X_i)) + r(W). + where :math:`[P]` represents the Iverson bracket which evaluates to :math:`0` + if :math:`P` is false, otherwise it evaluates to :math:`1`. -Where :math:`[P]` represents the Iverson bracket which evaluates to :math:`0` -if :math:`P` is false, otherwise it evaluates to :math:`1`. We currently provide four choices -for the regularization term :math:`r(W)` via the `penalty` argument: + Again, :math:`s_{ik}` are the weights assigned by the user (multiplication of sample + weights and class weights) with their sum :math:`S = \sum_{i=1}^n \sum_{k=0}^{K-1} s_{ik}`. 
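As a quick sanity check of the formula for :math:`\hat{p}_k(X_i)` above, this sketch
(the iris dataset is an arbitrary illustrative choice) recomputes the class
probabilities from the fitted ``coef_`` and ``intercept_`` and compares them with
:meth:`~sklearn.linear_model.LogisticRegression.predict_proba`::

    import numpy as np
    from sklearn.datasets import load_iris
    from sklearn.linear_model import LogisticRegression

    X, y = load_iris(return_X_y=True)
    clf = LogisticRegression(max_iter=1_000).fit(X, y)  # multinomial fit with lbfgs

    # Softmax of the linear scores, as in the formula above.
    scores = X @ clf.coef_.T + clf.intercept_
    manual_proba = np.exp(scores) / np.exp(scores).sum(axis=1, keepdims=True)

    print(np.allclose(manual_proba, clf.predict_proba(X)))  # True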
-+----------------+----------------------------------------------------------------------------------+ -| penalty | :math:`r(W)` | -+================+==================================================================================+ -| `None` | :math:`0` | -+----------------+----------------------------------------------------------------------------------+ -| :math:`\ell_1` | :math:`\|W\|_{1,1} = \sum_{i=1}^n\sum_{j=1}^{K}|W_{i,j}|` | -+----------------+----------------------------------------------------------------------------------+ -| :math:`\ell_2` | :math:`\frac{1}{2}\|W\|_F^2 = \frac{1}{2}\sum_{i=1}^n\sum_{j=1}^{K} W_{i,j}^2` | -+----------------+----------------------------------------------------------------------------------+ -| `ElasticNet` | :math:`\frac{1 - \rho}{2}\|W\|_F^2 + \rho \|W\|_{1,1}` | -+----------------+----------------------------------------------------------------------------------+ + We currently provide four choices + for the regularization term :math:`r(W)` via the `penalty` argument, where :math:`m` + is the number of features: + + +----------------+----------------------------------------------------------------------------------+ + | penalty | :math:`r(W)` | + +================+==================================================================================+ + | `None` | :math:`0` | + +----------------+----------------------------------------------------------------------------------+ + | :math:`\ell_1` | :math:`\|W\|_{1,1} = \sum_{i=1}^m\sum_{j=1}^{K}|W_{i,j}|` | + +----------------+----------------------------------------------------------------------------------+ + | :math:`\ell_2` | :math:`\frac{1}{2}\|W\|_F^2 = \frac{1}{2}\sum_{i=1}^m\sum_{j=1}^{K} W_{i,j}^2` | + +----------------+----------------------------------------------------------------------------------+ + | `ElasticNet` | :math:`\frac{1 - \rho}{2}\|W\|_F^2 + \rho \|W\|_{1,1}` | + +----------------+----------------------------------------------------------------------------------+ Solvers ------- @@ -967,69 +1006,24 @@ Solvers The solvers implemented in the class :class:`LogisticRegression` are "lbfgs", "liblinear", "newton-cg", "newton-cholesky", "sag" and "saga": -The solver "liblinear" uses a coordinate descent (CD) algorithm, and relies -on the excellent C++ `LIBLINEAR library -`_, which is shipped with -scikit-learn. However, the CD algorithm implemented in liblinear cannot learn -a true multinomial (multiclass) model; instead, the optimization problem is -decomposed in a "one-vs-rest" fashion so separate binary classifiers are -trained for all classes. This happens under the hood, so -:class:`LogisticRegression` instances using this solver behave as multiclass -classifiers. For :math:`\ell_1` regularization :func:`sklearn.svm.l1_min_c` allows to -calculate the lower bound for C in order to get a non "null" (all feature -weights to zero) model. - -The "lbfgs", "newton-cg" and "sag" solvers only support :math:`\ell_2` -regularization or no regularization, and are found to converge faster for some -high-dimensional data. Setting `multi_class` to "multinomial" with these solvers -learns a true multinomial logistic regression model [5]_, which means that its -probability estimates should be better calibrated than the default "one-vs-rest" -setting. - -The "sag" solver uses Stochastic Average Gradient descent [6]_. It is faster -than other solvers for large datasets, when both the number of samples and the -number of features are large. 
- -The "saga" solver [7]_ is a variant of "sag" that also supports the -non-smooth `penalty="l1"`. This is therefore the solver of choice for sparse -multinomial logistic regression. It is also the only solver that supports -`penalty="elasticnet"`. - -The "lbfgs" is an optimization algorithm that approximates the -Broyden–Fletcher–Goldfarb–Shanno algorithm [8]_, which belongs to -quasi-Newton methods. As such, it can deal with a wide range of different training -data and is therefore the default solver. Its performance, however, suffers on poorly -scaled datasets and on datasets with one-hot encoded categorical features with rare -categories. - -The "newton-cholesky" solver is an exact Newton solver that calculates the hessian -matrix and solves the resulting linear system. It is a very good choice for -`n_samples` >> `n_features`, but has a few shortcomings: Only :math:`\ell_2` -regularization is supported. Furthermore, because the hessian matrix is explicitly -computed, the memory usage has a quadratic dependency on `n_features` as well as on -`n_classes`. As a consequence, only the one-vs-rest scheme is implemented for the -multiclass case. - -For a comparison of some of these solvers, see [9]_. - -The following table summarizes the penalties supported by each solver: +The following table summarizes the penalties and multinomial multiclass supported by each solver: +------------------------------+-----------------+-------------+-----------------+-----------------------+-----------+------------+ | | **Solvers** | +------------------------------+-------------+-----------------+-----------------+-----------------------+-----------+------------+ | **Penalties** | **'lbfgs'** | **'liblinear'** | **'newton-cg'** | **'newton-cholesky'** | **'sag'** | **'saga'** | +------------------------------+-------------+-----------------+-----------------+-----------------------+-----------+------------+ -| Multinomial + L2 penalty | yes | no | yes | no | yes | yes | +| L2 penalty | yes | no | yes | no | yes | yes | +------------------------------+-------------+-----------------+-----------------+-----------------------+-----------+------------+ -| OVR + L2 penalty | yes | yes | yes | yes | yes | yes | +| L1 penalty | no | yes | no | no | no | yes | +------------------------------+-------------+-----------------+-----------------+-----------------------+-----------+------------+ -| Multinomial + L1 penalty | no | no | no | no | no | yes | +| Elastic-Net (L1 + L2) | no | no | no | no | no | yes | +------------------------------+-------------+-----------------+-----------------+-----------------------+-----------+------------+ -| OVR + L1 penalty | no | yes | no | no | no | yes | +| No penalty ('none') | yes | no | yes | yes | yes | yes | +------------------------------+-------------+-----------------+-----------------+-----------------------+-----------+------------+ -| Elastic-Net | no | no | no | no | no | yes | +| **Multiclass support** | | +------------------------------+-------------+-----------------+-----------------+-----------------------+-----------+------------+ -| No penalty ('none') | yes | no | yes | yes | yes | yes | +| multinomial multiclass | yes | no | yes | no | yes | yes | +------------------------------+-------------+-----------------+-----------------+-----------------------+-----------+------------+ | **Behaviors** | | +------------------------------+-------------+-----------------+-----------------+-----------------------+-----------+------------+ @@ -1045,32 +1039,88 @@ the "saga" 
solver is usually faster. For large dataset, you may also consider using :class:`SGDClassifier` with `loss="log_loss"`, which might be even faster but requires more tuning. -.. topic:: Examples: - - * :ref:`sphx_glr_auto_examples_linear_model_plot_logistic_l1_l2_sparsity.py` - - * :ref:`sphx_glr_auto_examples_linear_model_plot_logistic_path.py` - - * :ref:`sphx_glr_auto_examples_linear_model_plot_logistic_multinomial.py` - - * :ref:`sphx_glr_auto_examples_linear_model_plot_sparse_logistic_regression_20newsgroups.py` - - * :ref:`sphx_glr_auto_examples_linear_model_plot_sparse_logistic_regression_mnist.py` - .. _liblinear_differences: -.. topic:: Differences from liblinear: +Differences between solvers +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +There might be a difference in the scores obtained between +:class:`LogisticRegression` with ``solver=liblinear`` or +:class:`~sklearn.svm.LinearSVC` and the external liblinear library directly, +when ``fit_intercept=False`` and the fit ``coef_`` (or) the data to be predicted +are zeroes. This is because for the sample(s) with ``decision_function`` zero, +:class:`LogisticRegression` and :class:`~sklearn.svm.LinearSVC` predict the +negative class, while liblinear predicts the positive class. Note that a model +with ``fit_intercept=False`` and having many samples with ``decision_function`` +zero, is likely to be a underfit, bad model and you are advised to set +``fit_intercept=True`` and increase the ``intercept_scaling``. + +.. dropdown:: Solvers' details + + * The solver "liblinear" uses a coordinate descent (CD) algorithm, and relies + on the excellent C++ `LIBLINEAR library + `_, which is shipped with + scikit-learn. However, the CD algorithm implemented in liblinear cannot learn + a true multinomial (multiclass) model; instead, the optimization problem is + decomposed in a "one-vs-rest" fashion so separate binary classifiers are + trained for all classes. This happens under the hood, so + :class:`LogisticRegression` instances using this solver behave as multiclass + classifiers. For :math:`\ell_1` regularization :func:`sklearn.svm.l1_min_c` allows to + calculate the lower bound for C in order to get a non "null" (all feature + weights to zero) model. + + * The "lbfgs", "newton-cg" and "sag" solvers only support :math:`\ell_2` + regularization or no regularization, and are found to converge faster for some + high-dimensional data. Setting `multi_class` to "multinomial" with these solvers + learns a true multinomial logistic regression model [5]_, which means that its + probability estimates should be better calibrated than the default "one-vs-rest" + setting. + + * The "sag" solver uses Stochastic Average Gradient descent [6]_. It is faster + than other solvers for large datasets, when both the number of samples and the + number of features are large. + + * The "saga" solver [7]_ is a variant of "sag" that also supports the + non-smooth `penalty="l1"`. This is therefore the solver of choice for sparse + multinomial logistic regression. It is also the only solver that supports + `penalty="elasticnet"`. + + * The "lbfgs" is an optimization algorithm that approximates the + Broyden–Fletcher–Goldfarb–Shanno algorithm [8]_, which belongs to + quasi-Newton methods. As such, it can deal with a wide range of different training + data and is therefore the default solver. Its performance, however, suffers on poorly + scaled datasets and on datasets with one-hot encoded categorical features with rare + categories. 
+ + * The "newton-cholesky" solver is an exact Newton solver that calculates the hessian + matrix and solves the resulting linear system. It is a very good choice for + `n_samples` >> `n_features`, but has a few shortcomings: Only :math:`\ell_2` + regularization is supported. Furthermore, because the hessian matrix is explicitly + computed, the memory usage has a quadratic dependency on `n_features` as well as on + `n_classes`. As a consequence, only the one-vs-rest scheme is implemented for the + multiclass case. + + For a comparison of some of these solvers, see [9]_. + + .. rubric:: References + + .. [5] Christopher M. Bishop: Pattern Recognition and Machine Learning, Chapter 4.3.4 + + .. [6] Mark Schmidt, Nicolas Le Roux, and Francis Bach: `Minimizing Finite Sums with the Stochastic Average Gradient. `_ + + .. [7] Aaron Defazio, Francis Bach, Simon Lacoste-Julien: + :arxiv:`SAGA: A Fast Incremental Gradient Method With Support for + Non-Strongly Convex Composite Objectives. <1407.0202>` + + .. [8] https://en.wikipedia.org/wiki/Broyden%E2%80%93Fletcher%E2%80%93Goldfarb%E2%80%93Shanno_algorithm + + .. [9] Thomas P. Minka `"A comparison of numerical optimizers for logistic regression" + `_ + + .. [16] :arxiv:`Simon, Noah, J. Friedman and T. Hastie. + "A Blockwise Descent Algorithm for Group-penalized Multiresponse and + Multinomial Regression." <1311.6529>` - There might be a difference in the scores obtained between - :class:`LogisticRegression` with ``solver=liblinear`` - or :class:`LinearSVC` and the external liblinear library directly, - when ``fit_intercept=False`` and the fit ``coef_`` (or) the data to - be predicted are zeroes. This is because for the sample(s) with - ``decision_function`` zero, :class:`LogisticRegression` and :class:`LinearSVC` - predict the negative class, while liblinear predicts the positive class. - Note that a model with ``fit_intercept=False`` and having many samples with - ``decision_function`` zero, is likely to be a underfit, bad model and you are - advised to set ``fit_intercept=True`` and increase the intercept_scaling. .. note:: **Feature selection with sparse logistic regression** @@ -1092,25 +1142,6 @@ according to the ``scoring`` attribute. The "newton-cg", "sag", "saga" and "lbfgs" solvers are found to be faster for high-dimensional dense data, due to warm-starting (see :term:`Glossary `). -.. topic:: References: - - .. [5] Christopher M. Bishop: Pattern Recognition and Machine Learning, Chapter 4.3.4 - - .. [6] Mark Schmidt, Nicolas Le Roux, and Francis Bach: `Minimizing Finite Sums with the Stochastic Average Gradient. `_ - - .. [7] Aaron Defazio, Francis Bach, Simon Lacoste-Julien: - :arxiv:`SAGA: A Fast Incremental Gradient Method With Support for - Non-Strongly Convex Composite Objectives. <1407.0202>` - - .. [8] https://en.wikipedia.org/wiki/Broyden%E2%80%93Fletcher%E2%80%93Goldfarb%E2%80%93Shanno_algorithm - - .. [9] Thomas P. Minka `"A comparison of numerical optimizers for logistic regression" - `_ - - .. [16] :arxiv:`Simon, Noah, J. Friedman and T. Hastie. - "A Blockwise Descent Algorithm for Group-penalized Multiresponse and - Multinomial Regression." <1311.6529>` - .. _Generalized_linear_regression: .. 
_Generalized_linear_models: @@ -1145,7 +1176,7 @@ Normal :math:`y \in (-\infty, \infty)` :math:`(y-\hat{y})^2` Bernoulli :math:`y \in \{0, 1\}` :math:`2({y}\log\frac{y}{\hat{y}}+({1}-{y})\log\frac{{1}-{y}}{{1}-\hat{y}})` Categorical :math:`y \in \{0, 1, ..., k\}` :math:`2\sum_{i \in \{0, 1, ..., k\}} I(y = i) y_\text{i}\log\frac{I(y = i)}{\hat{I(y = i)}}` Poisson :math:`y \in [0, \infty)` :math:`2(y\log\frac{y}{\hat{y}}-y+\hat{y})` -Gamma :math:`y \in (0, \infty)` :math:`2(\log\frac{y}{\hat{y}}+\frac{y}{\hat{y}}-1)` +Gamma :math:`y \in (0, \infty)` :math:`2(\log\frac{\hat{y}}{y}+\frac{y}{\hat{y}}-1)` Inverse Gaussian :math:`y \in (0, \infty)` :math:`\frac{(y-\hat{y})^2}{y\hat{y}^2}` ================= ================================ ============================================ @@ -1186,34 +1217,34 @@ The choice of the distribution depends on the problem at hand: used for multiclass classification. -Examples of use cases include: +.. dropdown:: Examples of use cases -* Agriculture / weather modeling: number of rain events per year (Poisson), - amount of rainfall per event (Gamma), total rainfall per year (Tweedie / - Compound Poisson Gamma). -* Risk modeling / insurance policy pricing: number of claim events / - policyholder per year (Poisson), cost per event (Gamma), total cost per - policyholder per year (Tweedie / Compound Poisson Gamma). -* Credit Default: probability that a loan can't be paid back (Bernouli). -* Fraud Detection: probability that a financial transaction like a cash transfer - is a fraudulent transaction (Bernoulli). -* Predictive maintenance: number of production interruption events per year - (Poisson), duration of interruption (Gamma), total interruption time per year - (Tweedie / Compound Poisson Gamma). -* Medical Drug Testing: probability of curing a patient in a set of trials or - probability that a patient will experience side effects (Bernoulli). -* News Classification: classification of news articles into three categories - namely Business News, Politics and Entertainment news (Categorical). + * Agriculture / weather modeling: number of rain events per year (Poisson), + amount of rainfall per event (Gamma), total rainfall per year (Tweedie / + Compound Poisson Gamma). + * Risk modeling / insurance policy pricing: number of claim events / + policyholder per year (Poisson), cost per event (Gamma), total cost per + policyholder per year (Tweedie / Compound Poisson Gamma). + * Credit Default: probability that a loan can't be paid back (Bernoulli). + * Fraud Detection: probability that a financial transaction like a cash transfer + is a fraudulent transaction (Bernoulli). + * Predictive maintenance: number of production interruption events per year + (Poisson), duration of interruption (Gamma), total interruption time per year + (Tweedie / Compound Poisson Gamma). + * Medical Drug Testing: probability of curing a patient in a set of trials or + probability that a patient will experience side effects (Bernoulli). + * News Classification: classification of news articles into three categories + namely Business News, Politics and Entertainment news (Categorical). -.. topic:: References: +.. rubric:: References - .. [10] McCullagh, Peter; Nelder, John (1989). Generalized Linear Models, - Second Edition. Boca Raton: Chapman and Hall/CRC. ISBN 0-412-31760-5. +.. [10] McCullagh, Peter; Nelder, John (1989). Generalized Linear Models, + Second Edition. Boca Raton: Chapman and Hall/CRC. ISBN 0-412-31760-5. - .. [11] Jørgensen, B. (1992). 
The theory of exponential dispersion models - and analysis of deviance. Monografias de matemÃĄtica, no. 51. See also - `Exponential dispersion model. - `_ +.. [11] Jørgensen, B. (1992). The theory of exponential dispersion models + and analysis of deviance. Monografias de matemÃĄtica, no. 51. See also + `Exponential dispersion model. + `_ Usage ----- @@ -1247,34 +1278,33 @@ Usage example:: -0.7638... -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_linear_model_plot_poisson_regression_non_normal_loss.py` - * :ref:`sphx_glr_auto_examples_linear_model_plot_tweedie_regression_insurance_claims.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_poisson_regression_non_normal_loss.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_tweedie_regression_insurance_claims.py` -Practical considerations ------------------------- +.. dropdown:: Practical considerations -The feature matrix `X` should be standardized before fitting. This ensures -that the penalty treats features equally. + The feature matrix `X` should be standardized before fitting. This ensures + that the penalty treats features equally. -Since the linear predictor :math:`Xw` can be negative and Poisson, -Gamma and Inverse Gaussian distributions don't support negative values, it -is necessary to apply an inverse link function that guarantees the -non-negativeness. For example with `link='log'`, the inverse link function -becomes :math:`h(Xw)=\exp(Xw)`. + Since the linear predictor :math:`Xw` can be negative and Poisson, + Gamma and Inverse Gaussian distributions don't support negative values, it + is necessary to apply an inverse link function that guarantees the + non-negativeness. For example with `link='log'`, the inverse link function + becomes :math:`h(Xw)=\exp(Xw)`. -If you want to model a relative frequency, i.e. counts per exposure (time, -volume, ...) you can do so by using a Poisson distribution and passing -:math:`y=\frac{\mathrm{counts}}{\mathrm{exposure}}` as target values -together with :math:`\mathrm{exposure}` as sample weights. For a concrete -example see e.g. -:ref:`sphx_glr_auto_examples_linear_model_plot_tweedie_regression_insurance_claims.py`. + If you want to model a relative frequency, i.e. counts per exposure (time, + volume, ...) you can do so by using a Poisson distribution and passing + :math:`y=\frac{\mathrm{counts}}{\mathrm{exposure}}` as target values + together with :math:`\mathrm{exposure}` as sample weights. For a concrete + example see e.g. + :ref:`sphx_glr_auto_examples_linear_model_plot_tweedie_regression_insurance_claims.py`. -When performing cross-validation for the `power` parameter of -`TweedieRegressor`, it is advisable to specify an explicit `scoring` function, -because the default scorer :meth:`TweedieRegressor.score` is a function of -`power` itself. + When performing cross-validation for the `power` parameter of + `TweedieRegressor`, it is advisable to specify an explicit `scoring` function, + because the default scorer :meth:`TweedieRegressor.score` is a function of + `power` itself. Stochastic Gradient Descent - SGD ================================= @@ -1291,9 +1321,7 @@ E.g., with ``loss="log"``, :class:`SGDClassifier` fits a logistic regression model, while with ``loss="hinge"`` it fits a linear support vector machine (SVM). -.. topic:: References - - * :ref:`sgd` +You can refer to the dedicated :ref:`sgd` documentation section for more details. .. 
_perceptron: @@ -1303,16 +1331,21 @@ Perceptron The :class:`Perceptron` is another simple classification algorithm suitable for large scale learning. By default: - - It does not require a learning rate. +- It does not require a learning rate. - - It is not regularized (penalized). +- It is not regularized (penalized). - - It updates its model only on mistakes. +- It updates its model only on mistakes. The last characteristic implies that the Perceptron is slightly faster to train than SGD with the hinge loss and that the resulting models are sparser. +In fact, the :class:`Perceptron` is a wrapper around the :class:`SGDClassifier` +class using a perceptron loss and a constant learning rate. Refer to +:ref:`mathematical section ` of the SGD procedure +for more details. + .. _passive_aggressive: Passive Aggressive Algorithms @@ -1329,13 +1362,11 @@ For classification, :class:`PassiveAggressiveClassifier` can be used with ``loss='epsilon_insensitive'`` (PA-I) or ``loss='squared_epsilon_insensitive'`` (PA-II). -.. topic:: References: - - - * `"Online Passive-Aggressive Algorithms" - `_ - K. Crammer, O. Dekel, J. Keshat, S. Shalev-Shwartz, Y. Singer - JMLR 7 (2006) +.. dropdown:: References + * `"Online Passive-Aggressive Algorithms" + `_ + K. Crammer, O. Dekel, J. Keshat, S. Shalev-Shwartz, Y. Singer - JMLR 7 (2006) Robustness regression: outliers and modeling errors ===================================================== @@ -1394,7 +1425,7 @@ Note that in general, robust fitting in high-dimensional setting (large in these settings. -.. topic:: **Trade-offs: which estimator?** +.. topic:: Trade-offs: which estimator ? Scikit-learn provides 3 robust regression estimators: :ref:`RANSAC `, @@ -1403,7 +1434,7 @@ in these settings. * :ref:`HuberRegressor ` should be faster than :ref:`RANSAC ` and :ref:`Theil Sen ` - unless the number of samples are very large, i.e ``n_samples`` >> ``n_features``. + unless the number of samples are very large, i.e. ``n_samples`` >> ``n_features``. This is because :ref:`RANSAC ` and :ref:`Theil Sen ` fit on smaller subsets of the data. However, both :ref:`Theil Sen ` and :ref:`RANSAC ` are unlikely to be as robust as @@ -1419,7 +1450,7 @@ in these settings. medium-size outliers in the X direction, but this property will disappear in high-dimensional settings. - When in doubt, use :ref:`RANSAC `. + When in doubt, use :ref:`RANSAC `. .. _ransac_regression: @@ -1445,50 +1476,48 @@ estimated only from the determined inliers. :align: center :scale: 50% -Details of the algorithm -^^^^^^^^^^^^^^^^^^^^^^^^ - -Each iteration performs the following steps: - -1. Select ``min_samples`` random samples from the original data and check - whether the set of data is valid (see ``is_data_valid``). -2. Fit a model to the random subset (``base_estimator.fit``) and check - whether the estimated model is valid (see ``is_model_valid``). -3. Classify all data as inliers or outliers by calculating the residuals - to the estimated model (``base_estimator.predict(X) - y``) - all data - samples with absolute residuals smaller than or equal to the - ``residual_threshold`` are considered as inliers. -4. Save fitted model as best model if number of inlier samples is - maximal. In case the current estimated model has the same number of - inliers, it is only considered as the best model if it has better score. - -These steps are performed either a maximum number of times (``max_trials``) or -until one of the special stop criteria are met (see ``stop_n_inliers`` and -``stop_score``). 
The final model is estimated using all inlier samples (consensus -set) of the previously determined best model. - -The ``is_data_valid`` and ``is_model_valid`` functions allow to identify and reject -degenerate combinations of random sub-samples. If the estimated model is not -needed for identifying degenerate cases, ``is_data_valid`` should be used as it -is called prior to fitting the model and thus leading to better computational -performance. - - -.. topic:: Examples: - - * :ref:`sphx_glr_auto_examples_linear_model_plot_ransac.py` - * :ref:`sphx_glr_auto_examples_linear_model_plot_robust_fit.py` - -.. topic:: References: - - * https://en.wikipedia.org/wiki/RANSAC - * `"Random Sample Consensus: A Paradigm for Model Fitting with Applications to - Image Analysis and Automated Cartography" - `_ - Martin A. Fischler and Robert C. Bolles - SRI International (1981) - * `"Performance Evaluation of RANSAC Family" - `_ - Sunglok Choi, Taemin Kim and Wonpil Yu - BMVC (2009) +.. rubric:: Examples + +* :ref:`sphx_glr_auto_examples_linear_model_plot_ransac.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_robust_fit.py` + +.. dropdown:: Details of the algorithm + + Each iteration performs the following steps: + + 1. Select ``min_samples`` random samples from the original data and check + whether the set of data is valid (see ``is_data_valid``). + 2. Fit a model to the random subset (``estimator.fit``) and check + whether the estimated model is valid (see ``is_model_valid``). + 3. Classify all data as inliers or outliers by calculating the residuals + to the estimated model (``estimator.predict(X) - y``) - all data + samples with absolute residuals smaller than or equal to the + ``residual_threshold`` are considered as inliers. + 4. Save fitted model as best model if number of inlier samples is + maximal. In case the current estimated model has the same number of + inliers, it is only considered as the best model if it has better score. + + These steps are performed either a maximum number of times (``max_trials``) or + until one of the special stop criteria are met (see ``stop_n_inliers`` and + ``stop_score``). The final model is estimated using all inlier samples (consensus + set) of the previously determined best model. + + The ``is_data_valid`` and ``is_model_valid`` functions allow to identify and reject + degenerate combinations of random sub-samples. If the estimated model is not + needed for identifying degenerate cases, ``is_data_valid`` should be used as it + is called prior to fitting the model and thus leading to better computational + performance. + +.. dropdown:: References + + * https://en.wikipedia.org/wiki/RANSAC + * `"Random Sample Consensus: A Paradigm for Model Fitting with Applications to + Image Analysis and Automated Cartography" + `_ + Martin A. Fischler and Robert C. Bolles - SRI International (1981) + * `"Performance Evaluation of RANSAC Family" + `_ + Sunglok Choi, Taemin Kim and Wonpil Yu - BMVC (2009) .. _theil_sen_regression: @@ -1501,57 +1530,52 @@ that the robustness of the estimator decreases quickly with the dimensionality of the problem. It loses its robustness properties and becomes no better than an ordinary least squares in high dimension. -.. topic:: Examples: - - * :ref:`sphx_glr_auto_examples_linear_model_plot_theilsen.py` - * :ref:`sphx_glr_auto_examples_linear_model_plot_robust_fit.py` +.. rubric:: Examples -.. 
topic:: References: +* :ref:`sphx_glr_auto_examples_linear_model_plot_theilsen.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_robust_fit.py` - * https://en.wikipedia.org/wiki/Theil%E2%80%93Sen_estimator -Theoretical considerations -^^^^^^^^^^^^^^^^^^^^^^^^^^ +.. dropdown:: Theoretical considerations -:class:`TheilSenRegressor` is comparable to the :ref:`Ordinary Least Squares -(OLS) ` in terms of asymptotic efficiency and as an -unbiased estimator. In contrast to OLS, Theil-Sen is a non-parametric -method which means it makes no assumption about the underlying -distribution of the data. Since Theil-Sen is a median-based estimator, it -is more robust against corrupted data aka outliers. In univariate -setting, Theil-Sen has a breakdown point of about 29.3% in case of a -simple linear regression which means that it can tolerate arbitrary -corrupted data of up to 29.3%. + :class:`TheilSenRegressor` is comparable to the :ref:`Ordinary Least Squares + (OLS) ` in terms of asymptotic efficiency and as an + unbiased estimator. In contrast to OLS, Theil-Sen is a non-parametric + method which means it makes no assumption about the underlying + distribution of the data. Since Theil-Sen is a median-based estimator, it + is more robust against corrupted data aka outliers. In univariate + setting, Theil-Sen has a breakdown point of about 29.3% in case of a + simple linear regression which means that it can tolerate arbitrary + corrupted data of up to 29.3%. -.. figure:: ../auto_examples/linear_model/images/sphx_glr_plot_theilsen_001.png - :target: ../auto_examples/linear_model/plot_theilsen.html - :align: center - :scale: 50% + .. figure:: ../auto_examples/linear_model/images/sphx_glr_plot_theilsen_001.png + :target: ../auto_examples/linear_model/plot_theilsen.html + :align: center + :scale: 50% -The implementation of :class:`TheilSenRegressor` in scikit-learn follows a -generalization to a multivariate linear regression model [#f1]_ using the -spatial median which is a generalization of the median to multiple -dimensions [#f2]_. + The implementation of :class:`TheilSenRegressor` in scikit-learn follows a + generalization to a multivariate linear regression model [#f1]_ using the + spatial median which is a generalization of the median to multiple + dimensions [#f2]_. -In terms of time and space complexity, Theil-Sen scales according to + In terms of time and space complexity, Theil-Sen scales according to -.. math:: - \binom{n_{\text{samples}}}{n_{\text{subsamples}}} + .. math:: + \binom{n_{\text{samples}}}{n_{\text{subsamples}}} -which makes it infeasible to be applied exhaustively to problems with a -large number of samples and features. Therefore, the magnitude of a -subpopulation can be chosen to limit the time and space complexity by -considering only a random subset of all possible combinations. + which makes it infeasible to be applied exhaustively to problems with a + large number of samples and features. Therefore, the magnitude of a + subpopulation can be chosen to limit the time and space complexity by + considering only a random subset of all possible combinations. -.. topic:: Examples: + .. rubric:: References - * :ref:`sphx_glr_auto_examples_linear_model_plot_theilsen.py` + .. [#f1] Xin Dang, Hanxiang Peng, Xueqin Wang and Heping Zhang: `Theil-Sen Estimators in a Multiple Linear Regression Model. `_ -.. topic:: References: + .. [#f2] T. Kärkkäinen and S. ÄyrämÃļ: `On Computation of Spatial Median for Robust Data Mining. `_ - .. 
[#f1] Xin Dang, Hanxiang Peng, Xueqin Wang and Heping Zhang: `Theil-Sen Estimators in a Multiple Linear Regression Model. `_ + Also see the `Wikipedia page `_ - .. [#f2] T. Kärkkäinen and S. ÄyrämÃļ: `On Computation of Spatial Median for Robust Data Mining. `_ .. _huber_regression: @@ -1570,25 +1594,35 @@ but gives a lesser weight to them. :align: center :scale: 50% -The loss function that :class:`HuberRegressor` minimizes is given by +.. rubric:: Examples -.. math:: +* :ref:`sphx_glr_auto_examples_linear_model_plot_huber_vs_ridge.py` - \min_{w, \sigma} {\sum_{i=1}^n\left(\sigma + H_{\epsilon}\left(\frac{X_{i}w - y_{i}}{\sigma}\right)\sigma\right) + \alpha {||w||_2}^2} +.. dropdown:: Mathematical details -where + The loss function that :class:`HuberRegressor` minimizes is given by -.. math:: + .. math:: - H_{\epsilon}(z) = \begin{cases} - z^2, & \text {if } |z| < \epsilon, \\ - 2\epsilon|z| - \epsilon^2, & \text{otherwise} - \end{cases} + \min_{w, \sigma} {\sum_{i=1}^n\left(\sigma + H_{\epsilon}\left(\frac{X_{i}w - y_{i}}{\sigma}\right)\sigma\right) + \alpha {||w||_2}^2} -It is advised to set the parameter ``epsilon`` to 1.35 to achieve 95% statistical efficiency. + where + + .. math:: + + H_{\epsilon}(z) = \begin{cases} + z^2, & \text {if } |z| < \epsilon, \\ + 2\epsilon|z| - \epsilon^2, & \text{otherwise} + \end{cases} + + It is advised to set the parameter ``epsilon`` to 1.35 to achieve 95% + statistical efficiency. + + .. rubric:: References + + * Peter J. Huber, Elvezio M. Ronchetti: Robust Statistics, Concomitant scale + estimates, pg 172 -Notes ------ The :class:`HuberRegressor` differs from using :class:`SGDRegressor` with loss set to `huber` in the following ways. @@ -1601,14 +1635,6 @@ in the following ways. samples while :class:`SGDRegressor` needs a number of passes on the training data to produce the same robustness. -.. topic:: Examples: - - * :ref:`sphx_glr_auto_examples_linear_model_plot_huber_vs_ridge.py` - -.. topic:: References: - - * Peter J. Huber, Elvezio M. Ronchetti: Robust Statistics, Concomitant scale estimates, pg 172 - Note that this estimator is different from the R implementation of Robust Regression (https://stats.oarc.ucla.edu/r/dae/robust-regression/) because the R implementation does a weighted least squares implementation with weights given to each sample on the basis of how much the residual is @@ -1623,33 +1649,6 @@ Quantile regression estimates the median or other quantiles of :math:`y` conditional on :math:`X`, while ordinary least squares (OLS) estimates the conditional mean. -As a linear model, the :class:`QuantileRegressor` gives linear predictions -:math:`\hat{y}(w, X) = Xw` for the :math:`q`-th quantile, :math:`q \in (0, 1)`. -The weights or coefficients :math:`w` are then found by the following -minimization problem: - -.. math:: - \min_{w} {\frac{1}{n_{\text{samples}}} - \sum_i PB_q(y_i - X_i w) + \alpha ||w||_1}. - -This consists of the pinball loss (also known as linear loss), -see also :class:`~sklearn.metrics.mean_pinball_loss`, - -.. math:: - PB_q(t) = q \max(t, 0) + (1 - q) \max(-t, 0) = - \begin{cases} - q t, & t > 0, \\ - 0, & t = 0, \\ - (q-1) t, & t < 0 - \end{cases} - -and the L1 penalty controlled by parameter ``alpha``, similar to -:class:`Lasso`. - -As the pinball loss is only linear in the residuals, quantile regression is -much more robust to outliers than squared error based estimation of the mean. -Somewhat in between is the :class:`HuberRegressor`. 
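To illustrate the pinball-loss objective above, the following sketch (heteroscedastic
synthetic data; the quantile levels and ``alpha=0`` are illustrative choices) fits
:class:`QuantileRegressor` at several quantiles and checks that roughly the expected
fraction of training targets falls below each prediction::

    import numpy as np
    from sklearn.linear_model import QuantileRegressor

    rng = np.random.RandomState(0)
    X = rng.uniform(0, 10, size=(500, 1))
    # Heteroscedastic noise: the spread grows with x, so the conditional
    # quantiles are not simple shifts of the conditional mean.
    y = 2 * X.ravel() + rng.normal(scale=0.5 + 0.5 * X.ravel())

    for q in (0.1, 0.5, 0.9):
        reg = QuantileRegressor(quantile=q, alpha=0).fit(X, y)
        coverage = np.mean(y <= reg.predict(X))
        print(f"quantile={q}: fraction of y below the prediction = {coverage:.2f}")

Each printed fraction should be close to the requested quantile level.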
- Quantile regression may be useful if one is interested in predicting an interval instead of point prediction. Sometimes, prediction intervals are calculated based on the assumption that prediction error is distributed @@ -1673,11 +1672,40 @@ Most implementations of quantile regression are based on linear programming problem. The current implementation is based on :func:`scipy.optimize.linprog`. -.. topic:: Examples: +.. rubric:: Examples + +* :ref:`sphx_glr_auto_examples_linear_model_plot_quantile_regression.py` + +.. dropdown:: Mathematical details + + As a linear model, the :class:`QuantileRegressor` gives linear predictions + :math:`\hat{y}(w, X) = Xw` for the :math:`q`-th quantile, :math:`q \in (0, 1)`. + The weights or coefficients :math:`w` are then found by the following + minimization problem: - * :ref:`sphx_glr_auto_examples_linear_model_plot_quantile_regression.py` + .. math:: + \min_{w} {\frac{1}{n_{\text{samples}}} + \sum_i PB_q(y_i - X_i w) + \alpha ||w||_1}. -.. topic:: References: + This consists of the pinball loss (also known as linear loss), + see also :class:`~sklearn.metrics.mean_pinball_loss`, + + .. math:: + PB_q(t) = q \max(t, 0) + (1 - q) \max(-t, 0) = + \begin{cases} + q t, & t > 0, \\ + 0, & t = 0, \\ + (q-1) t, & t < 0 + \end{cases} + + and the L1 penalty controlled by parameter ``alpha``, similar to + :class:`Lasso`. + + As the pinball loss is only linear in the residuals, quantile regression is + much more robust to outliers than squared error based estimation of the mean. + Somewhat in between is the :class:`HuberRegressor`. + +.. dropdown:: References * Koenker, R., & Bassett Jr, G. (1978). `Regression quantiles. `_ @@ -1703,32 +1731,34 @@ on nonlinear functions of the data. This approach maintains the generally fast performance of linear methods, while allowing them to fit a much wider range of data. -For example, a simple linear regression can be extended by constructing -**polynomial features** from the coefficients. In the standard linear -regression case, you might have a model that looks like this for -two-dimensional data: +.. dropdown:: Mathematical details + + For example, a simple linear regression can be extended by constructing + **polynomial features** from the coefficients. In the standard linear + regression case, you might have a model that looks like this for + two-dimensional data: -.. math:: \hat{y}(w, x) = w_0 + w_1 x_1 + w_2 x_2 + .. math:: \hat{y}(w, x) = w_0 + w_1 x_1 + w_2 x_2 -If we want to fit a paraboloid to the data instead of a plane, we can combine -the features in second-order polynomials, so that the model looks like this: + If we want to fit a paraboloid to the data instead of a plane, we can combine + the features in second-order polynomials, so that the model looks like this: -.. math:: \hat{y}(w, x) = w_0 + w_1 x_1 + w_2 x_2 + w_3 x_1 x_2 + w_4 x_1^2 + w_5 x_2^2 + .. math:: \hat{y}(w, x) = w_0 + w_1 x_1 + w_2 x_2 + w_3 x_1 x_2 + w_4 x_1^2 + w_5 x_2^2 -The (sometimes surprising) observation is that this is *still a linear model*: -to see this, imagine creating a new set of features + The (sometimes surprising) observation is that this is *still a linear model*: + to see this, imagine creating a new set of features -.. math:: z = [x_1, x_2, x_1 x_2, x_1^2, x_2^2] + .. math:: z = [x_1, x_2, x_1 x_2, x_1^2, x_2^2] -With this re-labeling of the data, our problem can be written + With this re-labeling of the data, our problem can be written -.. math:: \hat{y}(w, z) = w_0 + w_1 z_1 + w_2 z_2 + w_3 z_3 + w_4 z_4 + w_5 z_5 + .. 
math:: \hat{y}(w, z) = w_0 + w_1 z_1 + w_2 z_2 + w_3 z_3 + w_4 z_4 + w_5 z_5 -We see that the resulting *polynomial regression* is in the same class of -linear models we considered above (i.e. the model is linear in :math:`w`) -and can be solved by the same techniques. By considering linear fits within -a higher-dimensional space built with these basis functions, the model has the -flexibility to fit a much broader range of data. + We see that the resulting *polynomial regression* is in the same class of + linear models we considered above (i.e. the model is linear in :math:`w`) + and can be solved by the same techniques. By considering linear fits within + a higher-dimensional space built with these basis functions, the model has the + flexibility to fit a much broader range of data. Here is an example of applying this idea to one-dimensional data, using polynomial features of varying degrees: diff --git a/doc/modules/manifold.rst b/doc/modules/manifold.rst index 40bbea17a8309..785fba3097edf 100644 --- a/doc/modules/manifold.rst +++ b/doc/modules/manifold.rst @@ -102,13 +102,13 @@ unsupervised: it learns the high-dimensional structure of the data from the data itself, without the use of predetermined classifications. -.. topic:: Examples: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_manifold_plot_lle_digits.py` for an example of - dimensionality reduction on handwritten digits. +* See :ref:`sphx_glr_auto_examples_manifold_plot_lle_digits.py` for an example of + dimensionality reduction on handwritten digits. - * See :ref:`sphx_glr_auto_examples_manifold_plot_compare_methods.py` for an example of - dimensionality reduction on a toy "S-curve" dataset. +* See :ref:`sphx_glr_auto_examples_manifold_plot_compare_methods.py` for an example of + dimensionality reduction on a toy "S-curve" dataset. The manifold learning implementations available in scikit-learn are summarized below @@ -130,43 +130,43 @@ distances between all points. Isomap can be performed with the object :align: center :scale: 50 -Complexity ----------- -The Isomap algorithm comprises three stages: +.. dropdown:: Complexity -1. **Nearest neighbor search.** Isomap uses - :class:`~sklearn.neighbors.BallTree` for efficient neighbor search. - The cost is approximately :math:`O[D \log(k) N \log(N)]`, for :math:`k` - nearest neighbors of :math:`N` points in :math:`D` dimensions. + The Isomap algorithm comprises three stages: -2. **Shortest-path graph search.** The most efficient known algorithms - for this are *Dijkstra's Algorithm*, which is approximately - :math:`O[N^2(k + \log(N))]`, or the *Floyd-Warshall algorithm*, which - is :math:`O[N^3]`. The algorithm can be selected by the user with - the ``path_method`` keyword of ``Isomap``. If unspecified, the code - attempts to choose the best algorithm for the input data. + 1. **Nearest neighbor search.** Isomap uses + :class:`~sklearn.neighbors.BallTree` for efficient neighbor search. + The cost is approximately :math:`O[D \log(k) N \log(N)]`, for :math:`k` + nearest neighbors of :math:`N` points in :math:`D` dimensions. -3. **Partial eigenvalue decomposition.** The embedding is encoded in the - eigenvectors corresponding to the :math:`d` largest eigenvalues of the - :math:`N \times N` isomap kernel. For a dense solver, the cost is - approximately :math:`O[d N^2]`. This cost can often be improved using - the ``ARPACK`` solver. The eigensolver can be specified by the user - with the ``eigen_solver`` keyword of ``Isomap``. 
If unspecified, the - code attempts to choose the best algorithm for the input data. + 2. **Shortest-path graph search.** The most efficient known algorithms + for this are *Dijkstra's Algorithm*, which is approximately + :math:`O[N^2(k + \log(N))]`, or the *Floyd-Warshall algorithm*, which + is :math:`O[N^3]`. The algorithm can be selected by the user with + the ``path_method`` keyword of ``Isomap``. If unspecified, the code + attempts to choose the best algorithm for the input data. -The overall complexity of Isomap is -:math:`O[D \log(k) N \log(N)] + O[N^2(k + \log(N))] + O[d N^2]`. + 3. **Partial eigenvalue decomposition.** The embedding is encoded in the + eigenvectors corresponding to the :math:`d` largest eigenvalues of the + :math:`N \times N` isomap kernel. For a dense solver, the cost is + approximately :math:`O[d N^2]`. This cost can often be improved using + the ``ARPACK`` solver. The eigensolver can be specified by the user + with the ``eigen_solver`` keyword of ``Isomap``. If unspecified, the + code attempts to choose the best algorithm for the input data. -* :math:`N` : number of training data points -* :math:`D` : input dimension -* :math:`k` : number of nearest neighbors -* :math:`d` : output dimension + The overall complexity of Isomap is + :math:`O[D \log(k) N \log(N)] + O[N^2(k + \log(N))] + O[d N^2]`. -.. topic:: References: + * :math:`N` : number of training data points + * :math:`D` : input dimension + * :math:`k` : number of nearest neighbors + * :math:`d` : output dimension - * `"A global geometric framework for nonlinear dimensionality reduction" - `_ - Tenenbaum, J.B.; De Silva, V.; & Langford, J.C. Science 290 (5500) +.. rubric:: References + +* `"A global geometric framework for nonlinear dimensionality reduction" + `_ + Tenenbaum, J.B.; De Silva, V.; & Langford, J.C. Science 290 (5500) .. _locally_linear_embedding: @@ -187,33 +187,32 @@ Locally linear embedding can be performed with function :align: center :scale: 50 -Complexity ----------- +.. dropdown:: Complexity -The standard LLE algorithm comprises three stages: + The standard LLE algorithm comprises three stages: -1. **Nearest Neighbors Search**. See discussion under Isomap above. + 1. **Nearest Neighbors Search**. See discussion under Isomap above. -2. **Weight Matrix Construction**. :math:`O[D N k^3]`. - The construction of the LLE weight matrix involves the solution of a - :math:`k \times k` linear equation for each of the :math:`N` local - neighborhoods + 2. **Weight Matrix Construction**. :math:`O[D N k^3]`. + The construction of the LLE weight matrix involves the solution of a + :math:`k \times k` linear equation for each of the :math:`N` local + neighborhoods. -3. **Partial Eigenvalue Decomposition**. See discussion under Isomap above. + 3. **Partial Eigenvalue Decomposition**. See discussion under Isomap above. -The overall complexity of standard LLE is -:math:`O[D \log(k) N \log(N)] + O[D N k^3] + O[d N^2]`. + The overall complexity of standard LLE is + :math:`O[D \log(k) N \log(N)] + O[D N k^3] + O[d N^2]`. -* :math:`N` : number of training data points -* :math:`D` : input dimension -* :math:`k` : number of nearest neighbors -* :math:`d` : output dimension + * :math:`N` : number of training data points + * :math:`D` : input dimension + * :math:`k` : number of nearest neighbors + * :math:`d` : output dimension -.. topic:: References: +.. rubric:: References - * `"Nonlinear dimensionality reduction by locally linear embedding" - `_ - Roweis, S. & Saul, L. 
Science 290:2323 (2000) +* `"Nonlinear dimensionality reduction by locally linear embedding" + `_ + Roweis, S. & Saul, L. Science 290:2323 (2000) Modified Locally Linear Embedding @@ -241,35 +240,34 @@ It requires ``n_neighbors > n_components``. :align: center :scale: 50 -Complexity ----------- +.. dropdown:: Complexity -The MLLE algorithm comprises three stages: + The MLLE algorithm comprises three stages: -1. **Nearest Neighbors Search**. Same as standard LLE + 1. **Nearest Neighbors Search**. Same as standard LLE -2. **Weight Matrix Construction**. Approximately - :math:`O[D N k^3] + O[N (k-D) k^2]`. The first term is exactly equivalent - to that of standard LLE. The second term has to do with constructing the - weight matrix from multiple weights. In practice, the added cost of - constructing the MLLE weight matrix is relatively small compared to the - cost of stages 1 and 3. + 2. **Weight Matrix Construction**. Approximately + :math:`O[D N k^3] + O[N (k-D) k^2]`. The first term is exactly equivalent + to that of standard LLE. The second term has to do with constructing the + weight matrix from multiple weights. In practice, the added cost of + constructing the MLLE weight matrix is relatively small compared to the + cost of stages 1 and 3. -3. **Partial Eigenvalue Decomposition**. Same as standard LLE + 3. **Partial Eigenvalue Decomposition**. Same as standard LLE -The overall complexity of MLLE is -:math:`O[D \log(k) N \log(N)] + O[D N k^3] + O[N (k-D) k^2] + O[d N^2]`. + The overall complexity of MLLE is + :math:`O[D \log(k) N \log(N)] + O[D N k^3] + O[N (k-D) k^2] + O[d N^2]`. -* :math:`N` : number of training data points -* :math:`D` : input dimension -* :math:`k` : number of nearest neighbors -* :math:`d` : output dimension + * :math:`N` : number of training data points + * :math:`D` : input dimension + * :math:`k` : number of nearest neighbors + * :math:`d` : output dimension -.. topic:: References: +.. rubric:: References - * `"MLLE: Modified Locally Linear Embedding Using Multiple Weights" - `_ - Zhang, Z. & Wang, J. +* `"MLLE: Modified Locally Linear Embedding Using Multiple Weights" + `_ + Zhang, Z. & Wang, J. Hessian Eigenmapping @@ -291,33 +289,32 @@ It requires ``n_neighbors > n_components * (n_components + 3) / 2``. :align: center :scale: 50 -Complexity ----------- +.. dropdown:: Complexity The HLLE algorithm comprises three stages: -1. **Nearest Neighbors Search**. Same as standard LLE + 1. **Nearest Neighbors Search**. Same as standard LLE -2. **Weight Matrix Construction**. Approximately - :math:`O[D N k^3] + O[N d^6]`. The first term reflects a similar - cost to that of standard LLE. The second term comes from a QR - decomposition of the local hessian estimator. + 2. **Weight Matrix Construction**. Approximately + :math:`O[D N k^3] + O[N d^6]`. The first term reflects a similar + cost to that of standard LLE. The second term comes from a QR + decomposition of the local hessian estimator. -3. **Partial Eigenvalue Decomposition**. Same as standard LLE + 3. **Partial Eigenvalue Decomposition**. Same as standard LLE -The overall complexity of standard HLLE is -:math:`O[D \log(k) N \log(N)] + O[D N k^3] + O[N d^6] + O[d N^2]`. + The overall complexity of standard HLLE is + :math:`O[D \log(k) N \log(N)] + O[D N k^3] + O[N d^6] + O[d N^2]`. 
-* :math:`N` : number of training data points -* :math:`D` : input dimension -* :math:`k` : number of nearest neighbors -* :math:`d` : output dimension + * :math:`N` : number of training data points + * :math:`D` : input dimension + * :math:`k` : number of nearest neighbors + * :math:`d` : output dimension -.. topic:: References: +.. rubric:: References - * `"Hessian Eigenmaps: Locally linear embedding techniques for - high-dimensional data" `_ - Donoho, D. & Grimes, C. Proc Natl Acad Sci USA. 100:5591 (2003) +* `"Hessian Eigenmaps: Locally linear embedding techniques for + high-dimensional data" `_ + Donoho, D. & Grimes, C. Proc Natl Acad Sci USA. 100:5591 (2003) .. _spectral_embedding: @@ -335,35 +332,34 @@ preserving local distances. Spectral embedding can be performed with the function :func:`spectral_embedding` or its object-oriented counterpart :class:`SpectralEmbedding`. -Complexity ----------- +.. dropdown:: Complexity -The Spectral Embedding (Laplacian Eigenmaps) algorithm comprises three stages: + The Spectral Embedding (Laplacian Eigenmaps) algorithm comprises three stages: -1. **Weighted Graph Construction**. Transform the raw input data into - graph representation using affinity (adjacency) matrix representation. + 1. **Weighted Graph Construction**. Transform the raw input data into + graph representation using affinity (adjacency) matrix representation. -2. **Graph Laplacian Construction**. unnormalized Graph Laplacian - is constructed as :math:`L = D - A` for and normalized one as - :math:`L = D^{-\frac{1}{2}} (D - A) D^{-\frac{1}{2}}`. + 2. **Graph Laplacian Construction**. unnormalized Graph Laplacian + is constructed as :math:`L = D - A` for and normalized one as + :math:`L = D^{-\frac{1}{2}} (D - A) D^{-\frac{1}{2}}`. -3. **Partial Eigenvalue Decomposition**. Eigenvalue decomposition is - done on graph Laplacian + 3. **Partial Eigenvalue Decomposition**. Eigenvalue decomposition is + done on graph Laplacian. -The overall complexity of spectral embedding is -:math:`O[D \log(k) N \log(N)] + O[D N k^3] + O[d N^2]`. + The overall complexity of spectral embedding is + :math:`O[D \log(k) N \log(N)] + O[D N k^3] + O[d N^2]`. -* :math:`N` : number of training data points -* :math:`D` : input dimension -* :math:`k` : number of nearest neighbors -* :math:`d` : output dimension + * :math:`N` : number of training data points + * :math:`D` : input dimension + * :math:`k` : number of nearest neighbors + * :math:`d` : output dimension -.. topic:: References: +.. rubric:: References - * `"Laplacian Eigenmaps for Dimensionality Reduction - and Data Representation" - `_ - M. Belkin, P. Niyogi, Neural Computation, June 2003; 15 (6):1373-1396 +* `"Laplacian Eigenmaps for Dimensionality Reduction + and Data Representation" + `_ + M. Belkin, P. Niyogi, Neural Computation, June 2003; 15 (6):1373-1396 Local Tangent Space Alignment @@ -383,33 +379,32 @@ tangent spaces to learn the embedding. LTSA can be performed with function :align: center :scale: 50 -Complexity ----------- +.. dropdown:: Complexity -The LTSA algorithm comprises three stages: + The LTSA algorithm comprises three stages: -1. **Nearest Neighbors Search**. Same as standard LLE + 1. **Nearest Neighbors Search**. Same as standard LLE -2. **Weight Matrix Construction**. Approximately - :math:`O[D N k^3] + O[k^2 d]`. The first term reflects a similar - cost to that of standard LLE. + 2. **Weight Matrix Construction**. Approximately + :math:`O[D N k^3] + O[k^2 d]`. The first term reflects a similar + cost to that of standard LLE. -3. 
**Partial Eigenvalue Decomposition**. Same as standard LLE + 3. **Partial Eigenvalue Decomposition**. Same as standard LLE -The overall complexity of standard LTSA is -:math:`O[D \log(k) N \log(N)] + O[D N k^3] + O[k^2 d] + O[d N^2]`. + The overall complexity of standard LTSA is + :math:`O[D \log(k) N \log(N)] + O[D N k^3] + O[k^2 d] + O[d N^2]`. -* :math:`N` : number of training data points -* :math:`D` : input dimension -* :math:`k` : number of nearest neighbors -* :math:`d` : output dimension + * :math:`N` : number of training data points + * :math:`D` : input dimension + * :math:`k` : number of nearest neighbors + * :math:`d` : output dimension -.. topic:: References: +.. rubric:: References - * :arxiv:`"Principal manifolds and nonlinear dimensionality reduction via - tangent space alignment" - ` - Zhang, Z. & Zha, H. Journal of Shanghai Univ. 8:406 (2004) +* :arxiv:`"Principal manifolds and nonlinear dimensionality reduction via + tangent space alignment" + ` + Zhang, Z. & Zha, H. Journal of Shanghai Univ. 8:406 (2004) .. _multidimensional_scaling: @@ -448,62 +443,59 @@ the similarities chosen in some optimal ways. The objective, called the stress, is then defined by :math:`\sum_{i < j} d_{ij}(X) - \hat{d}_{ij}(X)` -Metric MDS ----------- - -The simplest metric :class:`MDS` model, called *absolute MDS*, disparities are defined by -:math:`\hat{d}_{ij} = S_{ij}`. With absolute MDS, the value :math:`S_{ij}` -should then correspond exactly to the distance between point :math:`i` and -:math:`j` in the embedding point. +.. dropdown:: Metric MDS -Most commonly, disparities are set to :math:`\hat{d}_{ij} = b S_{ij}`. + The simplest metric :class:`MDS` model, called *absolute MDS*, disparities are defined by + :math:`\hat{d}_{ij} = S_{ij}`. With absolute MDS, the value :math:`S_{ij}` + should then correspond exactly to the distance between point :math:`i` and + :math:`j` in the embedding point. -Nonmetric MDS -------------- + Most commonly, disparities are set to :math:`\hat{d}_{ij} = b S_{ij}`. -Non metric :class:`MDS` focuses on the ordination of the data. If -:math:`S_{ij} > S_{jk}`, then the embedding should enforce :math:`d_{ij} < -d_{jk}`. For this reason, we discuss it in terms of dissimilarities -(:math:`\delta_{ij}`) instead of similarities (:math:`S_{ij}`). Note that -dissimilarities can easily be obtained from similarities through a simple -transform, e.g. :math:`\delta_{ij}=c_1-c_2 S_{ij}` for some real constants -:math:`c_1, c_2`. A simple algorithm to enforce proper ordination is to use a -monotonic regression of :math:`d_{ij}` on :math:`\delta_{ij}`, yielding -disparities :math:`\hat{d}_{ij}` in the same order as :math:`\delta_{ij}`. +.. dropdown:: Nonmetric MDS -A trivial solution to this problem is to set all the points on the origin. In -order to avoid that, the disparities :math:`\hat{d}_{ij}` are normalized. Note -that since we only care about relative ordering, our objective should be -invariant to simple translation and scaling, however the stress used in metric -MDS is sensitive to scaling. To address this, non-metric MDS may use a -normalized stress, known as Stress-1 defined as + Non metric :class:`MDS` focuses on the ordination of the data. If + :math:`S_{ij} > S_{jk}`, then the embedding should enforce :math:`d_{ij} < + d_{jk}`. For this reason, we discuss it in terms of dissimilarities + (:math:`\delta_{ij}`) instead of similarities (:math:`S_{ij}`). Note that + dissimilarities can easily be obtained from similarities through a simple + transform, e.g. 
:math:`\delta_{ij}=c_1-c_2 S_{ij}` for some real constants + :math:`c_1, c_2`. A simple algorithm to enforce proper ordination is to use a + monotonic regression of :math:`d_{ij}` on :math:`\delta_{ij}`, yielding + disparities :math:`\hat{d}_{ij}` in the same order as :math:`\delta_{ij}`. -.. math:: - \sqrt{\frac{\sum_{i < j} (d_{ij} - \hat{d}_{ij})^2}{\sum_{i < j} d_{ij}^2}}. + A trivial solution to this problem is to set all the points on the origin. In + order to avoid that, the disparities :math:`\hat{d}_{ij}` are normalized. Note + that since we only care about relative ordering, our objective should be + invariant to simple translation and scaling, however the stress used in metric + MDS is sensitive to scaling. To address this, non-metric MDS may use a + normalized stress, known as Stress-1 defined as -The use of normalized Stress-1 can be enabled by setting `normalized_stress=True`, -however it is only compatible with the non-metric MDS problem and will be ignored -in the metric case. + .. math:: + \sqrt{\frac{\sum_{i < j} (d_{ij} - \hat{d}_{ij})^2}{\sum_{i < j} d_{ij}^2}}. -.. figure:: ../auto_examples/manifold/images/sphx_glr_plot_mds_001.png - :target: ../auto_examples/manifold/plot_mds.html - :align: center - :scale: 60 + The use of normalized Stress-1 can be enabled by setting `normalized_stress=True`, + however it is only compatible with the non-metric MDS problem and will be ignored + in the metric case. + .. figure:: ../auto_examples/manifold/images/sphx_glr_plot_mds_001.png + :target: ../auto_examples/manifold/plot_mds.html + :align: center + :scale: 60 -.. topic:: References: +.. rubric:: References - * `"Modern Multidimensional Scaling - Theory and Applications" - `_ - Borg, I.; Groenen P. Springer Series in Statistics (1997) +* `"Modern Multidimensional Scaling - Theory and Applications" + `_ + Borg, I.; Groenen P. Springer Series in Statistics (1997) - * `"Nonmetric multidimensional scaling: a numerical method" - `_ - Kruskal, J. Psychometrika, 29 (1964) +* `"Nonmetric multidimensional scaling: a numerical method" + `_ + Kruskal, J. Psychometrika, 29 (1964) - * `"Multidimensional scaling by optimizing goodness of fit to a nonmetric hypothesis" - `_ - Kruskal, J. Psychometrika, 29, (1964) +* `"Multidimensional scaling by optimizing goodness of fit to a nonmetric hypothesis" + `_ + Kruskal, J. Psychometrika, 29, (1964) .. _t_sne: @@ -551,114 +543,110 @@ The disadvantages to using t-SNE are roughly: :align: center :scale: 50 -Optimizing t-SNE ----------------- -The main purpose of t-SNE is visualization of high-dimensional data. Hence, -it works best when the data will be embedded on two or three dimensions. - -Optimizing the KL divergence can be a little bit tricky sometimes. There are -five parameters that control the optimization of t-SNE and therefore possibly -the quality of the resulting embedding: - -* perplexity -* early exaggeration factor -* learning rate -* maximum number of iterations -* angle (not used in the exact method) - -The perplexity is defined as :math:`k=2^{(S)}` where :math:`S` is the Shannon -entropy of the conditional probability distribution. The perplexity of a -:math:`k`-sided die is :math:`k`, so that :math:`k` is effectively the number of -nearest neighbors t-SNE considers when generating the conditional probabilities. -Larger perplexities lead to more nearest neighbors and less sensitive to small -structure. 
Conversely a lower perplexity considers a smaller number of -neighbors, and thus ignores more global information in favour of the -local neighborhood. As dataset sizes get larger more points will be -required to get a reasonable sample of the local neighborhood, and hence -larger perplexities may be required. Similarly noisier datasets will require -larger perplexity values to encompass enough local neighbors to see beyond -the background noise. - -The maximum number of iterations is usually high enough and does not need -any tuning. The optimization consists of two phases: the early exaggeration -phase and the final optimization. During early exaggeration the joint -probabilities in the original space will be artificially increased by -multiplication with a given factor. Larger factors result in larger gaps -between natural clusters in the data. If the factor is too high, the KL -divergence could increase during this phase. Usually it does not have to be -tuned. A critical parameter is the learning rate. If it is too low gradient -descent will get stuck in a bad local minimum. If it is too high the KL -divergence will increase during optimization. A heuristic suggested in -Belkina et al. (2019) is to set the learning rate to the sample size -divided by the early exaggeration factor. We implement this heuristic -as `learning_rate='auto'` argument. More tips can be found in -Laurens van der Maaten's FAQ (see references). The last parameter, angle, -is a tradeoff between performance and accuracy. Larger angles imply that we -can approximate larger regions by a single point, leading to better speed -but less accurate results. - -`"How to Use t-SNE Effectively" `_ -provides a good discussion of the effects of the various parameters, as well -as interactive plots to explore the effects of different parameters. - -Barnes-Hut t-SNE ----------------- - -The Barnes-Hut t-SNE that has been implemented here is usually much slower than -other manifold learning algorithms. The optimization is quite difficult -and the computation of the gradient is :math:`O[d N log(N)]`, where :math:`d` -is the number of output dimensions and :math:`N` is the number of samples. The -Barnes-Hut method improves on the exact method where t-SNE complexity is -:math:`O[d N^2]`, but has several other notable differences: - -* The Barnes-Hut implementation only works when the target dimensionality is 3 - or less. The 2D case is typical when building visualizations. -* Barnes-Hut only works with dense input data. Sparse data matrices can only be - embedded with the exact method or can be approximated by a dense low rank - projection for instance using :class:`~sklearn.decomposition.TruncatedSVD` -* Barnes-Hut is an approximation of the exact method. The approximation is - parameterized with the angle parameter, therefore the angle parameter is - unused when method="exact" -* Barnes-Hut is significantly more scalable. Barnes-Hut can be used to embed - hundred of thousands of data points while the exact method can handle - thousands of samples before becoming computationally intractable - -For visualization purpose (which is the main use case of t-SNE), using the -Barnes-Hut method is strongly recommended. The exact t-SNE method is useful -for checking the theoretically properties of the embedding possibly in higher -dimensional space but limit to small datasets due to computational constraints. 
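As a purely illustrative sketch (the toy data and parameter values below are arbitrary), the trade-off between the two methods can be exercised by switching the ``method`` parameter of :class:`TSNE` while keeping the optimization parameters discussed above fixed::

    >>> import numpy as np
    >>> from sklearn.manifold import TSNE
    >>> rng = np.random.RandomState(0)
    >>> X = rng.rand(100, 20)                      # toy high-dimensional data
    >>> tsne = TSNE(n_components=2, perplexity=30.0,
    ...             learning_rate='auto', init='pca',
    ...             method='barnes_hut', random_state=0)
    >>> X_embedded = tsne.fit_transform(X)         # Barnes-Hut approximation
    >>> X_embedded.shape
    (100, 2)

Replacing ``method='barnes_hut'`` with ``method='exact'`` computes the exact gradient instead, which is only practical for small datasets such as this one.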
- -Also note that the digits labels roughly match the natural grouping found by -t-SNE while the linear 2D projection of the PCA model yields a representation -where label regions largely overlap. This is a strong clue that this data can -be well separated by non linear methods that focus on the local structure (e.g. -an SVM with a Gaussian RBF kernel). However, failing to visualize well -separated homogeneously labeled groups with t-SNE in 2D does not necessarily -imply that the data cannot be correctly classified by a supervised model. It -might be the case that 2 dimensions are not high enough to accurately represent -the internal structure of the data. - - -.. topic:: References: - - * `"Visualizing High-Dimensional Data Using t-SNE" - `_ - van der Maaten, L.J.P.; Hinton, G. Journal of Machine Learning Research - (2008) - - * `"t-Distributed Stochastic Neighbor Embedding" - `_ - van der Maaten, L.J.P. - - * `"Accelerating t-SNE using Tree-Based Algorithms" - `_ - van der Maaten, L.J.P.; Journal of Machine Learning Research 15(Oct):3221-3245, 2014. - - * `"Automated optimized parameters for T-distributed stochastic neighbor - embedding improve visualization and analysis of large datasets" - `_ - Belkina, A.C., Ciccolella, C.O., Anno, R., Halpert, R., Spidlen, J., - Snyder-Cappione, J.E., Nature Communications 10, 5415 (2019). +.. dropdown:: Optimizing t-SNE + + The main purpose of t-SNE is visualization of high-dimensional data. Hence, + it works best when the data will be embedded on two or three dimensions. + + Optimizing the KL divergence can be a little bit tricky sometimes. There are + five parameters that control the optimization of t-SNE and therefore possibly + the quality of the resulting embedding: + + * perplexity + * early exaggeration factor + * learning rate + * maximum number of iterations + * angle (not used in the exact method) + + The perplexity is defined as :math:`k=2^{(S)}` where :math:`S` is the Shannon + entropy of the conditional probability distribution. The perplexity of a + :math:`k`-sided die is :math:`k`, so that :math:`k` is effectively the number of + nearest neighbors t-SNE considers when generating the conditional probabilities. + Larger perplexities lead to more nearest neighbors and less sensitive to small + structure. Conversely a lower perplexity considers a smaller number of + neighbors, and thus ignores more global information in favour of the + local neighborhood. As dataset sizes get larger more points will be + required to get a reasonable sample of the local neighborhood, and hence + larger perplexities may be required. Similarly noisier datasets will require + larger perplexity values to encompass enough local neighbors to see beyond + the background noise. + + The maximum number of iterations is usually high enough and does not need + any tuning. The optimization consists of two phases: the early exaggeration + phase and the final optimization. During early exaggeration the joint + probabilities in the original space will be artificially increased by + multiplication with a given factor. Larger factors result in larger gaps + between natural clusters in the data. If the factor is too high, the KL + divergence could increase during this phase. Usually it does not have to be + tuned. A critical parameter is the learning rate. If it is too low gradient + descent will get stuck in a bad local minimum. If it is too high the KL + divergence will increase during optimization. A heuristic suggested in + Belkina et al. 
(2019) is to set the learning rate to the sample size + divided by the early exaggeration factor. We implement this heuristic + as `learning_rate='auto'` argument. More tips can be found in + Laurens van der Maaten's FAQ (see references). The last parameter, angle, + is a tradeoff between performance and accuracy. Larger angles imply that we + can approximate larger regions by a single point, leading to better speed + but less accurate results. + + `"How to Use t-SNE Effectively" `_ + provides a good discussion of the effects of the various parameters, as well + as interactive plots to explore the effects of different parameters. + +.. dropdown:: Barnes-Hut t-SNE + + The Barnes-Hut t-SNE that has been implemented here is usually much slower than + other manifold learning algorithms. The optimization is quite difficult + and the computation of the gradient is :math:`O[d N log(N)]`, where :math:`d` + is the number of output dimensions and :math:`N` is the number of samples. The + Barnes-Hut method improves on the exact method where t-SNE complexity is + :math:`O[d N^2]`, but has several other notable differences: + + * The Barnes-Hut implementation only works when the target dimensionality is 3 + or less. The 2D case is typical when building visualizations. + * Barnes-Hut only works with dense input data. Sparse data matrices can only be + embedded with the exact method or can be approximated by a dense low rank + projection for instance using :class:`~sklearn.decomposition.PCA` + * Barnes-Hut is an approximation of the exact method. The approximation is + parameterized with the angle parameter, therefore the angle parameter is + unused when method="exact" + * Barnes-Hut is significantly more scalable. Barnes-Hut can be used to embed + hundred of thousands of data points while the exact method can handle + thousands of samples before becoming computationally intractable + + For visualization purpose (which is the main use case of t-SNE), using the + Barnes-Hut method is strongly recommended. The exact t-SNE method is useful + for checking the theoretically properties of the embedding possibly in higher + dimensional space but limit to small datasets due to computational constraints. + + Also note that the digits labels roughly match the natural grouping found by + t-SNE while the linear 2D projection of the PCA model yields a representation + where label regions largely overlap. This is a strong clue that this data can + be well separated by non linear methods that focus on the local structure (e.g. + an SVM with a Gaussian RBF kernel). However, failing to visualize well + separated homogeneously labeled groups with t-SNE in 2D does not necessarily + imply that the data cannot be correctly classified by a supervised model. It + might be the case that 2 dimensions are not high enough to accurately represent + the internal structure of the data. + +.. rubric:: References + +* `"Visualizing High-Dimensional Data Using t-SNE" + `_ + van der Maaten, L.J.P.; Hinton, G. Journal of Machine Learning Research (2008) + +* `"t-Distributed Stochastic Neighbor Embedding" + `_ van der Maaten, L.J.P. + +* `"Accelerating t-SNE using Tree-Based Algorithms" + `_ + van der Maaten, L.J.P.; Journal of Machine Learning Research 15(Oct):3221-3245, 2014. 
+ +* `"Automated optimized parameters for T-distributed stochastic neighbor + embedding improve visualization and analysis of large datasets" + `_ + Belkina, A.C., Ciccolella, C.O., Anno, R., Halpert, R., Spidlen, J., + Snyder-Cappione, J.E., Nature Communications 10, 5415 (2019). Tips on practical use ===================== diff --git a/doc/modules/metrics.rst b/doc/modules/metrics.rst index 71e914afad192..63ea797223c22 100644 --- a/doc/modules/metrics.rst +++ b/doc/modules/metrics.rst @@ -28,9 +28,9 @@ There are a number of ways to convert between a distance metric and a similarity measure, such as a kernel. Let ``D`` be the distance, and ``S`` be the kernel: - 1. ``S = np.exp(-D * gamma)``, where one heuristic for choosing - ``gamma`` is ``1 / num_features`` - 2. ``S = 1. / (D / np.max(D))`` +1. ``S = np.exp(-D * gamma)``, where one heuristic for choosing + ``gamma`` is ``1 / num_features`` +2. ``S = 1. / (D / np.max(D))`` .. currentmodule:: sklearn.metrics @@ -87,11 +87,11 @@ represented as tf-idf vectors. can produce normalized vectors, in which case :func:`cosine_similarity` is equivalent to :func:`linear_kernel`, only slower.) -.. topic:: References: +.. rubric:: References - * C.D. Manning, P. Raghavan and H. SchÃŧtze (2008). Introduction to - Information Retrieval. Cambridge University Press. - https://nlp.stanford.edu/IR-book/html/htmledition/the-vector-space-model-for-scoring-1.html +* C.D. Manning, P. Raghavan and H. SchÃŧtze (2008). Introduction to + Information Retrieval. Cambridge University Press. + https://nlp.stanford.edu/IR-book/html/htmledition/the-vector-space-model-for-scoring-1.html .. _linear_kernel: @@ -123,8 +123,8 @@ The polynomial kernel is defined as: where: - * ``x``, ``y`` are the input vectors - * ``d`` is the kernel degree +* ``x``, ``y`` are the input vectors +* ``d`` is the kernel degree If :math:`c_0 = 0` the kernel is said to be homogeneous. @@ -143,9 +143,9 @@ activation function). It is defined as: where: - * ``x``, ``y`` are the input vectors - * :math:`\gamma` is known as slope - * :math:`c_0` is known as intercept +* ``x``, ``y`` are the input vectors +* :math:`\gamma` is known as slope +* :math:`c_0` is known as intercept .. _rbf_kernel: @@ -165,14 +165,14 @@ the kernel is known as the Gaussian kernel of variance :math:`\sigma^2`. Laplacian kernel ---------------- -The function :func:`laplacian_kernel` is a variant on the radial basis +The function :func:`laplacian_kernel` is a variant on the radial basis function kernel defined as: .. math:: k(x, y) = \exp( -\gamma \| x-y \|_1) -where ``x`` and ``y`` are the input vectors and :math:`\|x-y\|_1` is the +where ``x`` and ``y`` are the input vectors and :math:`\|x-y\|_1` is the Manhattan distance between the input vectors. It has proven useful in ML applied to noiseless data. @@ -222,11 +222,10 @@ which is a distance between discrete probability distributions. The chi squared kernel is most commonly used on histograms (bags) of visual words. -.. topic:: References: - - * Zhang, J. and Marszalek, M. and Lazebnik, S. and Schmid, C. - Local features and kernels for classification of texture and object - categories: A comprehensive study - International Journal of Computer Vision 2007 - https://hal.archives-ouvertes.fr/hal-00171412/document +.. rubric:: References +* Zhang, J. and Marszalek, M. and Lazebnik, S. and Schmid, C. 
+ Local features and kernels for classification of texture and object + categories: A comprehensive study + International Journal of Computer Vision 2007 + https://hal.archives-ouvertes.fr/hal-00171412/document diff --git a/doc/modules/mixture.rst b/doc/modules/mixture.rst index fbf0551da93a4..1fd72c3158336 100644 --- a/doc/modules/mixture.rst +++ b/doc/modules/mixture.rst @@ -14,13 +14,13 @@ matrices supported), sample them, and estimate them from data. Facilities to help determine the appropriate number of components are also provided. - .. figure:: ../auto_examples/mixture/images/sphx_glr_plot_gmm_pdf_001.png - :target: ../auto_examples/mixture/plot_gmm_pdf.html - :align: center - :scale: 50% +.. figure:: ../auto_examples/mixture/images/sphx_glr_plot_gmm_pdf_001.png + :target: ../auto_examples/mixture/plot_gmm_pdf.html + :align: center + :scale: 50% - **Two-component Gaussian mixture model:** *data points, and equi-probability - surfaces of the model.* + **Two-component Gaussian mixture model:** *data points, and equi-probability + surfaces of the model.* A Gaussian mixture model is a probabilistic model that assumes all the data points are generated from a mixture of a finite number of @@ -60,117 +60,111 @@ full covariance. :align: center :scale: 75% -.. topic:: Examples: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_mixture_plot_gmm_covariances.py` for an example of - using the Gaussian mixture as clustering on the iris dataset. +* See :ref:`sphx_glr_auto_examples_mixture_plot_gmm_covariances.py` for an example of + using the Gaussian mixture as clustering on the iris dataset. - * See :ref:`sphx_glr_auto_examples_mixture_plot_gmm_pdf.py` for an example on plotting the - density estimation. +* See :ref:`sphx_glr_auto_examples_mixture_plot_gmm_pdf.py` for an example on plotting the + density estimation. -Pros and cons of class :class:`GaussianMixture` ------------------------------------------------ +.. dropdown:: Pros and cons of class GaussianMixture -Pros -.... + .. rubric:: Pros -:Speed: It is the fastest algorithm for learning mixture models + :Speed: It is the fastest algorithm for learning mixture models -:Agnostic: As this algorithm maximizes only the likelihood, it - will not bias the means towards zero, or bias the cluster sizes to - have specific structures that might or might not apply. + :Agnostic: As this algorithm maximizes only the likelihood, it + will not bias the means towards zero, or bias the cluster sizes to + have specific structures that might or might not apply. -Cons -.... + .. rubric:: Cons -:Singularities: When one has insufficiently many points per - mixture, estimating the covariance matrices becomes difficult, - and the algorithm is known to diverge and find solutions with - infinite likelihood unless one regularizes the covariances artificially. + :Singularities: When one has insufficiently many points per + mixture, estimating the covariance matrices becomes difficult, + and the algorithm is known to diverge and find solutions with + infinite likelihood unless one regularizes the covariances artificially. -:Number of components: This algorithm will always use all the - components it has access to, needing held-out data - or information theoretical criteria to decide how many components to use - in the absence of external cues. 
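As an illustration (the toy data and the candidate component counts below are invented for this sketch), an information criterion such as the BIC can be compared across several fitted models to choose the number of components::

    >>> import numpy as np
    >>> from sklearn.mixture import GaussianMixture
    >>> rng = np.random.RandomState(0)
    >>> # toy data: two well-separated Gaussian blobs
    >>> X = np.vstack([rng.randn(100, 2), rng.randn(100, 2) + 5])
    >>> bic = {k: GaussianMixture(n_components=k, random_state=0).fit(X).bic(X)
    ...        for k in (1, 2, 3, 4)}
    >>> best_k = min(bic, key=bic.get)   # candidate with the lowest BIC

The same kind of loop works with ``aic`` instead of ``bic``; both criteria penalize the extra parameters introduced by additional components.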
+ :Number of components: This algorithm will always use all the + components it has access to, needing held-out data + or information theoretical criteria to decide how many components to use + in the absence of external cues. -Selecting the number of components in a classical Gaussian Mixture Model ------------------------------------------------------------------------- +.. dropdown:: Selecting the number of components in a classical Gaussian Mixture model -The BIC criterion can be used to select the number of components in a Gaussian -Mixture in an efficient way. In theory, it recovers the true number of -components only in the asymptotic regime (i.e. if much data is available and -assuming that the data was actually generated i.i.d. from a mixture of Gaussian -distribution). Note that using a :ref:`Variational Bayesian Gaussian mixture ` -avoids the specification of the number of components for a Gaussian mixture -model. + The BIC criterion can be used to select the number of components in a Gaussian + Mixture in an efficient way. In theory, it recovers the true number of + components only in the asymptotic regime (i.e. if much data is available and + assuming that the data was actually generated i.i.d. from a mixture of Gaussian + distribution). Note that using a :ref:`Variational Bayesian Gaussian mixture ` + avoids the specification of the number of components for a Gaussian mixture + model. -.. figure:: ../auto_examples/mixture/images/sphx_glr_plot_gmm_selection_002.png - :target: ../auto_examples/mixture/plot_gmm_selection.html - :align: center - :scale: 50% + .. figure:: ../auto_examples/mixture/images/sphx_glr_plot_gmm_selection_002.png + :target: ../auto_examples/mixture/plot_gmm_selection.html + :align: center + :scale: 50% -.. topic:: Examples: + .. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_mixture_plot_gmm_selection.py` for an example - of model selection performed with classical Gaussian mixture. + * See :ref:`sphx_glr_auto_examples_mixture_plot_gmm_selection.py` for an example + of model selection performed with classical Gaussian mixture. .. _expectation_maximization: -Estimation algorithm Expectation-maximization ------------------------------------------------ - -The main difficulty in learning Gaussian mixture models from unlabeled -data is that one usually doesn't know which points came from -which latent component (if one has access to this information it gets -very easy to fit a separate Gaussian distribution to each set of -points). `Expectation-maximization -`_ -is a well-founded statistical -algorithm to get around this problem by an iterative process. First -one assumes random components (randomly centered on data points, -learned from k-means, or even just normally distributed around the -origin) and computes for each point a probability of being generated by -each component of the model. Then, one tweaks the -parameters to maximize the likelihood of the data given those -assignments. Repeating this process is guaranteed to always converge -to a local optimum. - -Choice of the Initialization Method ------------------------------------ - -There is a choice of four initialization methods (as well as inputting user defined -initial means) to generate the initial centers for the model components: - -k-means (default) - This applies a traditional k-means clustering algorithm. - This can be computationally expensive compared to other initialization methods. - -k-means++ - This uses the initialization method of k-means clustering: k-means++. 
- This will pick the first center at random from the data. Subsequent centers will be - chosen from a weighted distribution of the data favouring points further away from - existing centers. k-means++ is the default initialization for k-means so will be - quicker than running a full k-means but can still take a significant amount of - time for large data sets with many components. - -random_from_data - This will pick random data points from the input data as the initial - centers. This is a very fast method of initialization but can produce non-convergent - results if the chosen points are too close to each other. - -random - Centers are chosen as a small perturbation away from the mean of all data. - This method is simple but can lead to the model taking longer to converge. - -.. figure:: ../auto_examples/mixture/images/sphx_glr_plot_gmm_init_001.png - :target: ../auto_examples/mixture/plot_gmm_init.html - :align: center - :scale: 50% - -.. topic:: Examples: - - * See :ref:`sphx_glr_auto_examples_mixture_plot_gmm_init.py` for an example of - using different initializations in Gaussian Mixture. +.. dropdown:: Estimation algorithm expectation-maximization + + The main difficulty in learning Gaussian mixture models from unlabeled + data is that one usually doesn't know which points came from + which latent component (if one has access to this information it gets + very easy to fit a separate Gaussian distribution to each set of + points). `Expectation-maximization + `_ + is a well-founded statistical + algorithm to get around this problem by an iterative process. First + one assumes random components (randomly centered on data points, + learned from k-means, or even just normally distributed around the + origin) and computes for each point a probability of being generated by + each component of the model. Then, one tweaks the + parameters to maximize the likelihood of the data given those + assignments. Repeating this process is guaranteed to always converge + to a local optimum. + +.. dropdown:: Choice of the Initialization method + + There is a choice of four initialization methods (as well as inputting user defined + initial means) to generate the initial centers for the model components: + + k-means (default) + This applies a traditional k-means clustering algorithm. + This can be computationally expensive compared to other initialization methods. + + k-means++ + This uses the initialization method of k-means clustering: k-means++. + This will pick the first center at random from the data. Subsequent centers will be + chosen from a weighted distribution of the data favouring points further away from + existing centers. k-means++ is the default initialization for k-means so will be + quicker than running a full k-means but can still take a significant amount of + time for large data sets with many components. + + random_from_data + This will pick random data points from the input data as the initial + centers. This is a very fast method of initialization but can produce non-convergent + results if the chosen points are too close to each other. + + random + Centers are chosen as a small perturbation away from the mean of all data. + This method is simple but can lead to the model taking longer to converge. + + .. figure:: ../auto_examples/mixture/images/sphx_glr_plot_gmm_init_001.png + :target: ../auto_examples/mixture/plot_gmm_init.html + :align: center + :scale: 50% + + .. 
rubric:: Examples + + * See :ref:`sphx_glr_auto_examples_mixture_plot_gmm_init.py` for an example of + using different initializations in Gaussian Mixture. .. _bgmm: @@ -183,8 +177,7 @@ similar to the one defined by :class:`GaussianMixture`. .. _variational_inference: -Estimation algorithm: variational inference ---------------------------------------------- +**Estimation algorithm: variational inference** Variational inference is an extension of expectation-maximization that maximizes a lower bound on model evidence (including @@ -266,64 +259,58 @@ from the two resulting mixtures. -.. topic:: Examples: - - * See :ref:`sphx_glr_auto_examples_mixture_plot_gmm.py` for an example on - plotting the confidence ellipsoids for both :class:`GaussianMixture` - and :class:`BayesianGaussianMixture`. - - * :ref:`sphx_glr_auto_examples_mixture_plot_gmm_sin.py` shows using - :class:`GaussianMixture` and :class:`BayesianGaussianMixture` to fit a - sine wave. - - * See :ref:`sphx_glr_auto_examples_mixture_plot_concentration_prior.py` - for an example plotting the confidence ellipsoids for the - :class:`BayesianGaussianMixture` with different - ``weight_concentration_prior_type`` for different values of the parameter - ``weight_concentration_prior``. +.. rubric:: Examples +* See :ref:`sphx_glr_auto_examples_mixture_plot_gmm.py` for an example on + plotting the confidence ellipsoids for both :class:`GaussianMixture` + and :class:`BayesianGaussianMixture`. -Pros and cons of variational inference with :class:`BayesianGaussianMixture` ----------------------------------------------------------------------------- +* :ref:`sphx_glr_auto_examples_mixture_plot_gmm_sin.py` shows using + :class:`GaussianMixture` and :class:`BayesianGaussianMixture` to fit a + sine wave. -Pros -..... +* See :ref:`sphx_glr_auto_examples_mixture_plot_concentration_prior.py` + for an example plotting the confidence ellipsoids for the + :class:`BayesianGaussianMixture` with different + ``weight_concentration_prior_type`` for different values of the parameter + ``weight_concentration_prior``. -:Automatic selection: when ``weight_concentration_prior`` is small enough and - ``n_components`` is larger than what is found necessary by the model, the - Variational Bayesian mixture model has a natural tendency to set some mixture - weights values close to zero. This makes it possible to let the model choose - a suitable number of effective components automatically. Only an upper bound - of this number needs to be provided. Note however that the "ideal" number of - active components is very application specific and is typically ill-defined - in a data exploration setting. +.. dropdown:: Pros and cons of variational inference with BayesianGaussianMixture -:Less sensitivity to the number of parameters: unlike finite models, which will - almost always use all components as much as they can, and hence will produce - wildly different solutions for different numbers of components, the - variational inference with a Dirichlet process prior - (``weight_concentration_prior_type='dirichlet_process'``) won't change much - with changes to the parameters, leading to more stability and less tuning. + .. rubric:: Pros -:Regularization: due to the incorporation of prior information, - variational solutions have less pathological special cases than - expectation-maximization solutions. 
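For illustration only (the toy data, the upper bound on ``n_components`` and the threshold used to count active components are all arbitrary choices for this sketch), the automatic selection of the effective number of components discussed in this section can be observed by fitting :class:`BayesianGaussianMixture` with a deliberately generous number of components and a small ``weight_concentration_prior``::

    >>> import numpy as np
    >>> from sklearn.mixture import BayesianGaussianMixture
    >>> rng = np.random.RandomState(42)
    >>> X = np.vstack([rng.randn(200, 2), rng.randn(200, 2) + 6])   # two blobs
    >>> bgmm = BayesianGaussianMixture(
    ...     n_components=10,                  # upper bound, larger than needed
    ...     weight_concentration_prior=1e-2,  # small prior favours few components
    ...     max_iter=500, random_state=42).fit(X)
    >>> # weights of superfluous components are driven towards zero
    >>> n_active = int((bgmm.weights_ > 1e-2).sum())

Inspecting ``bgmm.weights_`` directly shows how much posterior mass each of the ten candidate components actually receives.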
+ :Automatic selection: when ``weight_concentration_prior`` is small enough and + ``n_components`` is larger than what is found necessary by the model, the + Variational Bayesian mixture model has a natural tendency to set some mixture + weights values close to zero. This makes it possible to let the model choose + a suitable number of effective components automatically. Only an upper bound + of this number needs to be provided. Note however that the "ideal" number of + active components is very application specific and is typically ill-defined + in a data exploration setting. + :Less sensitivity to the number of parameters: unlike finite models, which will + almost always use all components as much as they can, and hence will produce + wildly different solutions for different numbers of components, the + variational inference with a Dirichlet process prior + (``weight_concentration_prior_type='dirichlet_process'``) won't change much + with changes to the parameters, leading to more stability and less tuning. -Cons -..... + :Regularization: due to the incorporation of prior information, + variational solutions have less pathological special cases than + expectation-maximization solutions. -:Speed: the extra parametrization necessary for variational inference makes - inference slower, although not by much. + .. rubric:: Cons -:Hyperparameters: this algorithm needs an extra hyperparameter - that might need experimental tuning via cross-validation. + :Speed: the extra parametrization necessary for variational inference makes + inference slower, although not by much. -:Bias: there are many implicit biases in the inference algorithms (and also in - the Dirichlet process if used), and whenever there is a mismatch between - these biases and the data it might be possible to fit better models using a - finite mixture. + :Hyperparameters: this algorithm needs an extra hyperparameter + that might need experimental tuning via cross-validation. + :Bias: there are many implicit biases in the inference algorithms (and also in + the Dirichlet process if used), and whenever there is a mismatch between + these biases and the data it might be possible to fit better models using a + finite mixture. .. 
_dirichlet_process: diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index 670e661d92ef7..eff6684458deb 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -77,6 +77,7 @@ Scoring Function 'roc_auc_ovo' :func:`metrics.roc_auc_score` 'roc_auc_ovr_weighted' :func:`metrics.roc_auc_score` 'roc_auc_ovo_weighted' :func:`metrics.roc_auc_score` +'d2_log_loss_score' :func:`metrics.d2_log_loss_score` **Clustering** 'adjusted_mutual_info_score' :func:`metrics.adjusted_mutual_info_score` @@ -94,19 +95,17 @@ Scoring Function 'max_error' :func:`metrics.max_error` 'neg_mean_absolute_error' :func:`metrics.mean_absolute_error` 'neg_mean_squared_error' :func:`metrics.mean_squared_error` -'neg_root_mean_squared_error' :func:`metrics.mean_squared_error` +'neg_root_mean_squared_error' :func:`metrics.root_mean_squared_error` 'neg_mean_squared_log_error' :func:`metrics.mean_squared_log_error` +'neg_root_mean_squared_log_error' :func:`metrics.root_mean_squared_log_error` 'neg_median_absolute_error' :func:`metrics.median_absolute_error` 'r2' :func:`metrics.r2_score` 'neg_mean_poisson_deviance' :func:`metrics.mean_poisson_deviance` 'neg_mean_gamma_deviance' :func:`metrics.mean_gamma_deviance` 'neg_mean_absolute_percentage_error' :func:`metrics.mean_absolute_percentage_error` -'d2_absolute_error_score' :func:`metrics.d2_absolute_error_score` -'d2_pinball_score' :func:`metrics.d2_pinball_score` -'d2_tweedie_score' :func:`metrics.d2_tweedie_score` +'d2_absolute_error_score' :func:`metrics.d2_absolute_error_score` ==================================== ============================================== ================================== - Usage examples: >>> from sklearn import svm, datasets @@ -129,125 +128,148 @@ Usage examples: Defining your scoring strategy from metric functions ----------------------------------------------------- +The following metrics functions are not implemented as named scorers, +sometimes because they require additional parameters, such as +:func:`fbeta_score`. They cannot be passed to the ``scoring`` +parameters; instead their callable needs to be passed to +:func:`make_scorer` together with the value of the user-settable +parameters. + +===================================== ========= ============================================== +Function Parameter Example usage +===================================== ========= ============================================== +**Classification** +:func:`metrics.fbeta_score` ``beta`` ``make_scorer(fbeta_score, beta=2)`` + +**Regression** +:func:`metrics.mean_tweedie_deviance` ``power`` ``make_scorer(mean_tweedie_deviance, power=1.5)`` +:func:`metrics.mean_pinball_loss` ``alpha`` ``make_scorer(mean_pinball_loss, alpha=0.95)`` +:func:`metrics.d2_tweedie_score` ``power`` ``make_scorer(d2_tweedie_score, power=1.5)`` +:func:`metrics.d2_pinball_score` ``alpha`` ``make_scorer(d2_pinball_score, alpha=0.95)`` +===================================== ========= ============================================== + +One typical use case is to wrap an existing metric function from the library +with non-default values for its parameters, such as the ``beta`` parameter for +the :func:`fbeta_score` function:: + + >>> from sklearn.metrics import fbeta_score, make_scorer + >>> ftwo_scorer = make_scorer(fbeta_score, beta=2) + >>> from sklearn.model_selection import GridSearchCV + >>> from sklearn.svm import LinearSVC + >>> grid = GridSearchCV(LinearSVC(), param_grid={'C': [1, 10]}, + ... 
scoring=ftwo_scorer, cv=5) + The module :mod:`sklearn.metrics` also exposes a set of simple functions measuring a prediction error given ground truth and prediction: - functions ending with ``_score`` return a value to maximize, the higher the better. -- functions ending with ``_error`` or ``_loss`` return a +- functions ending with ``_error``, ``_loss``, or ``_deviance`` return a value to minimize, the lower the better. When converting into a scorer object using :func:`make_scorer`, set the ``greater_is_better`` parameter to ``False`` (``True`` by default; see the parameter description below). -Metrics available for various machine learning tasks are detailed in sections -below. -Many metrics are not given names to be used as ``scoring`` values, -sometimes because they require additional parameters, such as -:func:`fbeta_score`. In such cases, you need to generate an appropriate -scoring object. The simplest way to generate a callable object for scoring -is by using :func:`make_scorer`. That function converts metrics -into callables that can be used for model evaluation. +.. dropdown:: Custom scorer objects + + The second use case is to build a completely custom scorer object + from a simple python function using :func:`make_scorer`, which can + take several parameters: + + * the python function you want to use (``my_custom_loss_func`` + in the example below) + + * whether the python function returns a score (``greater_is_better=True``, + the default) or a loss (``greater_is_better=False``). If a loss, the output + of the python function is negated by the scorer object, conforming to + the cross validation convention that scorers return higher values for better models. + + * for classification metrics only: whether the python function you provided requires + continuous decision certainties. If the scoring function only accepts probability + estimates (e.g. :func:`metrics.log_loss`) then one needs to set the parameter + `response_method`, thus in this case `response_method="predict_proba"`. Some scoring + function do not necessarily require probability estimates but rather non-thresholded + decision values (e.g. :func:`metrics.roc_auc_score`). In this case, one provides a + list such as `response_method=["decision_function", "predict_proba"]`. In this case, + the scorer will use the first available method, in the order given in the list, + to compute the scores. + + * any additional parameters, such as ``beta`` or ``labels`` in :func:`f1_score`. + + Here is an example of building custom scorers, and of using the + ``greater_is_better`` parameter:: + + >>> import numpy as np + >>> def my_custom_loss_func(y_true, y_pred): + ... diff = np.abs(y_true - y_pred).max() + ... return np.log1p(diff) + ... + >>> # score will negate the return value of my_custom_loss_func, + >>> # which will be np.log(2), 0.693, given the values for X + >>> # and y defined below. + >>> score = make_scorer(my_custom_loss_func, greater_is_better=False) + >>> X = [[1], [1]] + >>> y = [0, 1] + >>> from sklearn.dummy import DummyClassifier + >>> clf = DummyClassifier(strategy='most_frequent', random_state=0) + >>> clf = clf.fit(X, y) + >>> my_custom_loss_func(y, clf.predict(X)) + 0.69... + >>> score(clf, X, y) + -0.69... -One typical use case is to wrap an existing metric function from the library -with non-default values for its parameters, such as the ``beta`` parameter for -the :func:`fbeta_score` function:: +.. 
_diy_scoring: - >>> from sklearn.metrics import fbeta_score, make_scorer - >>> ftwo_scorer = make_scorer(fbeta_score, beta=2) - >>> from sklearn.model_selection import GridSearchCV - >>> from sklearn.svm import LinearSVC - >>> grid = GridSearchCV(LinearSVC(dual="auto"), param_grid={'C': [1, 10]}, - ... scoring=ftwo_scorer, cv=5) +Implementing your own scoring object +------------------------------------ + +You can generate even more flexible model scorers by constructing your own +scoring object from scratch, without using the :func:`make_scorer` factory. -The second use case is to build a completely custom scorer object -from a simple python function using :func:`make_scorer`, which can -take several parameters: -* the python function you want to use (``my_custom_loss_func`` - in the example below) +.. dropdown:: How to build a scorer from scratch -* whether the python function returns a score (``greater_is_better=True``, - the default) or a loss (``greater_is_better=False``). If a loss, the output - of the python function is negated by the scorer object, conforming to - the cross validation convention that scorers return higher values for better models. + For a callable to be a scorer, it needs to meet the protocol specified by + the following two rules: -* for classification metrics only: whether the python function you provided requires continuous decision - certainties (``needs_threshold=True``). The default value is - False. + - It can be called with parameters ``(estimator, X, y)``, where ``estimator`` + is the model that should be evaluated, ``X`` is validation data, and ``y`` is + the ground truth target for ``X`` (in the supervised case) or ``None`` (in the + unsupervised case). -* any additional parameters, such as ``beta`` or ``labels`` in :func:`f1_score`. + - It returns a floating point number that quantifies the + ``estimator`` prediction quality on ``X``, with reference to ``y``. + Again, by convention higher numbers are better, so if your scorer + returns loss, that value should be negated. -Here is an example of building custom scorers, and of using the -``greater_is_better`` parameter:: + - Advanced: If it requires extra metadata to be passed to it, it should expose + a ``get_metadata_routing`` method returning the requested metadata. The user + should be able to set the requested metadata via a ``set_score_request`` + method. Please see :ref:`User Guide ` and :ref:`Developer + Guide ` for + more details. - >>> import numpy as np - >>> def my_custom_loss_func(y_true, y_pred): - ... diff = np.abs(y_true - y_pred).max() - ... return np.log1p(diff) - ... - >>> # score will negate the return value of my_custom_loss_func, - >>> # which will be np.log(2), 0.693, given the values for X - >>> # and y defined below. - >>> score = make_scorer(my_custom_loss_func, greater_is_better=False) - >>> X = [[1], [1]] - >>> y = [0, 1] - >>> from sklearn.dummy import DummyClassifier - >>> clf = DummyClassifier(strategy='most_frequent', random_state=0) - >>> clf = clf.fit(X, y) - >>> my_custom_loss_func(y, clf.predict(X)) - 0.69... - >>> score(clf, X, y) - -0.69... + .. note:: **Using custom scorers in functions where n_jobs > 1** -.. _diy_scoring: + While defining the custom scoring function alongside the calling function + should work out of the box with the default joblib backend (loky), + importing it from another module will be a more robust approach and work + independently of the joblib backend. 
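A minimal from-scratch scorer that follows this protocol might look like the following sketch (the toy data and the choice of metric are purely illustrative)::

    >>> import numpy as np
    >>> from sklearn.dummy import DummyClassifier
    >>> def negative_mae_scorer(estimator, X, y):
    ...     # follows the (estimator, X, y) protocol; the loss is negated so
    ...     # that higher returned values mean a better model
    ...     y_pred = estimator.predict(X)
    ...     return -float(np.mean(np.abs(np.asarray(y) - y_pred)))
    ...
    >>> X, y = [[0], [1], [2], [3]], [0, 0, 0, 1]
    >>> clf = DummyClassifier(strategy='most_frequent').fit(X, y)
    >>> negative_mae_scorer(clf, X, y)
    -0.25

Because it respects the ``(estimator, X, y)`` protocol, such a callable can be passed directly as the ``scoring`` argument of, for example, :func:`~sklearn.model_selection.cross_val_score`.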
-Implementing your own scoring object ------------------------------------- -You can generate even more flexible model scorers by constructing your own -scoring object from scratch, without using the :func:`make_scorer` factory. -For a callable to be a scorer, it needs to meet the protocol specified by -the following two rules: - -- It can be called with parameters ``(estimator, X, y)``, where ``estimator`` - is the model that should be evaluated, ``X`` is validation data, and ``y`` is - the ground truth target for ``X`` (in the supervised case) or ``None`` (in the - unsupervised case). - -- It returns a floating point number that quantifies the - ``estimator`` prediction quality on ``X``, with reference to ``y``. - Again, by convention higher numbers are better, so if your scorer - returns loss, that value should be negated. - -- Advanced: If it requires extra metadata to be passed to it, it should expose - a ``get_metadata_routing`` method returning the requested metadata. The user - should be able to set the requested metadata via a ``set_score_request`` - method. Please see :ref:`User Guide ` and :ref:`Developer - Guide ` for - more details. - - -.. note:: **Using custom scorers in functions where n_jobs > 1** - - While defining the custom scoring function alongside the calling function - should work out of the box with the default joblib backend (loky), - importing it from another module will be a more robust approach and work - independently of the joblib backend. - - For example, to use ``n_jobs`` greater than 1 in the example below, - ``custom_scoring_function`` function is saved in a user-created module - (``custom_scorer_module.py``) and imported:: - - >>> from custom_scorer_module import custom_scoring_function # doctest: +SKIP - >>> cross_val_score(model, - ... X_train, - ... y_train, - ... scoring=make_scorer(custom_scoring_function, greater_is_better=False), - ... cv=5, - ... n_jobs=-1) # doctest: +SKIP + For example, to use ``n_jobs`` greater than 1 in the example below, + ``custom_scoring_function`` function is saved in a user-created module + (``custom_scorer_module.py``) and imported:: + + >>> from custom_scorer_module import custom_scoring_function # doctest: +SKIP + >>> cross_val_score(model, + ... X_train, + ... y_train, + ... scoring=make_scorer(custom_scoring_function, greater_is_better=False), + ... cv=5, + ... n_jobs=-1) # doctest: +SKIP .. _multimetric_scoring: @@ -278,7 +300,7 @@ parameter: >>> from sklearn.metrics import confusion_matrix >>> # A sample toy binary classification dataset >>> X, y = datasets.make_classification(n_classes=2, random_state=0) - >>> svm = LinearSVC(dual="auto", random_state=0) + >>> svm = LinearSVC(random_state=0) >>> def confusion_matrix_scorer(clf, X, y): ... y_pred = clf.predict(X) ... cm = confusion_matrix(y, y_pred) @@ -347,6 +369,7 @@ Some also work in the multilabel case: recall_score roc_auc_score zero_one_loss + d2_log_loss_score And some work with binary and multilabel (but not multiclass) problems: @@ -435,18 +458,18 @@ where :math:`1(x)` is the `indicator function >>> accuracy_score(y_true, y_pred) 0.5 >>> accuracy_score(y_true, y_pred, normalize=False) - 2 + 2.0 In the multilabel case with binary label indicators:: >>> accuracy_score(np.array([[0, 1], [1, 1]]), np.ones((2, 2))) 0.5 -.. topic:: Example: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_model_selection_plot_permutation_tests_for_classification.py` - for an example of accuracy score usage using permutations of - the dataset. 
+* See :ref:`sphx_glr_auto_examples_model_selection_plot_permutation_tests_for_classification.py` + for an example of accuracy score usage using permutations of + the dataset. .. _top_k_accuracy_score: @@ -557,22 +580,20 @@ or *informedness*. * Balanced Accuracy as described in [Urbanowicz2015]_: the average of sensitivity and specificity is computed for each class and then averaged over total number of classes. -.. topic:: References: - - .. [Guyon2015] I. Guyon, K. Bennett, G. Cawley, H.J. Escalante, S. Escalera, T.K. Ho, N. Macià, - B. Ray, M. Saeed, A.R. Statnikov, E. Viegas, `Design of the 2015 ChaLearn AutoML Challenge - `_, - IJCNN 2015. - .. [Mosley2013] L. Mosley, `A balanced approach to the multi-class imbalance problem - `_, - IJCV 2010. - .. [Kelleher2015] John. D. Kelleher, Brian Mac Namee, Aoife D'Arcy, `Fundamentals of - Machine Learning for Predictive Data Analytics: Algorithms, Worked Examples, - and Case Studies `_, - 2015. - .. [Urbanowicz2015] Urbanowicz R.J., Moore, J.H. :doi:`ExSTraCS 2.0: description - and evaluation of a scalable learning classifier - system <10.1007/s12065-015-0128-8>`, Evol. Intel. (2015) 8: 89. +.. rubric:: References + +.. [Guyon2015] I. Guyon, K. Bennett, G. Cawley, H.J. Escalante, S. Escalera, T.K. Ho, N. Macià, + B. Ray, M. Saeed, A.R. Statnikov, E. Viegas, `Design of the 2015 ChaLearn AutoML Challenge + `_, IJCNN 2015. +.. [Mosley2013] L. Mosley, `A balanced approach to the multi-class imbalance problem + `_, IJCV 2010. +.. [Kelleher2015] John. D. Kelleher, Brian Mac Namee, Aoife D'Arcy, `Fundamentals of + Machine Learning for Predictive Data Analytics: Algorithms, Worked Examples, + and Case Studies `_, + 2015. +.. [Urbanowicz2015] Urbanowicz R.J., Moore, J.H. :doi:`ExSTraCS 2.0: description + and evaluation of a scalable learning classifier + system <10.1007/s12065-015-0128-8>`, Evol. Intel. (2015) 8: 89. .. _cohen_kappa: @@ -651,19 +672,19 @@ false negatives and true positives as follows:: >>> tn, fp, fn, tp (2, 1, 2, 3) -.. topic:: Example: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_model_selection_plot_confusion_matrix.py` - for an example of using a confusion matrix to evaluate classifier output - quality. +* See :ref:`sphx_glr_auto_examples_model_selection_plot_confusion_matrix.py` + for an example of using a confusion matrix to evaluate classifier output + quality. - * See :ref:`sphx_glr_auto_examples_classification_plot_digits_classification.py` - for an example of using a confusion matrix to classify - hand-written digits. +* See :ref:`sphx_glr_auto_examples_classification_plot_digits_classification.py` + for an example of using a confusion matrix to classify + hand-written digits. - * See :ref:`sphx_glr_auto_examples_text_plot_document_classification_20newsgroups.py` - for an example of using a confusion matrix to classify text - documents. +* See :ref:`sphx_glr_auto_examples_text_plot_document_classification_20newsgroups.py` + for an example of using a confusion matrix to classify text + documents. .. _classification_report: @@ -690,15 +711,15 @@ and inferred labels:: weighted avg 0.67 0.60 0.59 5 -.. topic:: Example: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_classification_plot_digits_classification.py` - for an example of classification report usage for - hand-written digits. +* See :ref:`sphx_glr_auto_examples_classification_plot_digits_classification.py` + for an example of classification report usage for + hand-written digits. 
- * See :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_digits.py` - for an example of classification report usage for - grid search with nested cross-validation. +* See :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_digits.py` + for an example of classification report usage for + grid search with nested cross-validation. .. _hamming_loss: @@ -807,8 +828,8 @@ binary case. The :func:`average_precision_score` function supports multiclass and multilabel formats by computing each class score in a One-vs-the-rest (OvR) fashion and averaging them or not depending of its ``average`` argument value. -The :func:`PredictionRecallDisplay.from_estimator` and -:func:`PredictionRecallDisplay.from_predictions` functions will plot the +The :func:`PrecisionRecallDisplay.from_estimator` and +:func:`PrecisionRecallDisplay.from_predictions` functions will plot the precision-recall curve as follows. .. image:: ../auto_examples/model_selection/images/sphx_glr_plot_precision_recall_001.png @@ -816,33 +837,31 @@ precision-recall curve as follows. :scale: 75 :align: center -.. topic:: Examples: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_digits.py` - for an example of :func:`precision_score` and :func:`recall_score` usage - to estimate parameters using grid search with nested cross-validation. +* See :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_digits.py` + for an example of :func:`precision_score` and :func:`recall_score` usage + to estimate parameters using grid search with nested cross-validation. - * See :ref:`sphx_glr_auto_examples_model_selection_plot_precision_recall.py` - for an example of :func:`precision_recall_curve` usage to evaluate - classifier output quality. +* See :ref:`sphx_glr_auto_examples_model_selection_plot_precision_recall.py` + for an example of :func:`precision_recall_curve` usage to evaluate + classifier output quality. +.. rubric:: References -.. topic:: References: - - .. [Manning2008] C.D. Manning, P. Raghavan, H. SchÃŧtze, `Introduction to Information Retrieval - `_, - 2008. - .. [Everingham2010] M. Everingham, L. Van Gool, C.K.I. Williams, J. Winn, A. Zisserman, - `The Pascal Visual Object Classes (VOC) Challenge - `_, - IJCV 2010. - .. [Davis2006] J. Davis, M. Goadrich, `The Relationship Between Precision-Recall and ROC Curves - `_, - ICML 2006. - .. [Flach2015] P.A. Flach, M. Kull, `Precision-Recall-Gain Curves: PR Analysis Done Right - `_, - NIPS 2015. - +.. [Manning2008] C.D. Manning, P. Raghavan, H. SchÃŧtze, `Introduction to Information Retrieval + `_, + 2008. +.. [Everingham2010] M. Everingham, L. Van Gool, C.K.I. Williams, J. Winn, A. Zisserman, + `The Pascal Visual Object Classes (VOC) Challenge + `_, + IJCV 2010. +.. [Davis2006] J. Davis, M. Goadrich, `The Relationship Between Precision-Recall and ROC Curves + `_, + ICML 2006. +.. [Flach2015] P.A. Flach, M. Kull, `Precision-Recall-Gain Curves: PR Analysis Done Right + `_, + NIPS 2015. Binary classification ^^^^^^^^^^^^^^^^^^^^^ @@ -863,22 +882,36 @@ following table: | | Missing result | Correct absence of result| +-------------------+---------------------+--------------------------+ -In this context, we can define the notions of precision, recall and F-measure: +In this context, we can define the notions of precision and recall: .. math:: - \text{precision} = \frac{tp}{tp + fp}, + \text{precision} = \frac{\text{tp}}{\text{tp} + \text{fp}}, .. 
math:: - \text{recall} = \frac{tp}{tp + fn}, + \text{recall} = \frac{\text{tp}}{\text{tp} + \text{fn}}, + +(Sometimes recall is also called ''sensitivity'') + +F-measure is the weighted harmonic mean of precision and recall, with precision's +contribution to the mean weighted by some parameter :math:`\beta`: .. math:: - F_\beta = (1 + \beta^2) \frac{\text{precision} \times \text{recall}}{\beta^2 \text{precision} + \text{recall}}. + F_\beta = (1 + \beta^2) \frac{\text{precision} \times \text{recall}}{\beta^2 \text{precision} + \text{recall}} -Sometimes recall is also called ''sensitivity''. +To avoid division by zero when precision and recall are zero, Scikit-Learn calculates F-measure with this +otherwise-equivalent formula: +.. math:: + + F_\beta = \frac{(1 + \beta^2) \text{tp}}{(1 + \beta^2) \text{tp} + \text{fp} + \beta^2 \text{fn}} + +Note that this formula is still undefined when there are no true positives, false +positives, or false negatives. By default, F-1 for a set of exclusively true negatives +is calculated as 0, however this behavior can be changed using the `zero_division` +parameter. Here are some small examples in binary classification:: >>> from sklearn import metrics @@ -926,10 +959,17 @@ specified by the ``average`` argument to the :func:`average_precision_score`, :func:`f1_score`, :func:`fbeta_score`, :func:`precision_recall_fscore_support`, :func:`precision_score` and :func:`recall_score` functions, as described -:ref:`above `. Note that if all labels are included, "micro"-averaging -in a multiclass setting will produce precision, recall and :math:`F` -that are all identical to accuracy. Also note that "weighted" averaging may -produce an F-score that is not between precision and recall. +:ref:`above `. + +Note the following behaviors when averaging: + +* If all labels are included, "micro"-averaging in a multiclass setting will produce + precision, recall and :math:`F` that are all identical to accuracy. +* "weighted" averaging may produce a F-score that is not between precision and recall. +* "macro" averaging for F-measures is calculated as the arithmetic mean over + per-label/class F-measures, not the harmonic mean over the arithmetic precision and + recall means. Both calculations can be seen in the literature but are not equivalent, + see [OB2019]_ for details. To make this more explicit, consider the following notation: @@ -990,6 +1030,11 @@ Similarly, labels not present in the data sample may be accounted for in macro-a >>> metrics.precision_score(y_true, y_pred, labels=[0, 1, 2, 3], average='macro') 0.166... +.. rubric:: References + +.. [OB2019] :arxiv:`Opitz, J., & Burst, S. (2019). "Macro f1 and macro f1." + <1911.03347>` + .. 
_jaccard_similarity_score: Jaccard similarity coefficient score @@ -1094,9 +1139,9 @@ with a svm classifier in a binary class problem:: >>> from sklearn.metrics import hinge_loss >>> X = [[0], [1]] >>> y = [-1, 1] - >>> est = svm.LinearSVC(dual="auto", random_state=0) + >>> est = svm.LinearSVC(random_state=0) >>> est.fit(X, y) - LinearSVC(dual='auto', random_state=0) + LinearSVC(random_state=0) >>> pred_decision = est.decision_function([[-2], [3], [0.5]]) >>> pred_decision array([-2.18..., 2.36..., 0.09...]) @@ -1109,9 +1154,9 @@ with a svm classifier in a multiclass problem:: >>> X = np.array([[0], [1], [2], [3]]) >>> Y = np.array([0, 1, 2, 3]) >>> labels = np.array([0, 1, 2, 3]) - >>> est = svm.LinearSVC(dual="auto") + >>> est = svm.LinearSVC() >>> est.fit(X, Y) - LinearSVC(dual='auto') + LinearSVC() >>> pred_decision = est.decision_function([[-1], [2], [3]]) >>> y_true = [0, 2, 3] >>> hinge_loss(y_true, pred_decision, labels=labels) @@ -1222,6 +1267,7 @@ When there are more than two labels, the value of the MCC will no longer range between -1 and +1. Instead the minimum value will be somewhere between -1 and 0 depending on the number and distribution of ground true labels. The maximum value is always +1. +For additional information, see [WikipediaMCC2021]_. Here is a small example illustrating the usage of the :func:`matthews_corrcoef` function: @@ -1232,6 +1278,13 @@ function: >>> matthews_corrcoef(y_true, y_pred) -0.33... +.. topic:: References: + + .. [WikipediaMCC2021] Wikipedia contributors. Phi coefficient. + Wikipedia, The Free Encyclopedia. April 21, 2021, 12:21 CEST. + Available at: https://en.wikipedia.org/wiki/Phi_coefficient + Accessed April 21, 2021. + .. _multilabel_confusion_matrix: Multi-label confusion matrix @@ -1440,53 +1493,57 @@ correspond to the probability estimates that a sample belongs to a particular class. The OvO and OvR algorithms support weighting uniformly (``average='macro'``) and by prevalence (``average='weighted'``). -**One-vs-one Algorithm**: Computes the average AUC of all possible pairwise -combinations of classes. [HT2001]_ defines a multiclass AUC metric weighted -uniformly: +.. dropdown:: One-vs-one Algorithm -.. math:: + Computes the average AUC of all possible pairwise + combinations of classes. [HT2001]_ defines a multiclass AUC metric weighted + uniformly: - \frac{1}{c(c-1)}\sum_{j=1}^{c}\sum_{k > j}^c (\text{AUC}(j | k) + - \text{AUC}(k | j)) + .. math:: -where :math:`c` is the number of classes and :math:`\text{AUC}(j | k)` is the -AUC with class :math:`j` as the positive class and class :math:`k` as the -negative class. In general, -:math:`\text{AUC}(j | k) \neq \text{AUC}(k | j))` in the multiclass -case. This algorithm is used by setting the keyword argument ``multiclass`` -to ``'ovo'`` and ``average`` to ``'macro'``. + \frac{1}{c(c-1)}\sum_{j=1}^{c}\sum_{k > j}^c (\text{AUC}(j | k) + + \text{AUC}(k | j)) -The [HT2001]_ multiclass AUC metric can be extended to be weighted by the -prevalence: + where :math:`c` is the number of classes and :math:`\text{AUC}(j | k)` is the + AUC with class :math:`j` as the positive class and class :math:`k` as the + negative class. In general, + :math:`\text{AUC}(j | k) \neq \text{AUC}(k | j))` in the multiclass + case. This algorithm is used by setting the keyword argument ``multiclass`` + to ``'ovo'`` and ``average`` to ``'macro'``. -.. 
math:: + The [HT2001]_ multiclass AUC metric can be extended to be weighted by the + prevalence: - \frac{1}{c(c-1)}\sum_{j=1}^{c}\sum_{k > j}^c p(j \cup k)( - \text{AUC}(j | k) + \text{AUC}(k | j)) + .. math:: -where :math:`c` is the number of classes. This algorithm is used by setting -the keyword argument ``multiclass`` to ``'ovo'`` and ``average`` to -``'weighted'``. The ``'weighted'`` option returns a prevalence-weighted average -as described in [FC2009]_. + \frac{1}{c(c-1)}\sum_{j=1}^{c}\sum_{k > j}^c p(j \cup k)( + \text{AUC}(j | k) + \text{AUC}(k | j)) -**One-vs-rest Algorithm**: Computes the AUC of each class against the rest -[PD2000]_. The algorithm is functionally the same as the multilabel case. To -enable this algorithm set the keyword argument ``multiclass`` to ``'ovr'``. -Additionally to ``'macro'`` [F2006]_ and ``'weighted'`` [F2001]_ averaging, OvR -supports ``'micro'`` averaging. + where :math:`c` is the number of classes. This algorithm is used by setting + the keyword argument ``multiclass`` to ``'ovo'`` and ``average`` to + ``'weighted'``. The ``'weighted'`` option returns a prevalence-weighted average + as described in [FC2009]_. -In applications where a high false positive rate is not tolerable the parameter -``max_fpr`` of :func:`roc_auc_score` can be used to summarize the ROC curve up -to the given limit. +.. dropdown:: One-vs-rest Algorithm -The following figure shows the micro-averaged ROC curve and its corresponding -ROC-AUC score for a classifier aimed to distinguish the the different species in -the :ref:`iris_dataset`: + Computes the AUC of each class against the rest + [PD2000]_. The algorithm is functionally the same as the multilabel case. To + enable this algorithm set the keyword argument ``multiclass`` to ``'ovr'``. + Additionally to ``'macro'`` [F2006]_ and ``'weighted'`` [F2001]_ averaging, OvR + supports ``'micro'`` averaging. -.. image:: ../auto_examples/model_selection/images/sphx_glr_plot_roc_002.png - :target: ../auto_examples/model_selection/plot_roc.html - :scale: 75 - :align: center + In applications where a high false positive rate is not tolerable the parameter + ``max_fpr`` of :func:`roc_auc_score` can be used to summarize the ROC curve up + to the given limit. + + The following figure shows the micro-averaged ROC curve and its corresponding + ROC-AUC score for a classifier aimed to distinguish the different species in + the :ref:`iris_dataset`: + + .. image:: ../auto_examples/model_selection/images/sphx_glr_plot_roc_002.png + :target: ../auto_examples/model_selection/plot_roc.html + :scale: 75 + :align: center .. _roc_auc_multilabel: @@ -1516,46 +1573,43 @@ And the decision values do not require such processing. >>> roc_auc_score(y, y_score, average=None) array([0.81..., 0.84... , 0.93..., 0.87..., 0.94...]) -.. topic:: Examples: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_model_selection_plot_roc.py` - for an example of using ROC to - evaluate the quality of the output of a classifier. +* See :ref:`sphx_glr_auto_examples_model_selection_plot_roc.py` for an example of + using ROC to evaluate the quality of the output of a classifier. - * See :ref:`sphx_glr_auto_examples_model_selection_plot_roc_crossval.py` - for an example of using ROC to - evaluate classifier output quality, using cross-validation. +* See :ref:`sphx_glr_auto_examples_model_selection_plot_roc_crossval.py` for an + example of using ROC to evaluate classifier output quality, using cross-validation. 
- * See :ref:`sphx_glr_auto_examples_applications_plot_species_distribution_modeling.py` - for an example of using ROC to - model species distribution. +* See :ref:`sphx_glr_auto_examples_applications_plot_species_distribution_modeling.py` + for an example of using ROC to model species distribution. -.. topic:: References: +.. rubric:: References - .. [HT2001] Hand, D.J. and Till, R.J., (2001). `A simple generalisation - of the area under the ROC curve for multiple class classification problems. - `_ - Machine learning, 45(2), pp. 171-186. +.. [HT2001] Hand, D.J. and Till, R.J., (2001). `A simple generalisation + of the area under the ROC curve for multiple class classification problems. + `_ + Machine learning, 45(2), pp. 171-186. - .. [FC2009] Ferri, Cèsar & Hernandez-Orallo, Jose & Modroiu, R. (2009). - `An Experimental Comparison of Performance Measures for Classification. - `_ - Pattern Recognition Letters. 30. 27-38. +.. [FC2009] Ferri, Cèsar & Hernandez-Orallo, Jose & Modroiu, R. (2009). + `An Experimental Comparison of Performance Measures for Classification. + `_ + Pattern Recognition Letters. 30. 27-38. - .. [PD2000] Provost, F., Domingos, P. (2000). `Well-trained PETs: Improving - probability estimation trees - `_ - (Section 6.2), CeDER Working Paper #IS-00-04, Stern School of Business, - New York University. +.. [PD2000] Provost, F., Domingos, P. (2000). `Well-trained PETs: Improving + probability estimation trees + `_ + (Section 6.2), CeDER Working Paper #IS-00-04, Stern School of Business, + New York University. - .. [F2006] Fawcett, T., 2006. `An introduction to ROC analysis. - `_ - Pattern Recognition Letters, 27(8), pp. 861-874. +.. [F2006] Fawcett, T., 2006. `An introduction to ROC analysis. + `_ + Pattern Recognition Letters, 27(8), pp. 861-874. - .. [F2001] Fawcett, T., 2001. `Using rule sets to maximize - ROC performance `_ - In Data Mining, 2001. - Proceedings IEEE International Conference, pp. 131-138. +.. [F2001] Fawcett, T., 2001. `Using rule sets to maximize + ROC performance `_ + In Data Mining, 2001. + Proceedings IEEE International Conference, pp. 131-138. .. _det_curve: @@ -1591,59 +1645,57 @@ same classification task: :scale: 75 :align: center -**Properties:** +.. dropdown:: Properties -* DET curves form a linear curve in normal deviate scale if the detection - scores are normally (or close-to normally) distributed. - It was shown by [Navratil2007]_ that the reverse is not necessarily true and - even more general distributions are able to produce linear DET curves. + * DET curves form a linear curve in normal deviate scale if the detection + scores are normally (or close-to normally) distributed. + It was shown by [Navratil2007]_ that the reverse is not necessarily true and + even more general distributions are able to produce linear DET curves. -* The normal deviate scale transformation spreads out the points such that a - comparatively larger space of plot is occupied. - Therefore curves with similar classification performance might be easier to - distinguish on a DET plot. + * The normal deviate scale transformation spreads out the points such that a + comparatively larger space of plot is occupied. + Therefore curves with similar classification performance might be easier to + distinguish on a DET plot. -* With False Negative Rate being "inverse" to True Positive Rate the point - of perfection for DET curves is the origin (in contrast to the top left - corner for ROC curves). 
+ * With False Negative Rate being "inverse" to True Positive Rate the point + of perfection for DET curves is the origin (in contrast to the top left + corner for ROC curves). -**Applications and limitations:** +.. dropdown:: Applications and limitations -DET curves are intuitive to read and hence allow quick visual assessment of a -classifier's performance. -Additionally DET curves can be consulted for threshold analysis and operating -point selection. -This is particularly helpful if a comparison of error types is required. + DET curves are intuitive to read and hence allow quick visual assessment of a + classifier's performance. + Additionally DET curves can be consulted for threshold analysis and operating + point selection. + This is particularly helpful if a comparison of error types is required. -On the other hand DET curves do not provide their metric as a single number. -Therefore for either automated evaluation or comparison to other -classification tasks metrics like the derived area under ROC curve might be -better suited. + On the other hand DET curves do not provide their metric as a single number. + Therefore for either automated evaluation or comparison to other + classification tasks metrics like the derived area under ROC curve might be + better suited. -.. topic:: Examples: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_model_selection_plot_det.py` - for an example comparison between receiver operating characteristic (ROC) - curves and Detection error tradeoff (DET) curves. +* See :ref:`sphx_glr_auto_examples_model_selection_plot_det.py` + for an example comparison between receiver operating characteristic (ROC) + curves and Detection error tradeoff (DET) curves. -.. topic:: References: +.. rubric:: References - .. [WikipediaDET2017] Wikipedia contributors. Detection error tradeoff. - Wikipedia, The Free Encyclopedia. September 4, 2017, 23:33 UTC. - Available at: https://en.wikipedia.org/w/index.php?title=Detection_error_tradeoff&oldid=798982054. - Accessed February 19, 2018. +.. [WikipediaDET2017] Wikipedia contributors. Detection error tradeoff. + Wikipedia, The Free Encyclopedia. September 4, 2017, 23:33 UTC. + Available at: https://en.wikipedia.org/w/index.php?title=Detection_error_tradeoff&oldid=798982054. + Accessed February 19, 2018. - .. [Martin1997] A. Martin, G. Doddington, T. Kamm, M. Ordowski, and M. Przybocki, - `The DET Curve in Assessment of Detection Task Performance - `_, - NIST 1997. +.. [Martin1997] A. Martin, G. Doddington, T. Kamm, M. Ordowski, and M. Przybocki, + `The DET Curve in Assessment of Detection Task Performance + `_, NIST 1997. - .. [Navratil2007] J. Navractil and D. Klusacek, - "`On Linear DETs, - `_" - 2007 IEEE International Conference on Acoustics, - Speech and Signal Processing - ICASSP '07, Honolulu, - HI, 2007, pp. IV-229-IV-232. +.. [Navratil2007] J. Navractil and D. Klusacek, + `"On Linear DETs" `_, + 2007 IEEE International Conference on Acoustics, + Speech and Signal Processing - ICASSP '07, Honolulu, + HI, 2007, pp. IV-229-IV-232. .. _zero_one_loss: @@ -1680,7 +1732,7 @@ loss can also be computed as :math:`zero-one loss = 1 - accuracy`. >>> zero_one_loss(y_true, y_pred) 0.25 >>> zero_one_loss(y_true, y_pred, normalize=False) - 1 + 1.0 In the multilabel case with binary label indicators, where the first label set [0,1] has an error:: @@ -1689,13 +1741,13 @@ set [0,1] has an error:: 0.5 >>> zero_one_loss(np.array([[0, 1], [1, 1]]), np.ones((2, 2)), normalize=False) - 1 + 1.0 -.. topic:: Example: +.. 
rubric:: Examples - * See :ref:`sphx_glr_auto_examples_feature_selection_plot_rfe_with_cross_validation.py` - for an example of zero one loss usage to perform recursive feature - elimination with cross-validation. +* See :ref:`sphx_glr_auto_examples_feature_selection_plot_rfe_with_cross_validation.py` + for an example of zero one loss usage to perform recursive feature + elimination with cross-validation. .. _brier_score_loss: @@ -1751,28 +1803,27 @@ necessarily mean a better calibrated model. "Only when refinement loss remains the same does a lower Brier score loss always mean better calibration" [Bella2012]_, [Flach2008]_. -.. topic:: Example: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_calibration_plot_calibration.py` - for an example of Brier score loss usage to perform probability - calibration of classifiers. +* See :ref:`sphx_glr_auto_examples_calibration_plot_calibration.py` + for an example of Brier score loss usage to perform probability + calibration of classifiers. -.. topic:: References: +.. rubric:: References - .. [Brier1950] G. Brier, `Verification of forecasts expressed in terms of - probability - `_, - Monthly weather review 78.1 (1950) +.. [Brier1950] G. Brier, `Verification of forecasts expressed in terms of probability + `_, + Monthly weather review 78.1 (1950) - .. [Bella2012] Bella, Ferri, HernÃĄndez-Orallo, and Ramírez-Quintana - `"Calibration of Machine Learning Models" - `_ - in Khosrow-Pour, M. "Machine learning: concepts, methodologies, tools - and applications." Hershey, PA: Information Science Reference (2012). +.. [Bella2012] Bella, Ferri, HernÃĄndez-Orallo, and Ramírez-Quintana + `"Calibration of Machine Learning Models" + `_ + in Khosrow-Pour, M. "Machine learning: concepts, methodologies, tools + and applications." Hershey, PA: Information Science Reference (2012). - .. [Flach2008] Flach, Peter, and Edson Matsubara. `"On classification, ranking, - and probability estimation." `_ - Dagstuhl Seminar Proceedings. Schloss Dagstuhl-Leibniz-Zentrum fr Informatik (2008). +.. [Flach2008] Flach, Peter, and Edson Matsubara. `"On classification, ranking, + and probability estimation." `_ + Dagstuhl Seminar Proceedings. Schloss Dagstuhl-Leibniz-Zentrum fr Informatik (2008). .. _class_likelihood_ratios: @@ -1825,72 +1876,72 @@ counts ``tp`` (see `the wikipedia page `_ for the actual formulas). -**Interpretation across varying prevalence:** +.. rubric:: Examples -Both class likelihood ratios are interpretable in terms of an odds ratio -(pre-test and post-tests): +* :ref:`sphx_glr_auto_examples_model_selection_plot_likelihood_ratios.py` -.. math:: +.. dropdown:: Interpretation across varying prevalence - \text{post-test odds} = \text{Likelihood ratio} \times \text{pre-test odds}. + Both class likelihood ratios are interpretable in terms of an odds ratio + (pre-test and post-tests): -Odds are in general related to probabilities via + .. math:: -.. math:: + \text{post-test odds} = \text{Likelihood ratio} \times \text{pre-test odds}. - \text{odds} = \frac{\text{probability}}{1 - \text{probability}}, + Odds are in general related to probabilities via -or equivalently + .. math:: -.. math:: + \text{odds} = \frac{\text{probability}}{1 - \text{probability}}, - \text{probability} = \frac{\text{odds}}{1 + \text{odds}}. + or equivalently -On a given population, the pre-test probability is given by the prevalence. 
By -converting odds to probabilities, the likelihood ratios can be translated into a -probability of truly belonging to either class before and after a classifier -prediction: + .. math:: -.. math:: + \text{probability} = \frac{\text{odds}}{1 + \text{odds}}. - \text{post-test odds} = \text{Likelihood ratio} \times - \frac{\text{pre-test probability}}{1 - \text{pre-test probability}}, + On a given population, the pre-test probability is given by the prevalence. By + converting odds to probabilities, the likelihood ratios can be translated into a + probability of truly belonging to either class before and after a classifier + prediction: -.. math:: + .. math:: - \text{post-test probability} = \frac{\text{post-test odds}}{1 + \text{post-test odds}}. + \text{post-test odds} = \text{Likelihood ratio} \times + \frac{\text{pre-test probability}}{1 - \text{pre-test probability}}, -**Mathematical divergences:** + .. math:: -The positive likelihood ratio is undefined when :math:`fp = 0`, which can be -interpreted as the classifier perfectly identifying positive cases. If :math:`fp -= 0` and additionally :math:`tp = 0`, this leads to a zero/zero division. This -happens, for instance, when using a `DummyClassifier` that always predicts the -negative class and therefore the interpretation as a perfect classifier is lost. + \text{post-test probability} = \frac{\text{post-test odds}}{1 + \text{post-test odds}}. -The negative likelihood ratio is undefined when :math:`tn = 0`. Such divergence -is invalid, as :math:`LR_- > 1` would indicate an increase in the odds of a -sample belonging to the positive class after being classified as negative, as if -the act of classifying caused the positive condition. This includes the case of -a `DummyClassifier` that always predicts the positive class (i.e. when -:math:`tn=fn=0`). +.. dropdown:: Mathematical divergences -Both class likelihood ratios are undefined when :math:`tp=fn=0`, which means -that no samples of the positive class were present in the testing set. This can -also happen when cross-validating highly imbalanced data. + The positive likelihood ratio is undefined when :math:`fp = 0`, which can be + interpreted as the classifier perfectly identifying positive cases. If :math:`fp + = 0` and additionally :math:`tp = 0`, this leads to a zero/zero division. This + happens, for instance, when using a `DummyClassifier` that always predicts the + negative class and therefore the interpretation as a perfect classifier is lost. -In all the previous cases the :func:`class_likelihood_ratios` function raises by -default an appropriate warning message and returns `nan` to avoid pollution when -averaging over cross-validation folds. + The negative likelihood ratio is undefined when :math:`tn = 0`. Such divergence + is invalid, as :math:`LR_- > 1` would indicate an increase in the odds of a + sample belonging to the positive class after being classified as negative, as if + the act of classifying caused the positive condition. This includes the case of + a `DummyClassifier` that always predicts the positive class (i.e. when + :math:`tn=fn=0`). -For a worked-out demonstration of the :func:`class_likelihood_ratios` function, -see the example below. + Both class likelihood ratios are undefined when :math:`tp=fn=0`, which means + that no samples of the positive class were present in the testing set. This can + also happen when cross-validating highly imbalanced data. -.. 
topic:: Examples: + In all the previous cases the :func:`class_likelihood_ratios` function raises by + default an appropriate warning message and returns `nan` to avoid pollution when + averaging over cross-validation folds. - * :ref:`sphx_glr_auto_examples_model_selection_plot_likelihood_ratios.py` + For a worked-out demonstration of the :func:`class_likelihood_ratios` function, + see the example below. -.. topic:: References: +.. dropdown:: References * `Wikipedia entry for Likelihood ratios in diagnostic testing `_ @@ -1901,6 +1952,68 @@ see the example below. Statistics in medicine, 16(9), 981-991. +.. _d2_score_classification: + +D² score for classification +--------------------------- + +The D² score computes the fraction of deviance explained. +It is a generalization of R², where the squared error is generalized and replaced +by a classification deviance of choice :math:`\text{dev}(y, \hat{y})` +(e.g., Log loss). D² is a form of a *skill score*. +It is calculated as + +.. math:: + + D^2(y, \hat{y}) = 1 - \frac{\text{dev}(y, \hat{y})}{\text{dev}(y, y_{\text{null}})} \,. + +Where :math:`y_{\text{null}}` is the optimal prediction of an intercept-only model +(e.g., the per-class proportion of `y_true` in the case of the Log loss). + +Like R², the best possible score is 1.0 and it can be negative (because the +model can be arbitrarily worse). A constant model that always predicts +:math:`y_{\text{null}}`, disregarding the input features, would get a D² score +of 0.0. + +.. dropdown:: D2 log loss score + + The :func:`d2_log_loss_score` function implements the special case + of D² with the log loss, see :ref:`log_loss`, i.e.: + + .. math:: + + \text{dev}(y, \hat{y}) = \text{log_loss}(y, \hat{y}). + + Here are some usage examples of the :func:`d2_log_loss_score` function:: + + >>> from sklearn.metrics import d2_log_loss_score + >>> y_true = [1, 1, 2, 3] + >>> y_pred = [ + ... [0.5, 0.25, 0.25], + ... [0.5, 0.25, 0.25], + ... [0.5, 0.25, 0.25], + ... [0.5, 0.25, 0.25], + ... ] + >>> d2_log_loss_score(y_true, y_pred) + 0.0 + >>> y_true = [1, 2, 3] + >>> y_pred = [ + ... [0.98, 0.01, 0.01], + ... [0.01, 0.98, 0.01], + ... [0.01, 0.01, 0.98], + ... ] + >>> d2_log_loss_score(y_true, y_pred) + 0.981... + >>> y_true = [1, 2, 3] + >>> y_pred = [ + ... [0.1, 0.6, 0.3], + ... [0.1, 0.6, 0.3], + ... [0.4, 0.5, 0.1], + ... ] + >>> d2_log_loss_score(y_true, y_pred) + -0.552... + + .. _multilabel_ranking_metrics: Multilabel ranking metrics @@ -2039,11 +2152,12 @@ Here is a small example of usage of this function:: 0.0 -.. topic:: References: +.. dropdown:: References * Tsoumakas, G., Katakis, I., & Vlahavas, I. (2010). Mining multi-label data. In Data mining and knowledge discovery handbook (pp. 667-685). Springer US. + .. _ndcg: Normalized Discounted Cumulative Gain @@ -2088,7 +2202,7 @@ DCG score is and the NDCG score is the DCG score divided by the DCG score obtained for :math:`y`. -.. topic:: References: +.. dropdown:: References * `Wikipedia entry for Discounted Cumulative Gain `_ @@ -2106,6 +2220,7 @@ and the NDCG score is the DCG score divided by the DCG score obtained for European conference on information retrieval (pp. 414-421). Springer, Berlin, Heidelberg. + .. _regression_metrics: Regression metrics @@ -2137,9 +2252,6 @@ leads to a weighting of each individual score by the variance of the corresponding target variable. This setting quantifies the globally captured unscaled variance. 
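For illustration, here is a minimal sketch comparing the two averaging choices with
:func:`r2_score` on a small hand-made multioutput target (the arrays and the
approximate outputs below are purely illustrative)::

    >>> from sklearn.metrics import r2_score
    >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
    >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
    >>> r2_score(y_true, y_pred, multioutput='uniform_average')
    0.936...
    >>> r2_score(y_true, y_pred, multioutput='variance_weighted')
    0.938...
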
If the target variables are of different scale, then this score puts more importance on explaining the higher variance variables. -``multioutput='variance_weighted'`` is the default value for :func:`r2_score` -for backward compatibility. This will be changed to ``uniform_average`` in the -future. .. _r2_score: @@ -2217,11 +2329,11 @@ Here is a small example of usage of the :func:`r2_score` function:: >>> r2_score(y_true, y_pred, force_finite=False) -inf -.. topic:: Example: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_and_elasticnet.py` - for an example of R² score usage to - evaluate Lasso and Elastic Net on sparse signals. +* See :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_and_elasticnet.py` + for an example of R² score usage to + evaluate Lasso and Elastic Net on sparse signals. .. _mean_absolute_error: @@ -2288,11 +2400,14 @@ function:: >>> mean_squared_error(y_true, y_pred) 0.7083... -.. topic:: Examples: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_regression.py` - for an example of mean squared error usage to - evaluate gradient boosting regression. +* See :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_regression.py` + for an example of mean squared error usage to evaluate gradient boosting regression. + +Taking the square root of the MSE, called the root mean squared error (RMSE), is another +common metric that provides a measure in the same units as the target variable. RSME is +available through the :func:`root_mean_squared_error` function. .. _mean_squared_log_error: @@ -2331,6 +2446,9 @@ function:: >>> mean_squared_log_error(y_true, y_pred) 0.044... +The root mean squared logarithmic error (RMSLE) is available through the +:func:`root_mean_squared_log_error` function. + .. _mean_absolute_percentage_error: Mean absolute percentage error @@ -2623,12 +2741,12 @@ It is also possible to build scorer objects for hyper-parameter tuning. The sign of the loss must be switched to ensure that greater means better as explained in the example linked below. -.. topic:: Example: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_quantile.py` - for an example of using the pinball loss to evaluate and tune the - hyper-parameters of quantile regression models on data with non-symmetric - noise and outliers. +* See :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_quantile.py` + for an example of using the pinball loss to evaluate and tune the + hyper-parameters of quantile regression models on data with non-symmetric + noise and outliers. .. _d2_score: @@ -2654,68 +2772,66 @@ model can be arbitrarily worse). A constant model that always predicts :math:`y_{\text{null}}`, disregarding the input features, would get a D² score of 0.0. -D² Tweedie score -^^^^^^^^^^^^^^^^ +.. dropdown:: D² Tweedie score -The :func:`d2_tweedie_score` function implements the special case of D² -where :math:`\text{dev}(y, \hat{y})` is the Tweedie deviance, see :ref:`mean_tweedie_deviance`. -It is also known as D² Tweedie and is related to McFadden's likelihood ratio index. + The :func:`d2_tweedie_score` function implements the special case of D² + where :math:`\text{dev}(y, \hat{y})` is the Tweedie deviance, see :ref:`mean_tweedie_deviance`. + It is also known as D² Tweedie and is related to McFadden's likelihood ratio index. -The argument ``power`` defines the Tweedie power as for -:func:`mean_tweedie_deviance`. 
Note that for `power=0`, -:func:`d2_tweedie_score` equals :func:`r2_score` (for single targets). + The argument ``power`` defines the Tweedie power as for + :func:`mean_tweedie_deviance`. Note that for `power=0`, + :func:`d2_tweedie_score` equals :func:`r2_score` (for single targets). -A scorer object with a specific choice of ``power`` can be built by:: + A scorer object with a specific choice of ``power`` can be built by:: - >>> from sklearn.metrics import d2_tweedie_score, make_scorer - >>> d2_tweedie_score_15 = make_scorer(d2_tweedie_score, power=1.5) + >>> from sklearn.metrics import d2_tweedie_score, make_scorer + >>> d2_tweedie_score_15 = make_scorer(d2_tweedie_score, power=1.5) -D² pinball score -^^^^^^^^^^^^^^^^^^^^^ +.. dropdown:: D² pinball score -The :func:`d2_pinball_score` function implements the special case -of D² with the pinball loss, see :ref:`pinball_loss`, i.e.: + The :func:`d2_pinball_score` function implements the special case + of D² with the pinball loss, see :ref:`pinball_loss`, i.e.: -.. math:: + .. math:: - \text{dev}(y, \hat{y}) = \text{pinball}(y, \hat{y}). + \text{dev}(y, \hat{y}) = \text{pinball}(y, \hat{y}). -The argument ``alpha`` defines the slope of the pinball loss as for -:func:`mean_pinball_loss` (:ref:`pinball_loss`). It determines the -quantile level ``alpha`` for which the pinball loss and also D² -are optimal. Note that for `alpha=0.5` (the default) :func:`d2_pinball_score` -equals :func:`d2_absolute_error_score`. + The argument ``alpha`` defines the slope of the pinball loss as for + :func:`mean_pinball_loss` (:ref:`pinball_loss`). It determines the + quantile level ``alpha`` for which the pinball loss and also D² + are optimal. Note that for `alpha=0.5` (the default) :func:`d2_pinball_score` + equals :func:`d2_absolute_error_score`. -A scorer object with a specific choice of ``alpha`` can be built by:: + A scorer object with a specific choice of ``alpha`` can be built by:: - >>> from sklearn.metrics import d2_pinball_score, make_scorer - >>> d2_pinball_score_08 = make_scorer(d2_pinball_score, alpha=0.8) + >>> from sklearn.metrics import d2_pinball_score, make_scorer + >>> d2_pinball_score_08 = make_scorer(d2_pinball_score, alpha=0.8) -D² absolute error score -^^^^^^^^^^^^^^^^^^^^^^^ +.. dropdown:: D² absolute error score -The :func:`d2_absolute_error_score` function implements the special case of -the :ref:`mean_absolute_error`: + The :func:`d2_absolute_error_score` function implements the special case of + the :ref:`mean_absolute_error`: -.. math:: + .. math:: - \text{dev}(y, \hat{y}) = \text{MAE}(y, \hat{y}). + \text{dev}(y, \hat{y}) = \text{MAE}(y, \hat{y}). -Here are some usage examples of the :func:`d2_absolute_error_score` function:: + Here are some usage examples of the :func:`d2_absolute_error_score` function:: + + >>> from sklearn.metrics import d2_absolute_error_score + >>> y_true = [3, -0.5, 2, 7] + >>> y_pred = [2.5, 0.0, 2, 8] + >>> d2_absolute_error_score(y_true, y_pred) + 0.764... + >>> y_true = [1, 2, 3] + >>> y_pred = [1, 2, 3] + >>> d2_absolute_error_score(y_true, y_pred) + 1.0 + >>> y_true = [1, 2, 3] + >>> y_pred = [2, 2, 2] + >>> d2_absolute_error_score(y_true, y_pred) + 0.0 - >>> from sklearn.metrics import d2_absolute_error_score - >>> y_true = [3, -0.5, 2, 7] - >>> y_pred = [2.5, 0.0, 2, 8] - >>> d2_absolute_error_score(y_true, y_pred) - 0.764... 
- >>> y_true = [1, 2, 3] - >>> y_pred = [1, 2, 3] - >>> d2_absolute_error_score(y_true, y_pred) - 1.0 - >>> y_true = [1, 2, 3] - >>> y_pred = [2, 2, 2] - >>> d2_absolute_error_score(y_true, y_pred) - 0.0 .. _visualization_regression_evaluation: @@ -2769,8 +2885,8 @@ model would grow with the predicted value of `E[y|X]` (either linearly for Poisson or quadratically for Gamma). When fitting a linear least squares regression model (see -:class:`~sklearn.linear_mnodel.LinearRegression` and -:class:`~sklearn.linear_mnodel.Ridge`), we can use this plot to check +:class:`~sklearn.linear_model.LinearRegression` and +:class:`~sklearn.linear_model.Ridge`), we can use this plot to check if some of the `model assumptions `_ are met, in particular that the residuals should be uncorrelated, their @@ -2785,12 +2901,12 @@ model might be useful. Refer to the example below to see a model evaluation that makes use of this display. -.. topic:: Example: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_compose_plot_transformed_target.py` for - an example on how to use :class:`~sklearn.metrics.PredictionErrorDisplay` - to visualize the prediction quality improvement of a regression model - obtained by transforming the target before learning. +* See :ref:`sphx_glr_auto_examples_compose_plot_transformed_target.py` for + an example on how to use :class:`~sklearn.metrics.PredictionErrorDisplay` + to visualize the prediction quality improvement of a regression model + obtained by transforming the target before learning. .. _clustering_metrics: diff --git a/doc/modules/multiclass.rst b/doc/modules/multiclass.rst index beee41e2aea0b..b5f7611bdfd91 100644 --- a/doc/modules/multiclass.rst +++ b/doc/modules/multiclass.rst @@ -63,8 +63,8 @@ can provide additional strategies beyond what is built-in: - :class:`semi_supervised.LabelSpreading` - :class:`discriminant_analysis.LinearDiscriminantAnalysis` - :class:`svm.LinearSVC` (setting multi_class="crammer_singer") - - :class:`linear_model.LogisticRegression` (setting multi_class="multinomial") - - :class:`linear_model.LogisticRegressionCV` (setting multi_class="multinomial") + - :class:`linear_model.LogisticRegression` (with most solvers) + - :class:`linear_model.LogisticRegressionCV` (with most solvers) - :class:`neural_network.MLPClassifier` - :class:`neighbors.NearestCentroid` - :class:`discriminant_analysis.QuadraticDiscriminantAnalysis` @@ -86,8 +86,8 @@ can provide additional strategies beyond what is built-in: - :class:`ensemble.GradientBoostingClassifier` - :class:`gaussian_process.GaussianProcessClassifier` (setting multi_class = "one_vs_rest") - :class:`svm.LinearSVC` (setting multi_class="ovr") - - :class:`linear_model.LogisticRegression` (setting multi_class="ovr") - - :class:`linear_model.LogisticRegressionCV` (setting multi_class="ovr") + - :class:`linear_model.LogisticRegression` (most solvers) + - :class:`linear_model.LogisticRegressionCV` (most solvers) - :class:`linear_model.SGDClassifier` - :class:`linear_model.Perceptron` - :class:`linear_model.PassiveAggressiveClassifier` @@ -147,35 +147,35 @@ Target format Valid :term:`multiclass` representations for :func:`~sklearn.utils.multiclass.type_of_target` (`y`) are: - - 1d or column vector containing more than two discrete values. 
An - example of a vector ``y`` for 4 samples: - - >>> import numpy as np - >>> y = np.array(['apple', 'pear', 'apple', 'orange']) - >>> print(y) - ['apple' 'pear' 'apple' 'orange'] - - - Dense or sparse :term:`binary` matrix of shape ``(n_samples, n_classes)`` - with a single sample per row, where each column represents one class. An - example of both a dense and sparse :term:`binary` matrix ``y`` for 4 - samples, where the columns, in order, are apple, orange, and pear: - - >>> import numpy as np - >>> from sklearn.preprocessing import LabelBinarizer - >>> y = np.array(['apple', 'pear', 'apple', 'orange']) - >>> y_dense = LabelBinarizer().fit_transform(y) - >>> print(y_dense) - [[1 0 0] - [0 0 1] - [1 0 0] - [0 1 0]] - >>> from scipy import sparse - >>> y_sparse = sparse.csr_matrix(y_dense) - >>> print(y_sparse) - (0, 0) 1 - (1, 2) 1 - (2, 0) 1 - (3, 1) 1 +- 1d or column vector containing more than two discrete values. An + example of a vector ``y`` for 4 samples: + + >>> import numpy as np + >>> y = np.array(['apple', 'pear', 'apple', 'orange']) + >>> print(y) + ['apple' 'pear' 'apple' 'orange'] + +- Dense or sparse :term:`binary` matrix of shape ``(n_samples, n_classes)`` + with a single sample per row, where each column represents one class. An + example of both a dense and sparse :term:`binary` matrix ``y`` for 4 + samples, where the columns, in order, are apple, orange, and pear: + + >>> import numpy as np + >>> from sklearn.preprocessing import LabelBinarizer + >>> y = np.array(['apple', 'pear', 'apple', 'orange']) + >>> y_dense = LabelBinarizer().fit_transform(y) + >>> print(y_dense) + [[1 0 0] + [0 0 1] + [1 0 0] + [0 1 0]] + >>> from scipy import sparse + >>> y_sparse = sparse.csr_matrix(y_dense) + >>> print(y_sparse) + (0, 0) 1 + (1, 2) 1 + (2, 0) 1 + (3, 1) 1 For more information about :class:`~sklearn.preprocessing.LabelBinarizer`, refer to :ref:`preprocessing_targets`. @@ -201,7 +201,7 @@ Below is an example of multiclass learning using OvR:: >>> from sklearn.multiclass import OneVsRestClassifier >>> from sklearn.svm import LinearSVC >>> X, y = datasets.load_iris(return_X_y=True) - >>> OneVsRestClassifier(LinearSVC(dual="auto", random_state=0)).fit(X, y).predict(X) + >>> OneVsRestClassifier(LinearSVC(random_state=0)).fit(X, y).predict(X) array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -222,9 +222,9 @@ in which cell [i, j] indicates the presence of label j in sample i. :scale: 75% -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_miscellaneous_plot_multilabel.py` +* :ref:`sphx_glr_auto_examples_miscellaneous_plot_multilabel.py` .. _ovo_classification: @@ -253,7 +253,7 @@ Below is an example of multiclass learning using OvO:: >>> from sklearn.multiclass import OneVsOneClassifier >>> from sklearn.svm import LinearSVC >>> X, y = datasets.load_iris(return_X_y=True) - >>> OneVsOneClassifier(LinearSVC(dual="auto", random_state=0)).fit(X, y).predict(X) + >>> OneVsOneClassifier(LinearSVC(random_state=0)).fit(X, y).predict(X) array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -263,10 +263,10 @@ Below is an example of multiclass learning using OvO:: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]) -.. topic:: References: +.. 
rubric:: References - * "Pattern Recognition and Machine Learning. Springer", - Christopher M. Bishop, page 183, (First Edition) +* "Pattern Recognition and Machine Learning. Springer", + Christopher M. Bishop, page 183, (First Edition) .. _ecoc: @@ -311,8 +311,7 @@ Below is an example of multiclass learning using Output-Codes:: >>> from sklearn.multiclass import OutputCodeClassifier >>> from sklearn.svm import LinearSVC >>> X, y = datasets.load_iris(return_X_y=True) - >>> clf = OutputCodeClassifier(LinearSVC(dual="auto", random_state=0), - ... code_size=2, random_state=0) + >>> clf = OutputCodeClassifier(LinearSVC(random_state=0), code_size=2, random_state=0) >>> clf.fit(X, y).predict(X) array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -322,21 +321,16 @@ Below is an example of multiclass learning using Output-Codes:: 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]) -.. topic:: References: +.. rubric:: References - * "Solving multiclass learning problems via error-correcting output codes", - Dietterich T., Bakiri G., - Journal of Artificial Intelligence Research 2, - 1995. +* "Solving multiclass learning problems via error-correcting output codes", + Dietterich T., Bakiri G., Journal of Artificial Intelligence Research 2, 1995. - .. [3] "The error coding method and PICTs", - James G., Hastie T., - Journal of Computational and Graphical statistics 7, - 1998. +.. [3] "The error coding method and PICTs", James G., Hastie T., + Journal of Computational and Graphical statistics 7, 1998. - * "The Elements of Statistical Learning", - Hastie T., Tibshirani R., Friedman J., page 606 (second-edition) - 2008. +* "The Elements of Statistical Learning", + Hastie T., Tibshirani R., Friedman J., page 606 (second-edition), 2008. .. _multilabel_classification: @@ -433,10 +427,10 @@ one does not know the optimal ordering of the models in the chain so typically many randomly ordered chains are fit and their predictions are averaged together. -.. topic:: References: +.. rubric:: References - Jesse Read, Bernhard Pfahringer, Geoff Holmes, Eibe Frank, - "Classifier Chains for Multi-label Classification", 2009. +* Jesse Read, Bernhard Pfahringer, Geoff Holmes, Eibe Frank, + "Classifier Chains for Multi-label Classification", 2009. .. _multiclass_multioutput_classification: @@ -529,6 +523,37 @@ using data obtained at a certain location. Each sample would be data obtained at one location and both wind speed and direction would be output for each sample. 
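As a brief sketch of this setting (the synthetic data generated with
:func:`~sklearn.datasets.make_regression` below is an arbitrary stand-in for such
measurements), a regressor with native multioutput support, such as
:class:`~sklearn.linear_model.LinearRegression`, can be fit directly on a 2d target::

    >>> from sklearn.datasets import make_regression
    >>> from sklearn.linear_model import LinearRegression
    >>> X, y = make_regression(n_samples=10, n_targets=2, random_state=0)
    >>> reg = LinearRegression().fit(X, y)
    >>> reg.predict(X[:2]).shape
    (2, 2)
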
+The following regressors natively support multioutput regression: + + - :class:`cross_decomposition.CCA` + - :class:`tree.DecisionTreeRegressor` + - :class:`dummy.DummyRegressor` + - :class:`linear_model.ElasticNet` + - :class:`tree.ExtraTreeRegressor` + - :class:`ensemble.ExtraTreesRegressor` + - :class:`gaussian_process.GaussianProcessRegressor` + - :class:`neighbors.KNeighborsRegressor` + - :class:`kernel_ridge.KernelRidge` + - :class:`linear_model.Lars` + - :class:`linear_model.Lasso` + - :class:`linear_model.LassoLars` + - :class:`linear_model.LinearRegression` + - :class:`multioutput.MultiOutputRegressor` + - :class:`linear_model.MultiTaskElasticNet` + - :class:`linear_model.MultiTaskElasticNetCV` + - :class:`linear_model.MultiTaskLasso` + - :class:`linear_model.MultiTaskLassoCV` + - :class:`linear_model.OrthogonalMatchingPursuit` + - :class:`cross_decomposition.PLSCanonical` + - :class:`cross_decomposition.PLSRegression` + - :class:`linear_model.RANSACRegressor` + - :class:`neighbors.RadiusNeighborsRegressor` + - :class:`ensemble.RandomForestRegressor` + - :class:`multioutput.RegressorChain` + - :class:`linear_model.Ridge` + - :class:`linear_model.RidgeCV` + - :class:`compose.TransformedTargetRegressor` + Target format ------------- diff --git a/doc/modules/naive_bayes.rst b/doc/modules/naive_bayes.rst index 1cb8aa0d6dedf..6e80ec6145919 100644 --- a/doc/modules/naive_bayes.rst +++ b/doc/modules/naive_bayes.rst @@ -69,11 +69,11 @@ On the flip side, although naive Bayes is known as a decent classifier, it is known to be a bad estimator, so the probability outputs from ``predict_proba`` are not to be taken too seriously. -.. topic:: References: +.. dropdown:: References - * H. Zhang (2004). `The optimality of Naive Bayes. - `_ - Proc. FLAIRS. + * H. Zhang (2004). `The optimality of Naive Bayes. + `_ + Proc. FLAIRS. .. _gaussian_naive_bayes: @@ -147,38 +147,42 @@ that is particularly suited for imbalanced data sets. Specifically, CNB uses statistics from the *complement* of each class to compute the model's weights. The inventors of CNB show empirically that the parameter estimates for CNB are more stable than those for MNB. Further, CNB regularly outperforms MNB (often -by a considerable margin) on text classification tasks. The procedure for -calculating the weights is as follows: +by a considerable margin) on text classification tasks. -.. math:: +.. dropdown:: Weights calculation - \hat{\theta}_{ci} = \frac{\alpha_i + \sum_{j:y_j \neq c} d_{ij}} - {\alpha + \sum_{j:y_j \neq c} \sum_{k} d_{kj}} + The procedure for calculating the weights is as follows: - w_{ci} = \log \hat{\theta}_{ci} + .. math:: - w_{ci} = \frac{w_{ci}}{\sum_{j} |w_{cj}|} + \hat{\theta}_{ci} = \frac{\alpha_i + \sum_{j:y_j \neq c} d_{ij}} + {\alpha + \sum_{j:y_j \neq c} \sum_{k} d_{kj}} -where the summations are over all documents :math:`j` not in class :math:`c`, -:math:`d_{ij}` is either the count or tf-idf value of term :math:`i` in document -:math:`j`, :math:`\alpha_i` is a smoothing hyperparameter like that found in -MNB, and :math:`\alpha = \sum_{i} \alpha_i`. The second normalization addresses -the tendency for longer documents to dominate parameter estimates in MNB. The -classification rule is: + w_{ci} = \log \hat{\theta}_{ci} -.. 
math:: + w_{ci} = \frac{w_{ci}}{\sum_{j} |w_{cj}|} + + where the summations are over all documents :math:`j` not in class :math:`c`, + :math:`d_{ij}` is either the count or tf-idf value of term :math:`i` in document + :math:`j`, :math:`\alpha_i` is a smoothing hyperparameter like that found in + MNB, and :math:`\alpha = \sum_{i} \alpha_i`. The second normalization addresses + the tendency for longer documents to dominate parameter estimates in MNB. The + classification rule is: + + .. math:: - \hat{c} = \arg\min_c \sum_{i} t_i w_{ci} + \hat{c} = \arg\min_c \sum_{i} t_i w_{ci} -i.e., a document is assigned to the class that is the *poorest* complement -match. + i.e., a document is assigned to the class that is the *poorest* complement + match. -.. topic:: References: +.. dropdown:: References + + * Rennie, J. D., Shih, L., Teevan, J., & Karger, D. R. (2003). + `Tackling the poor assumptions of naive bayes text classifiers. + `_ + In ICML (Vol. 3, pp. 616-623). - * Rennie, J. D., Shih, L., Teevan, J., & Karger, D. R. (2003). - `Tackling the poor assumptions of naive bayes text classifiers. - `_ - In ICML (Vol. 3, pp. 616-623). .. _bernoulli_naive_bayes: @@ -190,7 +194,7 @@ algorithms for data that is distributed according to multivariate Bernoulli distributions; i.e., there may be multiple features but each one is assumed to be a binary-valued (Bernoulli, boolean) variable. Therefore, this class requires samples to be represented as binary-valued -feature vectors; if handed any other kind of data, a ``BernoulliNB`` instance +feature vectors; if handed any other kind of data, a :class:`BernoulliNB` instance may binarize its input (depending on the ``binarize`` parameter). The decision rule for Bernoulli naive Bayes is based on @@ -205,24 +209,25 @@ that is an indicator for class :math:`y`, where the multinomial variant would simply ignore a non-occurring feature. In the case of text classification, word occurrence vectors (rather than word -count vectors) may be used to train and use this classifier. ``BernoulliNB`` +count vectors) may be used to train and use this classifier. :class:`BernoulliNB` might perform better on some datasets, especially those with shorter documents. It is advisable to evaluate both models, if time permits. -.. topic:: References: +.. dropdown:: References + + * C.D. Manning, P. Raghavan and H. SchÃŧtze (2008). Introduction to + Information Retrieval. Cambridge University Press, pp. 234-265. - * C.D. Manning, P. Raghavan and H. SchÃŧtze (2008). Introduction to - Information Retrieval. Cambridge University Press, pp. 234-265. + * A. McCallum and K. Nigam (1998). + `A comparison of event models for Naive Bayes text classification. + `_ + Proc. AAAI/ICML-98 Workshop on Learning for Text Categorization, pp. 41-48. - * A. McCallum and K. Nigam (1998). - `A comparison of event models for Naive Bayes text classification. - `_ - Proc. AAAI/ICML-98 Workshop on Learning for Text Categorization, pp. 41-48. + * V. Metsis, I. Androutsopoulos and G. Paliouras (2006). + `Spam filtering with Naive Bayes -- Which Naive Bayes? + `_ + 3rd Conf. on Email and Anti-Spam (CEAS). - * V. Metsis, I. Androutsopoulos and G. Paliouras (2006). - `Spam filtering with Naive Bayes -- Which Naive Bayes? - `_ - 3rd Conf. on Email and Anti-Spam (CEAS). .. _categorical_naive_bayes: @@ -239,23 +244,25 @@ For each feature :math:`i` in the training set :math:`X`, of X conditioned on the class y. 
The index set of the samples is defined as :math:`J = \{ 1, \dots, m \}`, with :math:`m` as the number of samples. -The probability of category :math:`t` in feature :math:`i` given class -:math:`c` is estimated as: +.. dropdown:: Probability calculation -.. math:: + The probability of category :math:`t` in feature :math:`i` given class + :math:`c` is estimated as: + + .. math:: - P(x_i = t \mid y = c \: ;\, \alpha) = \frac{ N_{tic} + \alpha}{N_{c} + - \alpha n_i}, + P(x_i = t \mid y = c \: ;\, \alpha) = \frac{ N_{tic} + \alpha}{N_{c} + + \alpha n_i}, -where :math:`N_{tic} = |\{j \in J \mid x_{ij} = t, y_j = c\}|` is the number -of times category :math:`t` appears in the samples :math:`x_{i}`, which belong -to class :math:`c`, :math:`N_{c} = |\{ j \in J\mid y_j = c\}|` is the number -of samples with class c, :math:`\alpha` is a smoothing parameter and -:math:`n_i` is the number of available categories of feature :math:`i`. + where :math:`N_{tic} = |\{j \in J \mid x_{ij} = t, y_j = c\}|` is the number + of times category :math:`t` appears in the samples :math:`x_{i}`, which belong + to class :math:`c`, :math:`N_{c} = |\{ j \in J\mid y_j = c\}|` is the number + of samples with class c, :math:`\alpha` is a smoothing parameter and + :math:`n_i` is the number of available categories of feature :math:`i`. -:class:`CategoricalNB` assumes that the sample matrix :math:`X` is encoded -(for instance with the help of :class:`OrdinalEncoder`) such that all -categories for each feature :math:`i` are represented with numbers +:class:`CategoricalNB` assumes that the sample matrix :math:`X` is encoded (for +instance with the help of :class:`~sklearn.preprocessing.OrdinalEncoder`) such +that all categories for each feature :math:`i` are represented with numbers :math:`0, ..., n_i - 1` where :math:`n_i` is the number of available categories of feature :math:`i`. diff --git a/doc/modules/neighbors.rst b/doc/modules/neighbors.rst index 90856b6933f3e..de0eff67018bc 100644 --- a/doc/modules/neighbors.rst +++ b/doc/modules/neighbors.rst @@ -59,12 +59,12 @@ The choice of neighbors search algorithm is controlled through the keyword from the training data. For a discussion of the strengths and weaknesses of each option, see `Nearest Neighbor Algorithms`_. - .. warning:: +.. warning:: - Regarding the Nearest Neighbors algorithms, if two - neighbors :math:`k+1` and :math:`k` have identical distances - but different labels, the result will depend on the ordering of the - training data. + Regarding the Nearest Neighbors algorithms, if two + neighbors :math:`k+1` and :math:`k` have identical distances + but different labels, the result will depend on the ordering of the + training data. Finding the Nearest Neighbors ----------------------------- @@ -136,12 +136,12 @@ have the same interface; we'll show an example of using the KD Tree here: Refer to the :class:`KDTree` and :class:`BallTree` class documentation for more information on the options available for nearest neighbors searches, including specification of query strategies, distance metrics, etc. 
For a list -of valid metrics use :meth:`KDTree.valid_metrics` and :meth:`BallTree.valid_metrics`: +of valid metrics use `KDTree.valid_metrics` and `BallTree.valid_metrics`: >>> from sklearn.neighbors import KDTree, BallTree - >>> KDTree.valid_metrics() + >>> KDTree.valid_metrics ['euclidean', 'l2', 'minkowski', 'p', 'manhattan', 'cityblock', 'l1', 'chebyshev', 'infinity'] - >>> BallTree.valid_metrics() + >>> BallTree.valid_metrics ['euclidean', 'l2', 'minkowski', 'p', 'manhattan', 'cityblock', 'l1', 'chebyshev', 'infinity', 'seuclidean', 'mahalanobis', 'hamming', 'canberra', 'braycurtis', 'jaccard', 'dice', 'rogerstanimoto', 'russellrao', 'sokalmichener', 'sokalsneath', 'haversine', 'pyfunc'] .. _classification: @@ -188,18 +188,14 @@ distance can be supplied to compute the weights. .. |classification_1| image:: ../auto_examples/neighbors/images/sphx_glr_plot_classification_001.png :target: ../auto_examples/neighbors/plot_classification.html - :scale: 50 - -.. |classification_2| image:: ../auto_examples/neighbors/images/sphx_glr_plot_classification_002.png - :target: ../auto_examples/neighbors/plot_classification.html - :scale: 50 + :scale: 75 -.. centered:: |classification_1| |classification_2| +.. centered:: |classification_1| -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_neighbors_plot_classification.py`: an example of - classification using nearest neighbors. +* :ref:`sphx_glr_auto_examples_neighbors_plot_classification.py`: an example of + classification using nearest neighbors. .. _regression: @@ -245,13 +241,13 @@ the lower half of those faces. :align: center -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_neighbors_plot_regression.py`: an example of regression - using nearest neighbors. +* :ref:`sphx_glr_auto_examples_neighbors_plot_regression.py`: an example of regression + using nearest neighbors. - * :ref:`sphx_glr_auto_examples_miscellaneous_plot_multioutput_face_completion.py`: an example of - multi-output regression using nearest neighbors. +* :ref:`sphx_glr_auto_examples_miscellaneous_plot_multioutput_face_completion.py`: + an example of multi-output regression using nearest neighbors. Nearest Neighbor Algorithms @@ -309,11 +305,11 @@ keyword ``algorithm = 'kd_tree'``, and are computed using the class :class:`KDTree`. -.. topic:: References: +.. dropdown:: References - * `"Multidimensional binary search trees used for associative searching" - `_, - Bentley, J.L., Communications of the ACM (1975) + * `"Multidimensional binary search trees used for associative searching" + `_, + Bentley, J.L., Communications of the ACM (1975) .. _ball_tree: @@ -347,142 +343,142 @@ neighbors searches are specified using the keyword ``algorithm = 'ball_tree'``, and are computed using the class :class:`BallTree`. Alternatively, the user can work with the :class:`BallTree` class directly. -.. topic:: References: - - * `"Five Balltree Construction Algorithms" - `_, - Omohundro, S.M., International Computer Science Institute - Technical Report (1989) - -Choice of Nearest Neighbors Algorithm -------------------------------------- -The optimal algorithm for a given dataset is a complicated choice, and -depends on a number of factors: - -* number of samples :math:`N` (i.e. ``n_samples``) and dimensionality - :math:`D` (i.e. ``n_features``). 
- - * *Brute force* query time grows as :math:`O[D N]` - * *Ball tree* query time grows as approximately :math:`O[D \log(N)]` - * *KD tree* query time changes with :math:`D` in a way that is difficult - to precisely characterise. For small :math:`D` (less than 20 or so) - the cost is approximately :math:`O[D\log(N)]`, and the KD tree - query can be very efficient. - For larger :math:`D`, the cost increases to nearly :math:`O[DN]`, and - the overhead due to the tree - structure can lead to queries which are slower than brute force. - - For small data sets (:math:`N` less than 30 or so), :math:`\log(N)` is - comparable to :math:`N`, and brute force algorithms can be more efficient - than a tree-based approach. Both :class:`KDTree` and :class:`BallTree` - address this through providing a *leaf size* parameter: this controls the - number of samples at which a query switches to brute-force. This allows both - algorithms to approach the efficiency of a brute-force computation for small - :math:`N`. - -* data structure: *intrinsic dimensionality* of the data and/or *sparsity* - of the data. Intrinsic dimensionality refers to the dimension - :math:`d \le D` of a manifold on which the data lies, which can be linearly - or non-linearly embedded in the parameter space. Sparsity refers to the - degree to which the data fills the parameter space (this is to be - distinguished from the concept as used in "sparse" matrices. The data - matrix may have no zero entries, but the **structure** can still be - "sparse" in this sense). - - * *Brute force* query time is unchanged by data structure. - * *Ball tree* and *KD tree* query times can be greatly influenced - by data structure. In general, sparser data with a smaller intrinsic - dimensionality leads to faster query times. Because the KD tree - internal representation is aligned with the parameter axes, it will not - generally show as much improvement as ball tree for arbitrarily - structured data. - - Datasets used in machine learning tend to be very structured, and are - very well-suited for tree-based queries. - -* number of neighbors :math:`k` requested for a query point. - - * *Brute force* query time is largely unaffected by the value of :math:`k` - * *Ball tree* and *KD tree* query time will become slower as :math:`k` - increases. This is due to two effects: first, a larger :math:`k` leads - to the necessity to search a larger portion of the parameter space. - Second, using :math:`k > 1` requires internal queueing of results - as the tree is traversed. - - As :math:`k` becomes large compared to :math:`N`, the ability to prune - branches in a tree-based query is reduced. In this situation, Brute force - queries can be more efficient. - -* number of query points. Both the ball tree and the KD Tree - require a construction phase. The cost of this construction becomes - negligible when amortized over many queries. If only a small number of - queries will be performed, however, the construction can make up - a significant fraction of the total cost. If very few query points - will be required, brute force is better than a tree-based method. 
- -Currently, ``algorithm = 'auto'`` selects ``'brute'`` if any of the following -conditions are verified: - -* input data is sparse -* ``metric = 'precomputed'`` -* :math:`D > 15` -* :math:`k >= N/2` -* ``effective_metric_`` isn't in the ``VALID_METRICS`` list for either - ``'kd_tree'`` or ``'ball_tree'`` - -Otherwise, it selects the first out of ``'kd_tree'`` and ``'ball_tree'`` that -has ``effective_metric_`` in its ``VALID_METRICS`` list. This heuristic is -based on the following assumptions: - -* the number of query points is at least the same order as the number of - training points -* ``leaf_size`` is close to its default value of ``30`` -* when :math:`D > 15`, the intrinsic dimensionality of the data is generally - too high for tree-based methods - -Effect of ``leaf_size`` ------------------------ -As noted above, for small sample sizes a brute force search can be more -efficient than a tree-based query. This fact is accounted for in the ball -tree and KD tree by internally switching to brute force searches within -leaf nodes. The level of this switch can be specified with the parameter -``leaf_size``. This parameter choice has many effects: - -**construction time** - A larger ``leaf_size`` leads to a faster tree construction time, because - fewer nodes need to be created - -**query time** - Both a large or small ``leaf_size`` can lead to suboptimal query cost. - For ``leaf_size`` approaching 1, the overhead involved in traversing - nodes can significantly slow query times. For ``leaf_size`` approaching - the size of the training set, queries become essentially brute force. - A good compromise between these is ``leaf_size = 30``, the default value - of the parameter. - -**memory** - As ``leaf_size`` increases, the memory required to store a tree structure - decreases. This is especially important in the case of ball tree, which - stores a :math:`D`-dimensional centroid for each node. The required - storage space for :class:`BallTree` is approximately ``1 / leaf_size`` times - the size of the training set. - -``leaf_size`` is not referenced for brute force queries. - -Valid Metrics for Nearest Neighbor Algorithms ---------------------------------------------- - -For a list of available metrics, see the documentation of the :class:`DistanceMetric` -class and the metrics listed in `sklearn.metrics.pairwise.PAIRWISE_DISTANCE_FUNCTIONS`. -Note that the "cosine" metric uses :func:`~sklearn.metrics.pairwise.cosine_distances`. - -A list of valid metrics for any of the above algorithms can be obtained by using their -``valid_metric`` attribute. For example, valid metrics for ``KDTree`` can be generated by: - - >>> from sklearn.neighbors import KDTree - >>> print(sorted(KDTree.valid_metrics())) - ['chebyshev', 'cityblock', 'euclidean', 'infinity', 'l1', 'l2', 'manhattan', 'minkowski', 'p'] +.. dropdown:: References + + * `"Five Balltree Construction Algorithms" + `_, + Omohundro, S.M., International Computer Science Institute + Technical Report (1989) + +.. dropdown:: Choice of Nearest Neighbors Algorithm + + The optimal algorithm for a given dataset is a complicated choice, and + depends on a number of factors: + + * number of samples :math:`N` (i.e. ``n_samples``) and dimensionality + :math:`D` (i.e. ``n_features``). + + * *Brute force* query time grows as :math:`O[D N]` + * *Ball tree* query time grows as approximately :math:`O[D \log(N)]` + * *KD tree* query time changes with :math:`D` in a way that is difficult + to precisely characterise. 
For small :math:`D` (less than 20 or so) + the cost is approximately :math:`O[D\log(N)]`, and the KD tree + query can be very efficient. + For larger :math:`D`, the cost increases to nearly :math:`O[DN]`, and + the overhead due to the tree + structure can lead to queries which are slower than brute force. + + For small data sets (:math:`N` less than 30 or so), :math:`\log(N)` is + comparable to :math:`N`, and brute force algorithms can be more efficient + than a tree-based approach. Both :class:`KDTree` and :class:`BallTree` + address this through providing a *leaf size* parameter: this controls the + number of samples at which a query switches to brute-force. This allows both + algorithms to approach the efficiency of a brute-force computation for small + :math:`N`. + + * data structure: *intrinsic dimensionality* of the data and/or *sparsity* + of the data. Intrinsic dimensionality refers to the dimension + :math:`d \le D` of a manifold on which the data lies, which can be linearly + or non-linearly embedded in the parameter space. Sparsity refers to the + degree to which the data fills the parameter space (this is to be + distinguished from the concept as used in "sparse" matrices. The data + matrix may have no zero entries, but the **structure** can still be + "sparse" in this sense). + + * *Brute force* query time is unchanged by data structure. + * *Ball tree* and *KD tree* query times can be greatly influenced + by data structure. In general, sparser data with a smaller intrinsic + dimensionality leads to faster query times. Because the KD tree + internal representation is aligned with the parameter axes, it will not + generally show as much improvement as ball tree for arbitrarily + structured data. + + Datasets used in machine learning tend to be very structured, and are + very well-suited for tree-based queries. + + * number of neighbors :math:`k` requested for a query point. + + * *Brute force* query time is largely unaffected by the value of :math:`k` + * *Ball tree* and *KD tree* query time will become slower as :math:`k` + increases. This is due to two effects: first, a larger :math:`k` leads + to the necessity to search a larger portion of the parameter space. + Second, using :math:`k > 1` requires internal queueing of results + as the tree is traversed. + + As :math:`k` becomes large compared to :math:`N`, the ability to prune + branches in a tree-based query is reduced. In this situation, Brute force + queries can be more efficient. + + * number of query points. Both the ball tree and the KD Tree + require a construction phase. The cost of this construction becomes + negligible when amortized over many queries. If only a small number of + queries will be performed, however, the construction can make up + a significant fraction of the total cost. If very few query points + will be required, brute force is better than a tree-based method. + + Currently, ``algorithm = 'auto'`` selects ``'brute'`` if any of the following + conditions are verified: + + * input data is sparse + * ``metric = 'precomputed'`` + * :math:`D > 15` + * :math:`k >= N/2` + * ``effective_metric_`` isn't in the ``VALID_METRICS`` list for either + ``'kd_tree'`` or ``'ball_tree'`` + + Otherwise, it selects the first out of ``'kd_tree'`` and ``'ball_tree'`` that + has ``effective_metric_`` in its ``VALID_METRICS`` list. 
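A minimal sketch of this selection logic is given below; note that the ``_fit_method``
attribute peeked at here is a private implementation detail and may change between
versions::

    import numpy as np
    from scipy import sparse
    from sklearn.neighbors import NearestNeighbors

    rng = np.random.RandomState(0)
    X_dense = rng.standard_normal(size=(500, 5))      # dense, D = 5
    X_highdim = rng.standard_normal(size=(500, 50))   # dense, D > 15
    X_sparse = sparse.random(500, 5, density=0.1, format="csr", random_state=0)

    for name, X in [("dense, D=5", X_dense),
                    ("dense, D=50", X_highdim),
                    ("sparse", X_sparse)]:
        nn = NearestNeighbors(n_neighbors=3, algorithm="auto").fit(X)
        # Private attribute, shown here only to illustrate the heuristic:
        print(name, "->", nn._fit_method)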
This heuristic is + based on the following assumptions: + + * the number of query points is at least the same order as the number of + training points + * ``leaf_size`` is close to its default value of ``30`` + * when :math:`D > 15`, the intrinsic dimensionality of the data is generally + too high for tree-based methods + +.. dropdown:: Effect of ``leaf_size`` + + As noted above, for small sample sizes a brute force search can be more + efficient than a tree-based query. This fact is accounted for in the ball + tree and KD tree by internally switching to brute force searches within + leaf nodes. The level of this switch can be specified with the parameter + ``leaf_size``. This parameter choice has many effects: + + **construction time** + A larger ``leaf_size`` leads to a faster tree construction time, because + fewer nodes need to be created + + **query time** + Both a large or small ``leaf_size`` can lead to suboptimal query cost. + For ``leaf_size`` approaching 1, the overhead involved in traversing + nodes can significantly slow query times. For ``leaf_size`` approaching + the size of the training set, queries become essentially brute force. + A good compromise between these is ``leaf_size = 30``, the default value + of the parameter. + + **memory** + As ``leaf_size`` increases, the memory required to store a tree structure + decreases. This is especially important in the case of ball tree, which + stores a :math:`D`-dimensional centroid for each node. The required + storage space for :class:`BallTree` is approximately ``1 / leaf_size`` times + the size of the training set. + + ``leaf_size`` is not referenced for brute force queries. + +.. dropdown:: Valid Metrics for Nearest Neighbor Algorithms + + For a list of available metrics, see the documentation of the + :class:`~sklearn.metrics.DistanceMetric` class and the metrics listed in + `sklearn.metrics.pairwise.PAIRWISE_DISTANCE_FUNCTIONS`. Note that the "cosine" + metric uses :func:`~sklearn.metrics.pairwise.cosine_distances`. + + A list of valid metrics for any of the above algorithms can be obtained by using their + ``valid_metric`` attribute. For example, valid metrics for ``KDTree`` can be generated by: + + >>> from sklearn.neighbors import KDTree + >>> print(sorted(KDTree.valid_metrics)) + ['chebyshev', 'cityblock', 'euclidean', 'infinity', 'l1', 'l2', 'manhattan', 'minkowski', 'p'] .. _nearest_centroid_classifier: @@ -535,10 +531,10 @@ the model from 0.81 to 0.82. .. centered:: |nearest_centroid_1| |nearest_centroid_2| -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_neighbors_plot_nearest_centroid.py`: an example of - classification using nearest centroid with different shrink thresholds. +* :ref:`sphx_glr_auto_examples_neighbors_plot_nearest_centroid.py`: an example of + classification using nearest centroid with different shrink thresholds. .. _neighbors_transformer: @@ -623,17 +619,17 @@ implementation with special data types. The precomputed neighbors include one extra neighbor in a custom nearest neighbors estimator, since unnecessary neighbors will be filtered by following estimators. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_neighbors_approximate_nearest_neighbors.py`: - an example of pipelining :class:`KNeighborsTransformer` and - :class:`~sklearn.manifold.TSNE`. Also proposes two custom nearest neighbors - estimators based on external packages. 
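Returning to the ``leaf_size`` trade-off described above, a rough timing sketch
(absolute numbers are machine dependent; only the trend is of interest)::

    from time import perf_counter

    import numpy as np
    from sklearn.neighbors import KDTree

    rng = np.random.RandomState(0)
    X = rng.standard_normal(size=(20000, 3))

    for leaf_size in (1, 30, 1000):
        t0 = perf_counter()
        tree = KDTree(X, leaf_size=leaf_size)   # construction
        t1 = perf_counter()
        tree.query(X[:1000], k=5)               # query 1000 points
        t2 = perf_counter()
        print(f"leaf_size={leaf_size:>4}: build {t1 - t0:.3f}s, "
              f"query {t2 - t1:.3f}s")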
+* :ref:`sphx_glr_auto_examples_neighbors_approximate_nearest_neighbors.py`: + an example of pipelining :class:`KNeighborsTransformer` and + :class:`~sklearn.manifold.TSNE`. Also proposes two custom nearest neighbors + estimators based on external packages. - * :ref:`sphx_glr_auto_examples_neighbors_plot_caching_nearest_neighbors.py`: - an example of pipelining :class:`KNeighborsTransformer` and - :class:`KNeighborsClassifier` to enable caching of the neighbors graph - during a hyper-parameter grid-search. +* :ref:`sphx_glr_auto_examples_neighbors_plot_caching_nearest_neighbors.py`: + an example of pipelining :class:`KNeighborsTransformer` and + :class:`KNeighborsClassifier` to enable caching of the neighbors graph + during a hyper-parameter grid-search. .. _nca: @@ -757,11 +753,11 @@ by each method. Each data sample belongs to one of 10 classes. .. centered:: |nca_dim_reduction_1| |nca_dim_reduction_2| |nca_dim_reduction_3| -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_neighbors_plot_nca_classification.py` - * :ref:`sphx_glr_auto_examples_neighbors_plot_nca_dim_reduction.py` - * :ref:`sphx_glr_auto_examples_manifold_plot_lle_digits.py` +* :ref:`sphx_glr_auto_examples_neighbors_plot_nca_classification.py` +* :ref:`sphx_glr_auto_examples_neighbors_plot_nca_dim_reduction.py` +* :ref:`sphx_glr_auto_examples_manifold_plot_lle_digits.py` .. _nca_mathematical_formulation: @@ -794,18 +790,16 @@ space: p_{i j} = \frac{\exp(-||L x_i - L x_j||^2)}{\sum\limits_{k \ne i} {\exp{-(||L x_i - L x_k||^2)}}} , \quad p_{i i} = 0 +.. dropdown:: Mahalanobis distance -Mahalanobis distance -^^^^^^^^^^^^^^^^^^^^ + NCA can be seen as learning a (squared) Mahalanobis distance metric: -NCA can be seen as learning a (squared) Mahalanobis distance metric: - -.. math:: + .. math:: - || L(x_i - x_j)||^2 = (x_i - x_j)^TM(x_i - x_j), + || L(x_i - x_j)||^2 = (x_i - x_j)^TM(x_i - x_j), -where :math:`M = L^T L` is a symmetric positive semi-definite matrix of size -``(n_features, n_features)``. + where :math:`M = L^T L` is a symmetric positive semi-definite matrix of size + ``(n_features, n_features)``. Implementation @@ -838,12 +832,12 @@ complexity equals ``n_components * n_features * n_samples_test``. There is no added space complexity in the operation. -.. topic:: References: +.. rubric:: References - .. [1] `"Neighbourhood Components Analysis" - `_, - J. Goldberger, S. Roweis, G. Hinton, R. Salakhutdinov, Advances in - Neural Information Processing Systems, Vol. 17, May 2005, pp. 513-520. +.. [1] `"Neighbourhood Components Analysis" + `_, + J. Goldberger, S. Roweis, G. Hinton, R. Salakhutdinov, Advances in + Neural Information Processing Systems, Vol. 17, May 2005, pp. 513-520. - `Wikipedia entry on Neighborhood Components Analysis - `_ +* `Wikipedia entry on Neighborhood Components Analysis + `_ diff --git a/doc/modules/neural_networks_supervised.rst b/doc/modules/neural_networks_supervised.rst index 995faa9e6d19c..5c6baecb7e2ff 100644 --- a/doc/modules/neural_networks_supervised.rst +++ b/doc/modules/neural_networks_supervised.rst @@ -20,7 +20,7 @@ Multi-layer Perceptron ====================== **Multi-layer Perceptron (MLP)** is a supervised learning algorithm that learns -a function :math:`f(\cdot): R^m \rightarrow R^o` by training on a dataset, +a function :math:`f: R^m \rightarrow R^o` by training on a dataset, where :math:`m` is the number of dimensions for input and :math:`o` is the number of dimensions for output. 
Given a set of features :math:`X = {x_1, x_2, ..., x_m}` and a target :math:`y`, it can learn a non-linear function approximator for either @@ -49,27 +49,29 @@ The module contains the public attributes ``coefs_`` and ``intercepts_``. :math:`i+1`. ``intercepts_`` is a list of bias vectors, where the vector at index :math:`i` represents the bias values added to layer :math:`i+1`. -The advantages of Multi-layer Perceptron are: +.. dropdown:: Advantages and disadvantages of Multi-layer Perceptron - + Capability to learn non-linear models. + The advantages of Multi-layer Perceptron are: - + Capability to learn models in real-time (on-line learning) - using ``partial_fit``. + + Capability to learn non-linear models. + + Capability to learn models in real-time (on-line learning) + using ``partial_fit``. -The disadvantages of Multi-layer Perceptron (MLP) include: - + MLP with hidden layers have a non-convex loss function where there exists - more than one local minimum. Therefore different random weight - initializations can lead to different validation accuracy. + The disadvantages of Multi-layer Perceptron (MLP) include: - + MLP requires tuning a number of hyperparameters such as the number of - hidden neurons, layers, and iterations. + + MLP with hidden layers have a non-convex loss function where there exists + more than one local minimum. Therefore different random weight + initializations can lead to different validation accuracy. - + MLP is sensitive to feature scaling. + + MLP requires tuning a number of hyperparameters such as the number of + hidden neurons, layers, and iterations. -Please see :ref:`Tips on Practical Use ` section that addresses -some of these disadvantages. + + MLP is sensitive to feature scaling. + + Please see :ref:`Tips on Practical Use ` section that addresses + some of these disadvantages. Classification @@ -143,10 +145,11 @@ indices where the value is `1` represents the assigned classes of that sample:: See the examples below and the docstring of :meth:`MLPClassifier.fit` for further information. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_neural_networks_plot_mlp_training_curves.py` - * :ref:`sphx_glr_auto_examples_neural_networks_plot_mnist_filters.py` +* :ref:`sphx_glr_auto_examples_neural_networks_plot_mlp_training_curves.py` +* See :ref:`sphx_glr_auto_examples_neural_networks_plot_mnist_filters.py` for + visualized representation of trained weights. Regression ========== @@ -175,9 +178,9 @@ decision function with value of alpha. See the examples below for further information. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_neural_networks_plot_mlp_alpha.py` +* :ref:`sphx_glr_auto_examples_neural_networks_plot_mlp_alpha.py` Algorithms ========== @@ -223,87 +226,82 @@ Complexity Suppose there are :math:`n` training samples, :math:`m` features, :math:`k` hidden layers, each containing :math:`h` neurons - for simplicity, and :math:`o` output neurons. The time complexity of backpropagation is -:math:`O(n\cdot m \cdot h^k \cdot o \cdot i)`, where :math:`i` is the number +:math:`O(i \cdot n \cdot (m \cdot h + (k - 1) \cdot h \cdot h + h \cdot o))`, where :math:`i` is the number of iterations. Since backpropagation has a high time complexity, it is advisable to start with smaller number of hidden neurons and few hidden layers for training. +.. 
dropdown:: Mathematical formulation -Mathematical formulation -======================== - -Given a set of training examples :math:`(x_1, y_1), (x_2, y_2), \ldots, (x_n, y_n)` -where :math:`x_i \in \mathbf{R}^n` and :math:`y_i \in \{0, 1\}`, a one hidden -layer one hidden neuron MLP learns the function :math:`f(x) = W_2 g(W_1^T x + b_1) + b_2` -where :math:`W_1 \in \mathbf{R}^m` and :math:`W_2, b_1, b_2 \in \mathbf{R}` are -model parameters. :math:`W_1, W_2` represent the weights of the input layer and -hidden layer, respectively; and :math:`b_1, b_2` represent the bias added to -the hidden layer and the output layer, respectively. -:math:`g(\cdot) : R \rightarrow R` is the activation function, set by default as -the hyperbolic tan. It is given as, - -.. math:: - g(z)= \frac{e^z-e^{-z}}{e^z+e^{-z}} - -For binary classification, :math:`f(x)` passes through the logistic function -:math:`g(z)=1/(1+e^{-z})` to obtain output values between zero and one. A -threshold, set to 0.5, would assign samples of outputs larger or equal 0.5 -to the positive class, and the rest to the negative class. - -If there are more than two classes, :math:`f(x)` itself would be a vector of -size (n_classes,). Instead of passing through logistic function, it passes -through the softmax function, which is written as, + Given a set of training examples :math:`(x_1, y_1), (x_2, y_2), \ldots, (x_n, y_n)` + where :math:`x_i \in \mathbf{R}^n` and :math:`y_i \in \{0, 1\}`, a one hidden + layer one hidden neuron MLP learns the function :math:`f(x) = W_2 g(W_1^T x + b_1) + b_2` + where :math:`W_1 \in \mathbf{R}^m` and :math:`W_2, b_1, b_2 \in \mathbf{R}` are + model parameters. :math:`W_1, W_2` represent the weights of the input layer and + hidden layer, respectively; and :math:`b_1, b_2` represent the bias added to + the hidden layer and the output layer, respectively. + :math:`g(\cdot) : R \rightarrow R` is the activation function, set by default as + the hyperbolic tan. It is given as, -.. math:: - \text{softmax}(z)_i = \frac{\exp(z_i)}{\sum_{l=1}^k\exp(z_l)} + .. math:: + g(z)= \frac{e^z-e^{-z}}{e^z+e^{-z}} -where :math:`z_i` represents the :math:`i` th element of the input to softmax, -which corresponds to class :math:`i`, and :math:`K` is the number of classes. -The result is a vector containing the probabilities that sample :math:`x` -belong to each class. The output is the class with the highest probability. + For binary classification, :math:`f(x)` passes through the logistic function + :math:`g(z)=1/(1+e^{-z})` to obtain output values between zero and one. A + threshold, set to 0.5, would assign samples of outputs larger or equal 0.5 + to the positive class, and the rest to the negative class. -In regression, the output remains as :math:`f(x)`; therefore, output activation -function is just the identity function. + If there are more than two classes, :math:`f(x)` itself would be a vector of + size (n_classes,). Instead of passing through logistic function, it passes + through the softmax function, which is written as, -MLP uses different loss functions depending on the problem type. The loss -function for classification is Average Cross-Entropy, which in binary case is -given as, + .. math:: + \text{softmax}(z)_i = \frac{\exp(z_i)}{\sum_{l=1}^k\exp(z_l)} -.. math:: + where :math:`z_i` represents the :math:`i` th element of the input to softmax, + which corresponds to class :math:`i`, and :math:`K` is the number of classes. 
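As a small numeric aside, the activation and softmax definitions above can be checked
directly with NumPy (the input values are arbitrary)::

    import numpy as np

    def tanh_activation(z):
        # g(z) = (e^z - e^(-z)) / (e^z + e^(-z)), the default hidden activation
        return (np.exp(z) - np.exp(-z)) / (np.exp(z) + np.exp(-z))

    def softmax(z):
        # softmax(z)_i = exp(z_i) / sum_l exp(z_l); the max is subtracted
        # for numerical stability and does not change the result
        e = np.exp(z - np.max(z))
        return e / e.sum()

    z = np.array([0.5, -1.2, 2.0])
    assert np.allclose(tanh_activation(z), np.tanh(z))
    print(softmax(z), softmax(z).sum())   # probabilities summing to 1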
+ The result is a vector containing the probabilities that sample :math:`x` + belong to each class. The output is the class with the highest probability. - Loss(\hat{y},y,W) = -\dfrac{1}{n}\sum_{i=0}^n(y_i \ln {\hat{y_i}} + (1-y_i) \ln{(1-\hat{y_i})}) + \dfrac{\alpha}{2n} ||W||_2^2 + In regression, the output remains as :math:`f(x)`; therefore, output activation + function is just the identity function. -where :math:`\alpha ||W||_2^2` is an L2-regularization term (aka penalty) -that penalizes complex models; and :math:`\alpha > 0` is a non-negative -hyperparameter that controls the magnitude of the penalty. + MLP uses different loss functions depending on the problem type. The loss + function for classification is Average Cross-Entropy, which in binary case is + given as, -For regression, MLP uses the Mean Square Error loss function; written as, + .. math:: -.. math:: + Loss(\hat{y},y,W) = -\dfrac{1}{n}\sum_{i=0}^n(y_i \ln {\hat{y_i}} + (1-y_i) \ln{(1-\hat{y_i})}) + \dfrac{\alpha}{2n} ||W||_2^2 - Loss(\hat{y},y,W) = \frac{1}{2n}\sum_{i=0}^n||\hat{y}_i - y_i ||_2^2 + \frac{\alpha}{2n} ||W||_2^2 + where :math:`\alpha ||W||_2^2` is an L2-regularization term (aka penalty) + that penalizes complex models; and :math:`\alpha > 0` is a non-negative + hyperparameter that controls the magnitude of the penalty. + For regression, MLP uses the Mean Square Error loss function; written as, -Starting from initial random weights, multi-layer perceptron (MLP) minimizes -the loss function by repeatedly updating these weights. After computing the -loss, a backward pass propagates it from the output layer to the previous -layers, providing each weight parameter with an update value meant to decrease -the loss. + .. math:: -In gradient descent, the gradient :math:`\nabla Loss_{W}` of the loss with respect -to the weights is computed and deducted from :math:`W`. -More formally, this is expressed as, + Loss(\hat{y},y,W) = \frac{1}{2n}\sum_{i=0}^n||\hat{y}_i - y_i ||_2^2 + \frac{\alpha}{2n} ||W||_2^2 -.. math:: - W^{i+1} = W^i - \epsilon \nabla {Loss}_{W}^{i} + Starting from initial random weights, multi-layer perceptron (MLP) minimizes + the loss function by repeatedly updating these weights. After computing the + loss, a backward pass propagates it from the output layer to the previous + layers, providing each weight parameter with an update value meant to decrease + the loss. + In gradient descent, the gradient :math:`\nabla Loss_{W}` of the loss with respect + to the weights is computed and deducted from :math:`W`. + More formally, this is expressed as, -where :math:`i` is the iteration step, and :math:`\epsilon` is the learning rate -with a value larger than 0. + .. math:: + W^{i+1} = W^i - \epsilon \nabla {Loss}_{W}^{i} -The algorithm stops when it reaches a preset maximum number of iterations; or -when the improvement in loss is below a certain, small number. + where :math:`i` is the iteration step, and :math:`\epsilon` is the learning rate + with a value larger than 0. + The algorithm stops when it reaches a preset maximum number of iterations; or + when the improvement in loss is below a certain, small number. .. _mlp_tips: @@ -311,34 +309,35 @@ when the improvement in loss is below a certain, small number. Tips on Practical Use ===================== - * Multi-layer Perceptron is sensitive to feature scaling, so it - is highly recommended to scale your data. For example, scale each - attribute on the input vector X to [0, 1] or [-1, +1], or standardize - it to have mean 0 and variance 1. 
Note that you must apply the *same* - scaling to the test set for meaningful results. - You can use :class:`StandardScaler` for standardization. - - >>> from sklearn.preprocessing import StandardScaler # doctest: +SKIP - >>> scaler = StandardScaler() # doctest: +SKIP - >>> # Don't cheat - fit only on training data - >>> scaler.fit(X_train) # doctest: +SKIP - >>> X_train = scaler.transform(X_train) # doctest: +SKIP - >>> # apply same transformation to test data - >>> X_test = scaler.transform(X_test) # doctest: +SKIP - - An alternative and recommended approach is to use :class:`StandardScaler` - in a :class:`Pipeline` - - * Finding a reasonable regularization parameter :math:`\alpha` is - best done using :class:`GridSearchCV`, usually in the - range ``10.0 ** -np.arange(1, 7)``. - - * Empirically, we observed that `L-BFGS` converges faster and - with better solutions on small datasets. For relatively large - datasets, however, `Adam` is very robust. It usually converges - quickly and gives pretty good performance. `SGD` with momentum or - nesterov's momentum, on the other hand, can perform better than - those two algorithms if learning rate is correctly tuned. +* Multi-layer Perceptron is sensitive to feature scaling, so it + is highly recommended to scale your data. For example, scale each + attribute on the input vector X to [0, 1] or [-1, +1], or standardize + it to have mean 0 and variance 1. Note that you must apply the *same* + scaling to the test set for meaningful results. + You can use :class:`~sklearn.preprocessing.StandardScaler` for standardization. + + >>> from sklearn.preprocessing import StandardScaler # doctest: +SKIP + >>> scaler = StandardScaler() # doctest: +SKIP + >>> # Don't cheat - fit only on training data + >>> scaler.fit(X_train) # doctest: +SKIP + >>> X_train = scaler.transform(X_train) # doctest: +SKIP + >>> # apply same transformation to test data + >>> X_test = scaler.transform(X_test) # doctest: +SKIP + + An alternative and recommended approach is to use + :class:`~sklearn.preprocessing.StandardScaler` in a + :class:`~sklearn.pipeline.Pipeline` + +* Finding a reasonable regularization parameter :math:`\alpha` is best done + using :class:`~sklearn.model_selection.GridSearchCV`, usually in the range + ``10.0 ** -np.arange(1, 7)``. + +* Empirically, we observed that `L-BFGS` converges faster and + with better solutions on small datasets. For relatively large + datasets, however, `Adam` is very robust. It usually converges + quickly and gives pretty good performance. `SGD` with momentum or + nesterov's momentum, on the other hand, can perform better than + those two algorithms if learning rate is correctly tuned. More control with warm_start ============================ @@ -354,21 +353,19 @@ or want to do additional monitoring, using ``warm_start=True`` and ... # additional monitoring / inspection MLPClassifier(... -.. topic:: References: +.. dropdown:: References - * `"Learning representations by back-propagating errors." - `_ - Rumelhart, David E., Geoffrey E. Hinton, and Ronald J. Williams. + * `"Learning representations by back-propagating errors." + `_ + Rumelhart, David E., Geoffrey E. Hinton, and Ronald J. Williams. - * `"Stochastic Gradient Descent" `_ L. Bottou - Website, 2010. + * `"Stochastic Gradient Descent" `_ L. Bottou - Website, 2010. - * `"Backpropagation" `_ - Andrew Ng, Jiquan Ngiam, Chuan Yu Foo, Yifan Mai, Caroline Suen - Website, 2011. + * `"Backpropagation" `_ + Andrew Ng, Jiquan Ngiam, Chuan Yu Foo, Yifan Mai, Caroline Suen - Website, 2011. 
- * `"Efficient BackProp" `_ - Y. LeCun, L. Bottou, G. Orr, K. MÃŧller - In Neural Networks: Tricks - of the Trade 1998. + * `"Efficient BackProp" `_ + Y. LeCun, L. Bottou, G. Orr, K. MÃŧller - In Neural Networks: Tricks of the Trade 1998. - * :arxiv:`"Adam: A method for stochastic optimization." - <1412.6980>` - Kingma, Diederik, and Jimmy Ba (2014) + * :arxiv:`"Adam: A method for stochastic optimization." <1412.6980>` + Kingma, Diederik, and Jimmy Ba (2014) diff --git a/doc/modules/neural_networks_unsupervised.rst b/doc/modules/neural_networks_unsupervised.rst index aca56ae8aaf2e..7f6c0016d183b 100644 --- a/doc/modules/neural_networks_unsupervised.rst +++ b/doc/modules/neural_networks_unsupervised.rst @@ -37,9 +37,9 @@ weights of independent RBMs. This method is known as unsupervised pre-training. :align: center :scale: 100% -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_neural_networks_plot_rbm_logistic_classification.py` +* :ref:`sphx_glr_auto_examples_neural_networks_plot_rbm_logistic_classification.py` Graphical model and parametrization @@ -57,7 +57,7 @@ visible and hidden unit, omitted from the image for simplicity. The energy function measures the quality of a joint assignment: -.. math:: +.. math:: E(\mathbf{v}, \mathbf{h}) = -\sum_i \sum_j w_{ij}v_ih_j - \sum_i b_iv_i - \sum_j c_jh_j @@ -149,13 +149,13 @@ step, in PCD we keep a number of chains (fantasy particles) that are updated :math:`k` Gibbs steps after each weight update. This allows the particles to explore the space more thoroughly. -.. topic:: References: +.. rubric:: References - * `"A fast learning algorithm for deep belief nets" - `_ - G. Hinton, S. Osindero, Y.-W. Teh, 2006 +* `"A fast learning algorithm for deep belief nets" + `_, + G. Hinton, S. Osindero, Y.-W. Teh, 2006 - * `"Training Restricted Boltzmann Machines using Approximations to - the Likelihood Gradient" - `_ - T. Tieleman, 2008 +* `"Training Restricted Boltzmann Machines using Approximations to + the Likelihood Gradient" + `_, + T. Tieleman, 2008 diff --git a/doc/modules/outlier_detection.rst b/doc/modules/outlier_detection.rst index 572674328108d..0c6891ed119bd 100644 --- a/doc/modules/outlier_detection.rst +++ b/doc/modules/outlier_detection.rst @@ -123,19 +123,19 @@ refer to the example :ref:`sphx_glr_auto_examples_miscellaneous_plot_anomaly_comparison.py` and the sections hereunder. -.. topic:: Examples: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_miscellaneous_plot_anomaly_comparison.py` - for a comparison of the :class:`svm.OneClassSVM`, the - :class:`ensemble.IsolationForest`, the - :class:`neighbors.LocalOutlierFactor` and - :class:`covariance.EllipticEnvelope`. +* See :ref:`sphx_glr_auto_examples_miscellaneous_plot_anomaly_comparison.py` + for a comparison of the :class:`svm.OneClassSVM`, the + :class:`ensemble.IsolationForest`, the + :class:`neighbors.LocalOutlierFactor` and + :class:`covariance.EllipticEnvelope`. - * See :ref:`sphx_glr_auto_examples_miscellaneous_plot_outlier_detection_bench.py` - for an example showing how to evaluate outlier detection estimators, - the :class:`neighbors.LocalOutlierFactor` and the - :class:`ensemble.IsolationForest`, using ROC curves from - :class:`metrics.RocCurveDisplay`. 
+* See :ref:`sphx_glr_auto_examples_miscellaneous_plot_outlier_detection_bench.py` + for an example showing how to evaluate outlier detection estimators, + the :class:`neighbors.LocalOutlierFactor` and the + :class:`ensemble.IsolationForest`, using ROC curves from + :class:`metrics.RocCurveDisplay`. Novelty Detection ================= @@ -167,18 +167,18 @@ implementation. The `nu` parameter, also known as the margin of the One-Class SVM, corresponds to the probability of finding a new, but regular, observation outside the frontier. -.. topic:: References: +.. rubric:: References - * `Estimating the support of a high-dimensional distribution - `_ - SchÃļlkopf, Bernhard, et al. Neural computation 13.7 (2001): 1443-1471. +* `Estimating the support of a high-dimensional distribution + `_ + SchÃļlkopf, Bernhard, et al. Neural computation 13.7 (2001): 1443-1471. -.. topic:: Examples: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_svm_plot_oneclass.py` for visualizing the - frontier learned around some data by a - :class:`svm.OneClassSVM` object. - * :ref:`sphx_glr_auto_examples_applications_plot_species_distribution_modeling.py` +* See :ref:`sphx_glr_auto_examples_svm_plot_oneclass.py` for visualizing the + frontier learned around some data by a :class:`svm.OneClassSVM` object. + +* :ref:`sphx_glr_auto_examples_applications_plot_species_distribution_modeling.py` .. figure:: ../auto_examples/svm/images/sphx_glr_plot_oneclass_001.png :target: ../auto_examples/svm/plot_oneclass.html @@ -196,11 +196,11 @@ approximate the solution of a kernelized :class:`svm.OneClassSVM` whose complexity is at best quadratic in the number of samples. See section :ref:`sgd_online_one_class_svm` for more details. -.. topic:: Examples: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_linear_model_plot_sgdocsvm_vs_ocsvm.py` - for an illustration of the approximation of a kernelized One-Class SVM - with the `linear_model.SGDOneClassSVM` combined with kernel approximation. +* See :ref:`sphx_glr_auto_examples_linear_model_plot_sgdocsvm_vs_ocsvm.py` + for an illustration of the approximation of a kernelized One-Class SVM + with the `linear_model.SGDOneClassSVM` combined with kernel approximation. Outlier Detection @@ -238,18 +238,18 @@ This strategy is illustrated below. :align: center :scale: 75% -.. topic:: Examples: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_covariance_plot_mahalanobis_distances.py` for - an illustration of the difference between using a standard - (:class:`covariance.EmpiricalCovariance`) or a robust estimate - (:class:`covariance.MinCovDet`) of location and covariance to - assess the degree of outlyingness of an observation. +* See :ref:`sphx_glr_auto_examples_covariance_plot_mahalanobis_distances.py` for + an illustration of the difference between using a standard + (:class:`covariance.EmpiricalCovariance`) or a robust estimate + (:class:`covariance.MinCovDet`) of location and covariance to + assess the degree of outlyingness of an observation. -.. topic:: References: +.. rubric:: References - * Rousseeuw, P.J., Van Driessen, K. "A fast algorithm for the minimum - covariance determinant estimator" Technometrics 41(3), 212 (1999) +* Rousseeuw, P.J., Van Driessen, K. "A fast algorithm for the minimum + covariance determinant estimator" Technometrics 41(3), 212 (1999) .. 
_isolation_forest: @@ -299,22 +299,22 @@ allows you to add more trees to an already fitted model:: >>> clf.set_params(n_estimators=20) # add 10 more trees # doctest: +SKIP >>> clf.fit(X) # fit the added trees # doctest: +SKIP -.. topic:: Examples: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_ensemble_plot_isolation_forest.py` for - an illustration of the use of IsolationForest. +* See :ref:`sphx_glr_auto_examples_ensemble_plot_isolation_forest.py` for + an illustration of the use of IsolationForest. - * See :ref:`sphx_glr_auto_examples_miscellaneous_plot_anomaly_comparison.py` - for a comparison of :class:`ensemble.IsolationForest` with - :class:`neighbors.LocalOutlierFactor`, - :class:`svm.OneClassSVM` (tuned to perform like an outlier detection - method), :class:`linear_model.SGDOneClassSVM`, and a covariance-based - outlier detection with :class:`covariance.EllipticEnvelope`. +* See :ref:`sphx_glr_auto_examples_miscellaneous_plot_anomaly_comparison.py` + for a comparison of :class:`ensemble.IsolationForest` with + :class:`neighbors.LocalOutlierFactor`, + :class:`svm.OneClassSVM` (tuned to perform like an outlier detection + method), :class:`linear_model.SGDOneClassSVM`, and a covariance-based + outlier detection with :class:`covariance.EllipticEnvelope`. -.. topic:: References: +.. rubric:: References - * Liu, Fei Tony, Ting, Kai Ming and Zhou, Zhi-Hua. "Isolation forest." - Data Mining, 2008. ICDM'08. Eighth IEEE International Conference on. +* Liu, Fei Tony, Ting, Kai Ming and Zhou, Zhi-Hua. "Isolation forest." + Data Mining, 2008. ICDM'08. Eighth IEEE International Conference on. .. _local_outlier_factor: @@ -370,20 +370,20 @@ This strategy is illustrated below. :align: center :scale: 75% -.. topic:: Examples: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_neighbors_plot_lof_outlier_detection.py` - for an illustration of the use of :class:`neighbors.LocalOutlierFactor`. +* See :ref:`sphx_glr_auto_examples_neighbors_plot_lof_outlier_detection.py` + for an illustration of the use of :class:`neighbors.LocalOutlierFactor`. - * See :ref:`sphx_glr_auto_examples_miscellaneous_plot_anomaly_comparison.py` - for a comparison with other anomaly detection methods. +* See :ref:`sphx_glr_auto_examples_miscellaneous_plot_anomaly_comparison.py` + for a comparison with other anomaly detection methods. -.. topic:: References: +.. rubric:: References - * Breunig, Kriegel, Ng, and Sander (2000) - `LOF: identifying density-based local outliers. - `_ - Proc. ACM SIGMOD +* Breunig, Kriegel, Ng, and Sander (2000) + `LOF: identifying density-based local outliers. + `_ + Proc. ACM SIGMOD .. _novelty_with_lof: @@ -411,7 +411,7 @@ Note that ``fit_predict`` is not available in this case to avoid inconsistencies Novelty detection with Local Outlier Factor is illustrated below. - .. figure:: ../auto_examples/neighbors/images/sphx_glr_plot_lof_novelty_detection_001.png - :target: ../auto_examples/neighbors/plot_lof_novelty_detection.html - :align: center - :scale: 75% +.. figure:: ../auto_examples/neighbors/images/sphx_glr_plot_lof_novelty_detection_001.png + :target: ../auto_examples/neighbors/plot_lof_novelty_detection.html + :align: center + :scale: 75% diff --git a/doc/modules/partial_dependence.rst b/doc/modules/partial_dependence.rst index 7ce099f2342e9..40f691a9e6dcc 100644 --- a/doc/modules/partial_dependence.rst +++ b/doc/modules/partial_dependence.rst @@ -79,19 +79,21 @@ parameter takes a list of indices, names of the categorical features or a boolea mask. 
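A hedged sketch of the ``categorical_features`` plotting parameter mentioned above;
the data, the ordinal encoding of the categorical column and the choice of
:class:`~sklearn.ensemble.HistGradientBoostingRegressor` are made up for illustration
(plotting requires matplotlib)::

    import numpy as np
    from sklearn.ensemble import HistGradientBoostingRegressor
    from sklearn.inspection import PartialDependenceDisplay

    rng = np.random.RandomState(0)
    size = rng.uniform(1, 10, size=200)
    color_code = rng.randint(0, 3, size=200)   # 3 categories encoded as 0, 1, 2
    y = 2 * size + 5 * (color_code == 1) + rng.normal(scale=0.5, size=200)
    X = np.column_stack([size, color_code])

    # Column 1 holds the ordinal codes of a categorical feature.
    model = HistGradientBoostingRegressor(categorical_features=[1]).fit(X, y)

    # The categorical column is rendered as a bar plot, the numeric one as a curve.
    PartialDependenceDisplay.from_estimator(
        model, X, features=[0, 1], categorical_features=[1]
    )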
The graphical representation of partial dependence for categorical features is a bar plot or a 2D heatmap. -For multi-class classification, you need to set the class label for which -the PDPs should be created via the ``target`` argument:: - - >>> from sklearn.datasets import load_iris - >>> iris = load_iris() - >>> mc_clf = GradientBoostingClassifier(n_estimators=10, - ... max_depth=1).fit(iris.data, iris.target) - >>> features = [3, 2, (3, 2)] - >>> PartialDependenceDisplay.from_estimator(mc_clf, X, features, target=0) - <...> +.. dropdown:: PDPs for multi-class classification + + For multi-class classification, you need to set the class label for which + the PDPs should be created via the ``target`` argument:: + + >>> from sklearn.datasets import load_iris + >>> iris = load_iris() + >>> mc_clf = GradientBoostingClassifier(n_estimators=10, + ... max_depth=1).fit(iris.data, iris.target) + >>> features = [3, 2, (3, 2)] + >>> PartialDependenceDisplay.from_estimator(mc_clf, X, features, target=0) + <...> -The same parameter ``target`` is used to specify the target in multi-output -regression settings. + The same parameter ``target`` is used to specify the target in multi-output + regression settings. If you need the raw values of the partial dependence function rather than the plots, you can use the @@ -102,7 +104,7 @@ the plots, you can use the >>> results = partial_dependence(clf, X, [0]) >>> results["average"] array([[ 2.466..., 2.466..., ... - >>> results["values"] + >>> results["grid_values"] [array([-1.624..., -1.592..., ... The values at which the partial dependence should be evaluated are directly @@ -260,9 +262,9 @@ estimators that support it, and 'brute' is used for the rest. interpreting PDPs is that the features should be independent. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_inspection_plot_partial_dependence.py` +* :ref:`sphx_glr_auto_examples_inspection_plot_partial_dependence.py` .. rubric:: Footnotes @@ -270,21 +272,20 @@ estimators that support it, and 'brute' is used for the rest. class (the positive class for binary classification), or the decision function. -.. topic:: References +.. rubric:: References - .. [H2009] T. Hastie, R. Tibshirani and J. Friedman, - `The Elements of Statistical Learning - `_, - Second Edition, Section 10.13.2, Springer, 2009. +.. [H2009] T. Hastie, R. Tibshirani and J. Friedman, + `The Elements of Statistical Learning + `_, + Second Edition, Section 10.13.2, Springer, 2009. - .. [M2019] C. Molnar, - `Interpretable Machine Learning - `_, - Section 5.1, 2019. +.. [M2019] C. Molnar, + `Interpretable Machine Learning + `_, + Section 5.1, 2019. - .. [G2015] :arxiv:`A. Goldstein, A. Kapelner, J. Bleich, and E. Pitkin, - "Peeking Inside the Black Box: Visualizing Statistical - Learning With Plots of Individual Conditional Expectation" - Journal of Computational and Graphical Statistics, - 24(1): 44-65, Springer, 2015. - <1309.6392>` +.. [G2015] :arxiv:`A. Goldstein, A. Kapelner, J. Bleich, and E. Pitkin, + "Peeking Inside the Black Box: Visualizing Statistical + Learning With Plots of Individual Conditional Expectation" + Journal of Computational and Graphical Statistics, + 24(1): 44-65, Springer, 2015. <1309.6392>` diff --git a/doc/modules/permutation_importance.rst b/doc/modules/permutation_importance.rst index f2530aac3a388..12a20a8bcaa6c 100644 --- a/doc/modules/permutation_importance.rst +++ b/doc/modules/permutation_importance.rst @@ -6,15 +6,45 @@ Permutation feature importance .. 
currentmodule:: sklearn.inspection -Permutation feature importance is a model inspection technique that can be used -for any :term:`fitted` :term:`estimator` when the data is tabular. This is -especially useful for non-linear or opaque :term:`estimators`. The permutation -feature importance is defined to be the decrease in a model score when a single -feature value is randomly shuffled [1]_. This procedure breaks the relationship -between the feature and the target, thus the drop in the model score is -indicative of how much the model depends on the feature. This technique -benefits from being model agnostic and can be calculated many times with -different permutations of the feature. +Permutation feature importance is a model inspection technique that measures the +contribution of each feature to a :term:`fitted` model's statistical performance +on a given tabular dataset. This technique is particularly useful for non-linear +or opaque :term:`estimators`, and involves randomly shuffling the values of a +single feature and observing the resulting degradation of the model's score +[1]_. By breaking the relationship between the feature and the target, we +determine how much the model relies on such particular feature. + +In the following figures, we observe the effect of permuting features on the correlation +between the feature and the target and consequently on the model statistical +performance. + +.. image:: ../images/permuted_predictive_feature.png + :align: center + +.. image:: ../images/permuted_non_predictive_feature.png + :align: center + +On the top figure, we observe that permuting a predictive feature breaks the +correlation between the feature and the target, and consequently the model +statistical performance decreases. On the bottom figure, we observe that permuting +a non-predictive feature does not significantly degrade the model statistical performance. + +One key advantage of permutation feature importance is that it is +model-agnostic, i.e. it can be applied to any fitted estimator. Moreover, it can +be calculated multiple times with different permutations of the feature, further +providing a measure of the variance in the estimated feature importances for the +specific trained model. + +The figure below shows the permutation feature importance of a +:class:`~sklearn.ensemble.RandomForestClassifier` trained on an augmented +version of the titanic dataset that contains a `random_cat` and a `random_num` +features, i.e. a categrical and a numerical feature that are not correlated in +any way with the target variable: + +.. figure:: ../auto_examples/inspection/images/sphx_glr_plot_permutation_importance_002.png + :target: ../auto_examples/inspection/plot_permutation_importance.html + :align: center + :scale: 70 .. warning:: @@ -74,49 +104,50 @@ highlight which features contribute the most to the generalization power of the inspected model. Features that are important on the training set but not on the held-out set might cause the model to overfit. -The permutation feature importance is the decrease in a model score when a single -feature value is randomly shuffled. The score function to be used for the -computation of importances can be specified with the `scoring` argument, -which also accepts multiple scorers. Using multiple scorers is more computationally -efficient than sequentially calling :func:`permutation_importance` several times -with a different scorer, as it reuses model predictions. 
- -An example of using multiple scorers is shown below, employing a list of metrics, -but more input formats are possible, as documented in :ref:`multimetric_scoring`. - - >>> scoring = ['r2', 'neg_mean_absolute_percentage_error', 'neg_mean_squared_error'] - >>> r_multi = permutation_importance( - ... model, X_val, y_val, n_repeats=30, random_state=0, scoring=scoring) - ... - >>> for metric in r_multi: - ... print(f"{metric}") - ... r = r_multi[metric] - ... for i in r.importances_mean.argsort()[::-1]: - ... if r.importances_mean[i] - 2 * r.importances_std[i] > 0: - ... print(f" {diabetes.feature_names[i]:<8}" - ... f"{r.importances_mean[i]:.3f}" - ... f" +/- {r.importances_std[i]:.3f}") - ... - r2 - s5 0.204 +/- 0.050 - bmi 0.176 +/- 0.048 - bp 0.088 +/- 0.033 - sex 0.056 +/- 0.023 - neg_mean_absolute_percentage_error - s5 0.081 +/- 0.020 - bmi 0.064 +/- 0.015 - bp 0.029 +/- 0.010 - neg_mean_squared_error - s5 1013.866 +/- 246.445 - bmi 872.726 +/- 240.298 - bp 438.663 +/- 163.022 - sex 277.376 +/- 115.123 - -The ranking of the features is approximately the same for different metrics even -if the scales of the importance values are very different. However, this is not -guaranteed and different metrics might lead to significantly different feature -importances, in particular for models trained for imbalanced classification problems, -for which the choice of the classification metric can be critical. +The permutation feature importance depends on the score function that is +specified with the `scoring` argument. This argument accepts multiple scorers, +which is more computationally efficient than sequentially calling +:func:`permutation_importance` several times with a different scorer, as it +reuses model predictions. + +.. dropdown:: Example of permutation feature importance using multiple scorers + + In the example below we use a list of metrics, but more input formats are + possible, as documented in :ref:`multimetric_scoring`. + + >>> scoring = ['r2', 'neg_mean_absolute_percentage_error', 'neg_mean_squared_error'] + >>> r_multi = permutation_importance( + ... model, X_val, y_val, n_repeats=30, random_state=0, scoring=scoring) + ... + >>> for metric in r_multi: + ... print(f"{metric}") + ... r = r_multi[metric] + ... for i in r.importances_mean.argsort()[::-1]: + ... if r.importances_mean[i] - 2 * r.importances_std[i] > 0: + ... print(f" {diabetes.feature_names[i]:<8}" + ... f"{r.importances_mean[i]:.3f}" + ... f" +/- {r.importances_std[i]:.3f}") + ... + r2 + s5 0.204 +/- 0.050 + bmi 0.176 +/- 0.048 + bp 0.088 +/- 0.033 + sex 0.056 +/- 0.023 + neg_mean_absolute_percentage_error + s5 0.081 +/- 0.020 + bmi 0.064 +/- 0.015 + bp 0.029 +/- 0.010 + neg_mean_squared_error + s5 1013.866 +/- 246.445 + bmi 872.726 +/- 240.298 + bp 438.663 +/- 163.022 + sex 277.376 +/- 115.123 + + The ranking of the features is approximately the same for different metrics even + if the scales of the importance values are very different. However, this is not + guaranteed and different metrics might lead to significantly different feature + importances, in particular for models trained for imbalanced classification problems, + for which **the choice of the classification metric can be critical**. Outline of the permutation importance algorithm ----------------------------------------------- @@ -156,9 +187,9 @@ over low cardinality features such as binary features or categorical variables with a small number of possible categories. Permutation-based feature importances do not exhibit such a bias. 
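The core of the procedure outlined above can be reproduced in a few lines of NumPy.
This is a simplified sketch, not the actual :func:`permutation_importance`
implementation, which additionally handles arbitrary scorers, multiple metrics,
sample weights and parallelism::

    import numpy as np
    from sklearn.datasets import load_diabetes
    from sklearn.linear_model import Ridge
    from sklearn.model_selection import train_test_split

    X, y = load_diabetes(return_X_y=True)
    X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=0)
    model = Ridge(alpha=1e-2).fit(X_train, y_train)
    baseline = model.score(X_val, y_val)

    rng = np.random.RandomState(0)
    n_repeats = 5
    for j in range(X_val.shape[1]):
        drops = []
        for _ in range(n_repeats):
            X_perm = X_val.copy()
            # Shuffling one column breaks its relationship with the target.
            X_perm[:, j] = rng.permutation(X_perm[:, j])
            drops.append(baseline - model.score(X_perm, y_val))
        print(f"feature {j}: importance ~ {np.mean(drops):.3f}")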
Additionally, -the permutation feature importance may be computed performance metric on the -model predictions and can be used to analyze any model class (not -just tree-based models). +the permutation feature importance may be computed with any performance metric +on the model predictions and can be used to analyze any model class (not just +tree-based models). The following example highlights the limitations of impurity-based feature importance in contrast to permutation-based feature importance: @@ -168,21 +199,37 @@ Misleading values on strongly correlated features ------------------------------------------------- When two features are correlated and one of the features is permuted, the model -will still have access to the feature through its correlated feature. This will -result in a lower importance value for both features, where they might -*actually* be important. +still has access to the latter through its correlated feature. This results in a +lower reported importance value for both features, though they might *actually* +be important. + +The figure below shows the permutation feature importance of a +:class:`~sklearn.ensemble.RandomForestClassifier` trained using the +:ref:`breast_cancer_dataset`, which contains strongly correlated features. A +naive interpretation would suggest that all features are unimportant: + +.. figure:: ../auto_examples/inspection/images/sphx_glr_plot_permutation_importance_multicollinear_002.png + :target: ../auto_examples/inspection/plot_permutation_importance_multicollinear.html + :align: center + :scale: 70 + +One way to handle the issue is to cluster features that are correlated and only +keep one feature from each cluster. + +.. figure:: ../auto_examples/inspection/images/sphx_glr_plot_permutation_importance_multicollinear_004.png + :target: ../auto_examples/inspection/plot_permutation_importance_multicollinear.html + :align: center + :scale: 70 -One way to handle this is to cluster features that are correlated and only -keep one feature from each cluster. This strategy is explored in the following -example: +For more details on such strategy, see the example :ref:`sphx_glr_auto_examples_inspection_plot_permutation_importance_multicollinear.py`. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_inspection_plot_permutation_importance.py` - * :ref:`sphx_glr_auto_examples_inspection_plot_permutation_importance_multicollinear.py` +* :ref:`sphx_glr_auto_examples_inspection_plot_permutation_importance.py` +* :ref:`sphx_glr_auto_examples_inspection_plot_permutation_importance_multicollinear.py` -.. topic:: References: +.. rubric:: References - .. [1] L. Breiman, :doi:`"Random Forests" <10.1023/A:1010933404324>`, - Machine Learning, 45(1), 5-32, 2001. +.. [1] L. Breiman, :doi:`"Random Forests" <10.1023/A:1010933404324>`, + Machine Learning, 45(1), 5-32, 2001. diff --git a/doc/modules/preprocessing.rst b/doc/modules/preprocessing.rst index 69045147d8af9..90889ad5af7e0 100644 --- a/doc/modules/preprocessing.rst +++ b/doc/modules/preprocessing.rst @@ -10,9 +10,10 @@ The ``sklearn.preprocessing`` package provides several common utility functions and transformer classes to change raw feature vectors into a representation that is more suitable for the downstream estimators. -In general, learning algorithms benefit from standardization of the data set. If -some outliers are present in the set, robust scalers or transformers are more -appropriate. 
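As a minimal sketch of why a robust scaler can be preferable in the presence of
outliers (the data below is made up): a single extreme value strongly compresses the
standardized bulk of the data, while :class:`~sklearn.preprocessing.RobustScaler`,
being based on the median and interquartile range, is barely affected::

    import numpy as np
    from sklearn.preprocessing import RobustScaler, StandardScaler

    X = np.array([[1.0], [2.0], [3.0], [4.0], [1000.0]])  # last sample is an outlier

    print(StandardScaler().fit_transform(X).ravel())
    print(RobustScaler().fit_transform(X).ravel())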
The behaviors of the different scalers, transformers, and +In general, many learning algorithms such as linear models benefit from standardization of the data set +(see :ref:`sphx_glr_auto_examples_preprocessing_plot_scaling_importance.py`). +If some outliers are present in the set, robust scalers or other transformers can +be more appropriate. The behaviors of the different scalers, transformers, and normalizers on a dataset containing marginal outliers is highlighted in :ref:`sphx_glr_auto_examples_preprocessing_plot_all_scaling.py`. @@ -219,13 +220,13 @@ of the data is likely to not work very well. In these cases, you can use more robust estimates for the center and range of your data. -.. topic:: References: +.. dropdown:: References Further discussion on the importance of centering and scaling data is available on this FAQ: `Should I normalize/standardize/rescale the data? `_ -.. topic:: Scaling vs Whitening +.. dropdown:: Scaling vs Whitening It is sometimes not enough to center and scale the features independently, since a downstream model can further make some assumption @@ -234,6 +235,7 @@ more robust estimates for the center and range of your data. To address this issue you can use :class:`~sklearn.decomposition.PCA` with ``whiten=True`` to further remove the linear correlation across features. + .. _kernel_centering: Centering kernel matrices @@ -247,53 +249,53 @@ followed by the removal of the mean in that space. In other words, :class:`KernelCenterer` computes the centered Gram matrix associated to a positive semidefinite kernel :math:`K`. -**Mathematical formulation** +.. dropdown:: Mathematical formulation -We can have a look at the mathematical formulation now that we have the -intuition. Let :math:`K` be a kernel matrix of shape `(n_samples, n_samples)` -computed from :math:`X`, a data matrix of shape `(n_samples, n_features)`, -during the `fit` step. :math:`K` is defined by + We can have a look at the mathematical formulation now that we have the + intuition. Let :math:`K` be a kernel matrix of shape `(n_samples, n_samples)` + computed from :math:`X`, a data matrix of shape `(n_samples, n_features)`, + during the `fit` step. :math:`K` is defined by -.. math:: - K(X, X) = \phi(X) . \phi(X)^{T} + .. math:: + K(X, X) = \phi(X) . \phi(X)^{T} -:math:`\phi(X)` is a function mapping of :math:`X` to a Hilbert space. A -centered kernel :math:`\tilde{K}` is defined as: + :math:`\phi(X)` is a function mapping of :math:`X` to a Hilbert space. A + centered kernel :math:`\tilde{K}` is defined as: -.. math:: - \tilde{K}(X, X) = \tilde{\phi}(X) . \tilde{\phi}(X)^{T} + .. math:: + \tilde{K}(X, X) = \tilde{\phi}(X) . \tilde{\phi}(X)^{T} -where :math:`\tilde{\phi}(X)` results from centering :math:`\phi(X)` in the -Hilbert space. + where :math:`\tilde{\phi}(X)` results from centering :math:`\phi(X)` in the + Hilbert space. -Thus, one could compute :math:`\tilde{K}` by mapping :math:`X` using the -function :math:`\phi(\cdot)` and center the data in this new space. However, -kernels are often used because they allows some algebra calculations that -avoid computing explicitly this mapping using :math:`\phi(\cdot)`. Indeed, one -can implicitly center as shown in Appendix B in [Scholkopf1998]_: + Thus, one could compute :math:`\tilde{K}` by mapping :math:`X` using the + function :math:`\phi(\cdot)` and center the data in this new space. However, + kernels are often used because they allows some algebra calculations that + avoid computing explicitly this mapping using :math:`\phi(\cdot)`. 
Indeed, one + can implicitly center as shown in Appendix B in [Scholkopf1998]_: -.. math:: - \tilde{K} = K - 1_{\text{n}_{samples}} K - K 1_{\text{n}_{samples}} + 1_{\text{n}_{samples}} K 1_{\text{n}_{samples}} + .. math:: + \tilde{K} = K - 1_{\text{n}_{samples}} K - K 1_{\text{n}_{samples}} + 1_{\text{n}_{samples}} K 1_{\text{n}_{samples}} -:math:`1_{\text{n}_{samples}}` is a matrix of `(n_samples, n_samples)` where -all entries are equal to :math:`\frac{1}{\text{n}_{samples}}`. In the -`transform` step, the kernel becomes :math:`K_{test}(X, Y)` defined as: + :math:`1_{\text{n}_{samples}}` is a matrix of `(n_samples, n_samples)` where + all entries are equal to :math:`\frac{1}{\text{n}_{samples}}`. In the + `transform` step, the kernel becomes :math:`K_{test}(X, Y)` defined as: -.. math:: - K_{test}(X, Y) = \phi(Y) . \phi(X)^{T} + .. math:: + K_{test}(X, Y) = \phi(Y) . \phi(X)^{T} -:math:`Y` is the test dataset of shape `(n_samples_test, n_features)` and thus -:math:`K_{test}` is of shape `(n_samples_test, n_samples)`. In this case, -centering :math:`K_{test}` is done as: + :math:`Y` is the test dataset of shape `(n_samples_test, n_features)` and thus + :math:`K_{test}` is of shape `(n_samples_test, n_samples)`. In this case, + centering :math:`K_{test}` is done as: -.. math:: - \tilde{K}_{test}(X, Y) = K_{test} - 1'_{\text{n}_{samples}} K - K_{test} 1_{\text{n}_{samples}} + 1'_{\text{n}_{samples}} K 1_{\text{n}_{samples}} + .. math:: + \tilde{K}_{test}(X, Y) = K_{test} - 1'_{\text{n}_{samples}} K - K_{test} 1_{\text{n}_{samples}} + 1'_{\text{n}_{samples}} K 1_{\text{n}_{samples}} -:math:`1'_{\text{n}_{samples}}` is a matrix of shape -`(n_samples_test, n_samples)` where all entries are equal to -:math:`\frac{1}{\text{n}_{samples}}`. + :math:`1'_{\text{n}_{samples}}` is a matrix of shape + `(n_samples_test, n_samples)` where all entries are equal to + :math:`\frac{1}{\text{n}_{samples}}`. -.. topic:: References + .. rubric:: References .. [Scholkopf1998] B. SchÃļlkopf, A. Smola, and K.R. MÃŧller, `"Nonlinear component analysis as a kernel eigenvalue problem." @@ -371,46 +373,46 @@ possible in order to stabilize variance and minimize skewness. :class:`PowerTransformer` currently provides two such power transformations, the Yeo-Johnson transform and the Box-Cox transform. -The Yeo-Johnson transform is given by: - -.. math:: - x_i^{(\lambda)} = - \begin{cases} - [(x_i + 1)^\lambda - 1] / \lambda & \text{if } \lambda \neq 0, x_i \geq 0, \\[8pt] - \ln{(x_i + 1)} & \text{if } \lambda = 0, x_i \geq 0 \\[8pt] - -[(-x_i + 1)^{2 - \lambda} - 1] / (2 - \lambda) & \text{if } \lambda \neq 2, x_i < 0, \\[8pt] - - \ln (- x_i + 1) & \text{if } \lambda = 2, x_i < 0 - \end{cases} - -while the Box-Cox transform is given by: - -.. math:: - x_i^{(\lambda)} = - \begin{cases} - \dfrac{x_i^\lambda - 1}{\lambda} & \text{if } \lambda \neq 0, \\[8pt] - \ln{(x_i)} & \text{if } \lambda = 0, - \end{cases} - - -Box-Cox can only be applied to strictly positive data. In both methods, the -transformation is parameterized by :math:`\lambda`, which is determined through -maximum likelihood estimation. 
Here is an example of using Box-Cox to map -samples drawn from a lognormal distribution to a normal distribution:: - - >>> pt = preprocessing.PowerTransformer(method='box-cox', standardize=False) - >>> X_lognormal = np.random.RandomState(616).lognormal(size=(3, 3)) - >>> X_lognormal - array([[1.28..., 1.18..., 0.84...], - [0.94..., 1.60..., 0.38...], - [1.35..., 0.21..., 1.09...]]) - >>> pt.fit_transform(X_lognormal) - array([[ 0.49..., 0.17..., -0.15...], - [-0.05..., 0.58..., -0.57...], - [ 0.69..., -0.84..., 0.10...]]) - -While the above example sets the `standardize` option to `False`, -:class:`PowerTransformer` will apply zero-mean, unit-variance normalization -to the transformed output by default. +.. dropdown:: Yeo-Johnson transform + + .. math:: + x_i^{(\lambda)} = + \begin{cases} + [(x_i + 1)^\lambda - 1] / \lambda & \text{if } \lambda \neq 0, x_i \geq 0, \\[8pt] + \ln{(x_i + 1)} & \text{if } \lambda = 0, x_i \geq 0 \\[8pt] + -[(-x_i + 1)^{2 - \lambda} - 1] / (2 - \lambda) & \text{if } \lambda \neq 2, x_i < 0, \\[8pt] + - \ln (- x_i + 1) & \text{if } \lambda = 2, x_i < 0 + \end{cases} + +.. dropdown:: Box-Cox transform + + .. math:: + x_i^{(\lambda)} = + \begin{cases} + \dfrac{x_i^\lambda - 1}{\lambda} & \text{if } \lambda \neq 0, \\[8pt] + \ln{(x_i)} & \text{if } \lambda = 0, + \end{cases} + + Box-Cox can only be applied to strictly positive data. In both methods, the + transformation is parameterized by :math:`\lambda`, which is determined through + maximum likelihood estimation. Here is an example of using Box-Cox to map + samples drawn from a lognormal distribution to a normal distribution:: + + >>> pt = preprocessing.PowerTransformer(method='box-cox', standardize=False) + >>> X_lognormal = np.random.RandomState(616).lognormal(size=(3, 3)) + >>> X_lognormal + array([[1.28..., 1.18..., 0.84...], + [0.94..., 1.60..., 0.38...], + [1.35..., 0.21..., 1.09...]]) + >>> pt.fit_transform(X_lognormal) + array([[ 0.49..., 0.17..., -0.15...], + [-0.05..., 0.58..., -0.57...], + [ 0.69..., -0.84..., 0.10...]]) + + While the above example sets the `standardize` option to `False`, + :class:`PowerTransformer` will apply zero-mean, unit-variance normalization + to the transformed output by default. + Below are examples of Box-Cox and Yeo-Johnson applied to various probability distributions. Note that when applied to certain distributions, the power @@ -498,7 +500,7 @@ The normalizer instance can then be used on sample vectors as any transformer:: Note: L2 normalization is also known as spatial sign preprocessing. -.. topic:: Sparse input +.. dropdown:: Sparse input :func:`normalize` and :class:`Normalizer` accept **both dense array-like and sparse matrices from scipy.sparse as input**. @@ -512,6 +514,7 @@ Note: L2 normalization is also known as spatial sign preprocessing. Encoding categorical features ============================= + Often features are not given as continuous values but categorical. For example a person could have features ``["male", "female"]``, ``["from Europe", "from US", "from Asia"]``, @@ -698,36 +701,39 @@ not dropped:: >>> drop_enc.inverse_transform(X_trans) array([['female', None, None]], dtype=object) -:class:`OneHotEncoder` supports categorical features with missing values by -considering the missing values as an additional category:: +.. dropdown:: Support of categorical features with missing values - >>> X = [['male', 'Safari'], - ... ['female', None], - ... 
[np.nan, 'Firefox']] - >>> enc = preprocessing.OneHotEncoder(handle_unknown='error').fit(X) - >>> enc.categories_ - [array(['female', 'male', nan], dtype=object), - array(['Firefox', 'Safari', None], dtype=object)] - >>> enc.transform(X).toarray() - array([[0., 1., 0., 0., 1., 0.], - [1., 0., 0., 0., 0., 1.], - [0., 0., 1., 1., 0., 0.]]) - -If a feature contains both `np.nan` and `None`, they will be considered -separate categories:: - - >>> X = [['Safari'], [None], [np.nan], ['Firefox']] - >>> enc = preprocessing.OneHotEncoder(handle_unknown='error').fit(X) - >>> enc.categories_ - [array(['Firefox', 'Safari', None, nan], dtype=object)] - >>> enc.transform(X).toarray() - array([[0., 1., 0., 0.], - [0., 0., 1., 0.], - [0., 0., 0., 1.], - [1., 0., 0., 0.]]) + :class:`OneHotEncoder` supports categorical features with missing values by + considering the missing values as an additional category:: + + >>> X = [['male', 'Safari'], + ... ['female', None], + ... [np.nan, 'Firefox']] + >>> enc = preprocessing.OneHotEncoder(handle_unknown='error').fit(X) + >>> enc.categories_ + [array(['female', 'male', nan], dtype=object), + array(['Firefox', 'Safari', None], dtype=object)] + >>> enc.transform(X).toarray() + array([[0., 1., 0., 0., 1., 0.], + [1., 0., 0., 0., 0., 1.], + [0., 0., 1., 1., 0., 0.]]) + + If a feature contains both `np.nan` and `None`, they will be considered + separate categories:: + + >>> X = [['Safari'], [None], [np.nan], ['Firefox']] + >>> enc = preprocessing.OneHotEncoder(handle_unknown='error').fit(X) + >>> enc.categories_ + [array(['Firefox', 'Safari', None, nan], dtype=object)] + >>> enc.transform(X).toarray() + array([[0., 1., 0., 0.], + [0., 0., 1., 0.], + [0., 0., 0., 1.], + [1., 0., 0., 0.]]) + + See :ref:`dict_feature_extraction` for categorical features that are + represented as a dict, not as scalars. -See :ref:`dict_feature_extraction` for categorical features that are -represented as a dict, not as scalars. .. _encoder_infrequent_categories: @@ -879,58 +885,85 @@ feature for encoding unordered categories, i.e. nominal categories [PAR]_ [MIC]_. This encoding scheme is useful with categorical features with high cardinality, where one-hot encoding would inflate the feature space making it more expensive for a downstream model to process. A classical example of high -cardinality categories are location based such as zip code or region. For the -binary classification target, the target encoding is given by: - -.. math:: - S_i = \lambda_i\frac{n_{iY}}{n_i} + (1 - \lambda_i)\frac{n_y}{n} - -where :math:`S_i` is the encoding for category :math:`i`, :math:`n_{iY}` is the -number of observations with :math:`Y=1` with category :math:`i`, :math:`n_i` is -the number of observations with category :math:`i`, :math:`n_y` is the number of -observations with :math:`Y=1`, :math:`n` is the number of observations, and -:math:`\lambda_i` is a shrinkage factor. The shrinkage factor is given by: - -.. math:: - \lambda_i = \frac{n_i}{m + n_i} - -where :math:`m` is a smoothing factor, which is controlled with the `smooth` -parameter in :class:`TargetEncoder`. Large smoothing factors will put more -weight on the global mean. When `smooth="auto"`, the smoothing factor is -computed as an empirical Bayes estimate: :math:`m=\sigma_c^2/\tau^2`, where -:math:`\sigma_i^2` is the variance of `y` with category :math:`i` and -:math:`\tau^2` is the global variance of `y`. - -For continuous targets, the formulation is similar to binary classification: - -.. 
math:: - S_i = \lambda_i\frac{\sum_{k\in L_i}y_k}{n_i} + (1 - \lambda_i)\frac{\sum_{k=1}^{n}y_k}{n} - -where :math:`L_i` is the set of observations for which :math:`X=X_i` and -:math:`n_i` is the cardinality of :math:`L_i`. - -:meth:`~TargetEncoder.fit_transform` internally relies on a cross validation -scheme to prevent information from the target from leaking into the train-time -representation for non-informative high-cardinality categorical variables and -help prevent the downstream model to overfit spurious correlations. Note that -as a result, `fit(X, y).transform(X)` does not equal `fit_transform(X, y)`. In -:meth:`~TargetEncoder.fit_transform`, the training data is split into multiple -folds and encodes each fold by using the encodings trained on the other folds. -After cross validation is complete in :meth:`~TargetEncoder.fit_transform`, the -target encoder learns one final encoding on the whole training set. This final -encoding is used to encode categories in :meth:`~TargetEncoder.transform`. The -following diagram shows the cross validation scheme in +cardinality categories are location based such as zip code or region. + +.. dropdown:: Binary classification targets + + For the binary classification target, the target encoding is given by: + + .. math:: + S_i = \lambda_i\frac{n_{iY}}{n_i} + (1 - \lambda_i)\frac{n_Y}{n} + + where :math:`S_i` is the encoding for category :math:`i`, :math:`n_{iY}` is the + number of observations with :math:`Y=1` and category :math:`i`, :math:`n_i` is + the number of observations with category :math:`i`, :math:`n_Y` is the number of + observations with :math:`Y=1`, :math:`n` is the number of observations, and + :math:`\lambda_i` is a shrinkage factor for category :math:`i`. The shrinkage + factor is given by: + + .. math:: + \lambda_i = \frac{n_i}{m + n_i} + + where :math:`m` is a smoothing factor, which is controlled with the `smooth` + parameter in :class:`TargetEncoder`. Large smoothing factors will put more + weight on the global mean. When `smooth="auto"`, the smoothing factor is + computed as an empirical Bayes estimate: :math:`m=\sigma_i^2/\tau^2`, where + :math:`\sigma_i^2` is the variance of `y` with category :math:`i` and + :math:`\tau^2` is the global variance of `y`. + +.. dropdown:: Multiclass classification targets + + For multiclass classification targets, the formulation is similar to binary + classification: + + .. math:: + S_{ij} = \lambda_i\frac{n_{iY_j}}{n_i} + (1 - \lambda_i)\frac{n_{Y_j}}{n} + + where :math:`S_{ij}` is the encoding for category :math:`i` and class :math:`j`, + :math:`n_{iY_j}` is the number of observations with :math:`Y=j` and category + :math:`i`, :math:`n_i` is the number of observations with category :math:`i`, + :math:`n_{Y_j}` is the number of observations with :math:`Y=j`, :math:`n` is the + number of observations, and :math:`\lambda_i` is a shrinkage factor for category + :math:`i`. + +.. dropdown:: Continuous targets + + For continuous targets, the formulation is similar to binary classification: + + .. math:: + S_i = \lambda_i\frac{\sum_{k\in L_i}Y_k}{n_i} + (1 - \lambda_i)\frac{\sum_{k=1}^{n}Y_k}{n} + + where :math:`L_i` is the set of observations with category :math:`i` and + :math:`n_i` is the number of observations with category :math:`i`. 
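As a rough, made-up numerical sketch of the binary shrinkage formula above (the
category counts and the smoothing factor ``m`` below are arbitrary and not taken
from any real dataset), the encoding can be computed by hand as follows::

    >>> n_i, n_iY = 3, 2   # category seen 3 times, twice with Y=1
    >>> n, n_Y = 10, 4     # 10 observations overall, 4 with Y=1
    >>> m = 5.0            # smoothing factor, i.e. the `smooth` parameter
    >>> lam = n_i / (m + n_i)
    >>> S_i = lam * n_iY / n_i + (1 - lam) * n_Y / n
    >>> round(S_i, 3)
    0.5

The larger ``m`` is, the smaller the shrinkage factor :math:`\lambda_i` becomes
and the closer the encoding is pulled towards the global mean :math:`n_Y / n`.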
+ + +:meth:`~TargetEncoder.fit_transform` internally relies on a :term:`cross fitting` +scheme to prevent target information from leaking into the train-time +representation, especially for non-informative high-cardinality categorical +variables, and help prevent the downstream model from overfitting spurious +correlations. Note that as a result, `fit(X, y).transform(X)` does not equal +`fit_transform(X, y)`. In :meth:`~TargetEncoder.fit_transform`, the training +data is split into *k* folds (determined by the `cv` parameter) and each fold is +encoded using the encodings learnt using the other *k-1* folds. The following +diagram shows the :term:`cross fitting` scheme in :meth:`~TargetEncoder.fit_transform` with the default `cv=5`: .. image:: ../images/target_encoder_cross_validation.svg :width: 600 :align: center -The :meth:`~TargetEncoder.fit` method does **not** use any cross validation +:meth:`~TargetEncoder.fit_transform` also learns a 'full data' encoding using +the whole training set. This is never used in +:meth:`~TargetEncoder.fit_transform` but is saved to the attribute `encodings_`, +for use when :meth:`~TargetEncoder.transform` is called. Note that the encodings +learned for each fold during the :term:`cross fitting` scheme are not saved to +an attribute. + +The :meth:`~TargetEncoder.fit` method does **not** use any :term:`cross fitting` schemes and learns one encoding on the entire training set, which is used to encode categories in :meth:`~TargetEncoder.transform`. -:meth:`~TargetEncoder.fit`'s one encoding is the same as the final encoding -learned in :meth:`~TargetEncoder.fit_transform`. +This encoding is the same as the 'full data' +encoding learned in :meth:`~TargetEncoder.fit_transform`. .. note:: :class:`TargetEncoder` considers missing values, such as `np.nan` or `None`, @@ -938,21 +971,21 @@ learned in :meth:`~TargetEncoder.fit_transform`. that are not seen during `fit` are encoded with the target mean, i.e. `target_mean_`. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_preprocessing_plot_target_encoder.py` - * :ref:`sphx_glr_auto_examples_preprocessing_plot_target_encoder_cross_val.py` +* :ref:`sphx_glr_auto_examples_preprocessing_plot_target_encoder.py` +* :ref:`sphx_glr_auto_examples_preprocessing_plot_target_encoder_cross_val.py` -.. topic:: References +.. rubric:: References - .. [MIC] :doi:`Micci-Barreca, Daniele. "A preprocessing scheme for high-cardinality - categorical attributes in classification and prediction problems" - SIGKDD Explor. Newsl. 3, 1 (July 2001), 27–32. <10.1145/507533.507538>` +.. [MIC] :doi:`Micci-Barreca, Daniele. "A preprocessing scheme for high-cardinality + categorical attributes in classification and prediction problems" + SIGKDD Explor. Newsl. 3, 1 (July 2001), 27-32. <10.1145/507533.507538>` - .. [PAR] :doi:`Pargent, F., Pfisterer, F., Thomas, J. et al. "Regularized target - encoding outperforms traditional methods in supervised machine learning with - high cardinality features" Comput Stat 37, 2671–2692 (2022) - <10.1007/s00180-022-01207-6>` +.. [PAR] :doi:`Pargent, F., Pfisterer, F., Thomas, J. et al. "Regularized target + encoding outperforms traditional methods in supervised machine learning with + high cardinality features" Comput Stat 37, 2671-2692 (2022) + <10.1007/s00180-022-01207-6>` .. _preprocessing_discretization: @@ -988,9 +1021,9 @@ For each feature, the bin edges are computed during ``fit`` and together with the number of bins, they will define the intervals. 
Therefore, for the current example, these intervals are defined as: - - feature 1: :math:`{[-\infty, -1), [-1, 2), [2, \infty)}` - - feature 2: :math:`{[-\infty, 5), [5, \infty)}` - - feature 3: :math:`{[-\infty, 14), [14, \infty)}` +- feature 1: :math:`{[-\infty, -1), [-1, 2), [2, \infty)}` +- feature 2: :math:`{[-\infty, 5), [5, \infty)}` +- feature 3: :math:`{[-\infty, 14), [14, \infty)}` Based on these bin intervals, ``X`` is transformed as follows:: @@ -1018,6 +1051,8 @@ For instance, we can use the Pandas function :func:`pandas.cut`:: >>> import pandas as pd >>> import numpy as np + >>> from sklearn import preprocessing + >>> >>> bins = [0, 1, 13, 20, 60, np.inf] >>> labels = ['infant', 'kid', 'teen', 'adult', 'senior citizen'] >>> transformer = preprocessing.FunctionTransformer( @@ -1028,11 +1063,11 @@ For instance, we can use the Pandas function :func:`pandas.cut`:: ['infant', 'kid', 'teen', 'adult', 'senior citizen'] Categories (5, object): ['infant' < 'kid' < 'teen' < 'adult' < 'senior citizen'] -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_preprocessing_plot_discretization.py` - * :ref:`sphx_glr_auto_examples_preprocessing_plot_discretization_classification.py` - * :ref:`sphx_glr_auto_examples_preprocessing_plot_discretization_strategies.py` +* :ref:`sphx_glr_auto_examples_preprocessing_plot_discretization.py` +* :ref:`sphx_glr_auto_examples_preprocessing_plot_discretization_classification.py` +* :ref:`sphx_glr_auto_examples_preprocessing_plot_discretization_strategies.py` .. _preprocessing_binarization: @@ -1179,23 +1214,23 @@ below. Some of the advantages of splines over polynomials are: - - B-splines are very flexible and robust if you keep a fixed low degree, - usually 3, and parsimoniously adapt the number of knots. Polynomials - would need a higher degree, which leads to the next point. - - B-splines do not have oscillatory behaviour at the boundaries as have - polynomials (the higher the degree, the worse). This is known as `Runge's - phenomenon `_. - - B-splines provide good options for extrapolation beyond the boundaries, - i.e. beyond the range of fitted values. Have a look at the option - ``extrapolation``. - - B-splines generate a feature matrix with a banded structure. For a single - feature, every row contains only ``degree + 1`` non-zero elements, which - occur consecutively and are even positive. This results in a matrix with - good numerical properties, e.g. a low condition number, in sharp contrast - to a matrix of polynomials, which goes under the name - `Vandermonde matrix `_. - A low condition number is important for stable algorithms of linear - models. +- B-splines are very flexible and robust if you keep a fixed low degree, + usually 3, and parsimoniously adapt the number of knots. Polynomials + would need a higher degree, which leads to the next point. +- B-splines do not have oscillatory behaviour at the boundaries as have + polynomials (the higher the degree, the worse). This is known as `Runge's + phenomenon `_. +- B-splines provide good options for extrapolation beyond the boundaries, + i.e. beyond the range of fitted values. Have a look at the option + ``extrapolation``. +- B-splines generate a feature matrix with a banded structure. For a single + feature, every row contains only ``degree + 1`` non-zero elements, which + occur consecutively and are even positive. This results in a matrix with + good numerical properties, e.g. 
a low condition number, in sharp contrast + to a matrix of polynomials, which goes under the name + `Vandermonde matrix `_. + A low condition number is important for stable algorithms of linear + models. The following code snippet shows splines in action:: @@ -1225,19 +1260,20 @@ Interestingly, a :class:`SplineTransformer` of ``degree=0`` is the same as ``encode='onehot-dense'`` and ``n_bins = n_knots - 1`` if ``knots = strategy``. -.. topic:: Examples: +.. rubric:: Examples + +* :ref:`sphx_glr_auto_examples_linear_model_plot_polynomial_interpolation.py` +* :ref:`sphx_glr_auto_examples_applications_plot_cyclical_feature_engineering.py` - * :ref:`sphx_glr_auto_examples_linear_model_plot_polynomial_interpolation.py` - * :ref:`sphx_glr_auto_examples_applications_plot_cyclical_feature_engineering.py` +.. dropdown:: References -.. topic:: References: + * Eilers, P., & Marx, B. (1996). :doi:`Flexible Smoothing with B-splines and + Penalties <10.1214/ss/1038425655>`. Statist. Sci. 11 (1996), no. 2, 89--121. - * Eilers, P., & Marx, B. (1996). :doi:`Flexible Smoothing with B-splines and - Penalties <10.1214/ss/1038425655>`. Statist. Sci. 11 (1996), no. 2, 89--121. + * Perperoglou, A., Sauerbrei, W., Abrahamowicz, M. et al. :doi:`A review of + spline function procedures in R <10.1186/s12874-019-0666-3>`. + BMC Med Res Methodol 19, 46 (2019). - * Perperoglou, A., Sauerbrei, W., Abrahamowicz, M. et al. :doi:`A review of - spline function procedures in R <10.1186/s12874-019-0666-3>`. - BMC Med Res Methodol 19, 46 (2019). .. _function_transformer: diff --git a/doc/modules/random_projection.rst b/doc/modules/random_projection.rst index 6931feb34ad1d..173aee434576c 100644 --- a/doc/modules/random_projection.rst +++ b/doc/modules/random_projection.rst @@ -19,19 +19,19 @@ samples of the dataset. Thus random projection is a suitable approximation technique for distance based method. -.. topic:: References: +.. rubric:: References - * Sanjoy Dasgupta. 2000. - `Experiments with random projection. `_ - In Proceedings of the Sixteenth conference on Uncertainty in artificial - intelligence (UAI'00), Craig Boutilier and MoisÊs Goldszmidt (Eds.). Morgan - Kaufmann Publishers Inc., San Francisco, CA, USA, 143-151. +* Sanjoy Dasgupta. 2000. + `Experiments with random projection. `_ + In Proceedings of the Sixteenth conference on Uncertainty in artificial + intelligence (UAI'00), Craig Boutilier and MoisÊs Goldszmidt (Eds.). Morgan + Kaufmann Publishers Inc., San Francisco, CA, USA, 143-151. - * Ella Bingham and Heikki Mannila. 2001. - `Random projection in dimensionality reduction: applications to image and text data. `_ - In Proceedings of the seventh ACM SIGKDD international conference on - Knowledge discovery and data mining (KDD '01). ACM, New York, NY, USA, - 245-250. +* Ella Bingham and Heikki Mannila. 2001. + `Random projection in dimensionality reduction: applications to image and text data. `_ + In Proceedings of the seventh ACM SIGKDD international conference on + Knowledge discovery and data mining (KDD '01). ACM, New York, NY, USA, + 245-250. .. _johnson_lindenstrauss: @@ -74,17 +74,17 @@ bounded distortion introduced by the random projection:: :scale: 75 :align: center -.. topic:: Example: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_miscellaneous_plot_johnson_lindenstrauss_bound.py` - for a theoretical explication on the Johnson-Lindenstrauss lemma and an - empirical validation using sparse random matrices. 
+* See :ref:`sphx_glr_auto_examples_miscellaneous_plot_johnson_lindenstrauss_bound.py` + for a theoretical explication on the Johnson-Lindenstrauss lemma and an + empirical validation using sparse random matrices. -.. topic:: References: +.. rubric:: References - * Sanjoy Dasgupta and Anupam Gupta, 1999. - `An elementary proof of the Johnson-Lindenstrauss Lemma. - `_ +* Sanjoy Dasgupta and Anupam Gupta, 1999. + `An elementary proof of the Johnson-Lindenstrauss Lemma. + `_ .. _gaussian_random_matrix: @@ -148,18 +148,17 @@ projection transformer:: (100, 3947) -.. topic:: References: +.. rubric:: References - * D. Achlioptas. 2003. - `Database-friendly random projections: Johnson-Lindenstrauss with binary - coins `_. - Journal of Computer and System Sciences 66 (2003) 671–687 +* D. Achlioptas. 2003. + `Database-friendly random projections: Johnson-Lindenstrauss with binary + coins `_. + Journal of Computer and System Sciences 66 (2003) 671-687. - * Ping Li, Trevor J. Hastie, and Kenneth W. Church. 2006. - `Very sparse random projections. `_ - In Proceedings of the 12th ACM SIGKDD international conference on - Knowledge discovery and data mining (KDD '06). ACM, New York, NY, USA, - 287-296. +* Ping Li, Trevor J. Hastie, and Kenneth W. Church. 2006. + `Very sparse random projections. `_ + In Proceedings of the 12th ACM SIGKDD international conference on + Knowledge discovery and data mining (KDD '06). ACM, New York, NY, USA, 287-296. .. _random_projection_inverse_transform: diff --git a/doc/modules/semi_supervised.rst b/doc/modules/semi_supervised.rst index 47e8bfffdd9a7..8ba33638c6eec 100644 --- a/doc/modules/semi_supervised.rst +++ b/doc/modules/semi_supervised.rst @@ -60,18 +60,18 @@ until all samples have labels or no new samples are selected in that iteration. When using the self-training classifier, the :ref:`calibration ` of the classifier is important. -.. topic:: Examples +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_semi_supervised_plot_self_training_varying_threshold.py` - * :ref:`sphx_glr_auto_examples_semi_supervised_plot_semi_supervised_versus_svm_iris.py` +* :ref:`sphx_glr_auto_examples_semi_supervised_plot_self_training_varying_threshold.py` +* :ref:`sphx_glr_auto_examples_semi_supervised_plot_semi_supervised_versus_svm_iris.py` -.. topic:: References +.. rubric:: References - .. [1] :doi:`"Unsupervised word sense disambiguation rivaling supervised methods" - <10.3115/981658.981684>` - David Yarowsky, Proceedings of the 33rd annual meeting on Association for - Computational Linguistics (ACL '95). Association for Computational Linguistics, - Stroudsburg, PA, USA, 189-196. +.. [1] :doi:`"Unsupervised word sense disambiguation rivaling supervised methods" + <10.3115/981658.981684>` + David Yarowsky, Proceedings of the 33rd annual meeting on Association for + Computational Linguistics (ACL '95). Association for Computational Linguistics, + Stroudsburg, PA, USA, 189-196. .. _label_propagation: @@ -121,11 +121,11 @@ Label propagation models have two built-in kernel methods. Choice of kernel effects both scalability and performance of the algorithms. The following are available: - * rbf (:math:`\exp(-\gamma |x-y|^2), \gamma > 0`). :math:`\gamma` is - specified by keyword gamma. +* rbf (:math:`\exp(-\gamma |x-y|^2), \gamma > 0`). :math:`\gamma` is + specified by keyword gamma. - * knn (:math:`1[x' \in kNN(x)]`). :math:`k` is specified by keyword - n_neighbors. +* knn (:math:`1[x' \in kNN(x)]`). :math:`k` is specified by keyword + n_neighbors. 
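As a minimal sketch of switching between these kernels (the one-dimensional toy
data below is made up purely for illustration; ``-1`` marks the unlabeled
samples)::

    >>> import numpy as np
    >>> from sklearn.semi_supervised import LabelPropagation
    >>> X = np.array([[0.0], [0.1], [1.0], [1.1], [0.05], [1.05]])
    >>> y = np.array([0, 0, 1, 1, -1, -1])  # -1 means "unlabeled"
    >>> label_prop = LabelPropagation(kernel='knn', n_neighbors=2)
    >>> label_prop.fit(X, y)
    LabelPropagation(kernel='knn', n_neighbors=2)
    >>> label_prop.transduction_
    array([0, 0, 1, 1, 0, 1])

Replacing ``kernel='knn'`` with ``kernel='rbf'`` (and tuning ``gamma``) trades
the sparse k-nearest-neighbors graph for a dense, fully connected one, which is
discussed next.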
The RBF kernel will produce a fully connected graph which is represented in memory by a dense matrix. This matrix may be very large and combined with the cost of @@ -134,18 +134,18 @@ algorithm can lead to prohibitively long running times. On the other hand, the KNN kernel will produce a much more memory-friendly sparse matrix which can drastically reduce running times. -.. topic:: Examples +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_semi_supervised_plot_semi_supervised_versus_svm_iris.py` - * :ref:`sphx_glr_auto_examples_semi_supervised_plot_label_propagation_structure.py` - * :ref:`sphx_glr_auto_examples_semi_supervised_plot_label_propagation_digits.py` - * :ref:`sphx_glr_auto_examples_semi_supervised_plot_label_propagation_digits_active_learning.py` +* :ref:`sphx_glr_auto_examples_semi_supervised_plot_semi_supervised_versus_svm_iris.py` +* :ref:`sphx_glr_auto_examples_semi_supervised_plot_label_propagation_structure.py` +* :ref:`sphx_glr_auto_examples_semi_supervised_plot_label_propagation_digits.py` +* :ref:`sphx_glr_auto_examples_semi_supervised_plot_label_propagation_digits_active_learning.py` -.. topic:: References +.. rubric:: References - [2] Yoshua Bengio, Olivier Delalleau, Nicolas Le Roux. In Semi-Supervised - Learning (2006), pp. 193-216 +[2] Yoshua Bengio, Olivier Delalleau, Nicolas Le Roux. In Semi-Supervised +Learning (2006), pp. 193-216 - [3] Olivier Delalleau, Yoshua Bengio, Nicolas Le Roux. Efficient - Non-Parametric Function Induction in Semi-Supervised Learning. AISTAT 2005 - https://www.gatsby.ucl.ac.uk/aistats/fullpapers/204.pdf +[3] Olivier Delalleau, Yoshua Bengio, Nicolas Le Roux. Efficient +Non-Parametric Function Induction in Semi-Supervised Learning. AISTAT 2005 +https://www.gatsby.ucl.ac.uk/aistats/fullpapers/204.pdf diff --git a/doc/modules/sgd.rst b/doc/modules/sgd.rst index c50ed66868c1b..73df123b4ed19 100644 --- a/doc/modules/sgd.rst +++ b/doc/modules/sgd.rst @@ -36,16 +36,16 @@ different means. The advantages of Stochastic Gradient Descent are: - + Efficiency. ++ Efficiency. - + Ease of implementation (lots of opportunities for code tuning). ++ Ease of implementation (lots of opportunities for code tuning). The disadvantages of Stochastic Gradient Descent include: - + SGD requires a number of hyperparameters such as the regularization - parameter and the number of iterations. ++ SGD requires a number of hyperparameters such as the regularization + parameter and the number of iterations. - + SGD is sensitive to feature scaling. ++ SGD is sensitive to feature scaling. .. warning:: @@ -111,12 +111,12 @@ the coefficients and the input sample, plus the intercept) is given by The concrete loss function can be set via the ``loss`` parameter. :class:`SGDClassifier` supports the following loss functions: - * ``loss="hinge"``: (soft-margin) linear Support Vector Machine, - * ``loss="modified_huber"``: smoothed hinge loss, - * ``loss="log_loss"``: logistic regression, - * and all regression losses below. In this case the target is encoded as -1 - or 1, and the problem is treated as a regression problem. The predicted - class then correspond to the sign of the predicted target. +* ``loss="hinge"``: (soft-margin) linear Support Vector Machine, +* ``loss="modified_huber"``: smoothed hinge loss, +* ``loss="log_loss"``: logistic regression, +* and all regression losses below. In this case the target is encoded as -1 + or 1, and the problem is treated as a regression problem. The predicted + class then correspond to the sign of the predicted target. 
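For instance, here is a minimal sketch (on two made-up training points) of
selecting the logistic loss, which in addition enables probability estimates
through ``predict_proba``::

    >>> from sklearn.linear_model import SGDClassifier
    >>> X = [[0., 0.], [1., 1.]]
    >>> y = [0, 1]
    >>> clf = SGDClassifier(loss="log_loss")
    >>> clf.fit(X, y)
    SGDClassifier(loss='log_loss')
    >>> clf.predict([[2., 2.]])
    array([1])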
Please refer to the :ref:`mathematical section below ` for formulas. @@ -136,10 +136,10 @@ Using ``loss="log_loss"`` or ``loss="modified_huber"`` enables the The concrete penalty can be set via the ``penalty`` parameter. SGD supports the following penalties: - * ``penalty="l2"``: L2 norm penalty on ``coef_``. - * ``penalty="l1"``: L1 norm penalty on ``coef_``. - * ``penalty="elasticnet"``: Convex combination of L2 and L1; - ``(1 - l1_ratio) * L2 + l1_ratio * L1``. +* ``penalty="l2"``: L2 norm penalty on ``coef_``. +* ``penalty="l1"``: L1 norm penalty on ``coef_``. +* ``penalty="elasticnet"``: Convex combination of L2 and L1; + ``(1 - l1_ratio) * L2 + l1_ratio * L1``. The default setting is ``penalty="l2"``. The L1 penalty leads to sparse solutions, driving most coefficients to zero. The Elastic Net [#5]_ solves @@ -189,14 +189,14 @@ For classification with a logistic loss, another variant of SGD with an averaging strategy is available with Stochastic Average Gradient (SAG) algorithm, available as a solver in :class:`LogisticRegression`. -.. topic:: Examples: +.. rubric:: Examples - - :ref:`sphx_glr_auto_examples_linear_model_plot_sgd_separating_hyperplane.py`, - - :ref:`sphx_glr_auto_examples_linear_model_plot_sgd_iris.py` - - :ref:`sphx_glr_auto_examples_linear_model_plot_sgd_weighted_samples.py` - - :ref:`sphx_glr_auto_examples_linear_model_plot_sgd_comparison.py` - - :ref:`sphx_glr_auto_examples_svm_plot_separating_hyperplane_unbalanced.py` - (See the Note in the example) +- :ref:`sphx_glr_auto_examples_linear_model_plot_sgd_separating_hyperplane.py` +- :ref:`sphx_glr_auto_examples_linear_model_plot_sgd_iris.py` +- :ref:`sphx_glr_auto_examples_linear_model_plot_sgd_weighted_samples.py` +- :ref:`sphx_glr_auto_examples_linear_model_plot_sgd_comparison.py` +- :ref:`sphx_glr_auto_examples_svm_plot_separating_hyperplane_unbalanced.py` + (See the Note in the example) Regression ========== @@ -211,9 +211,9 @@ samples (> 10.000), for other problems we recommend :class:`Ridge`, The concrete loss function can be set via the ``loss`` parameter. :class:`SGDRegressor` supports the following loss functions: - * ``loss="squared_error"``: Ordinary least squares, - * ``loss="huber"``: Huber loss for robust regression, - * ``loss="epsilon_insensitive"``: linear Support Vector Regression. +* ``loss="squared_error"``: Ordinary least squares, +* ``loss="huber"``: Huber loss for robust regression, +* ``loss="epsilon_insensitive"``: linear Support Vector Regression. Please refer to the :ref:`mathematical section below ` for formulas. @@ -249,38 +249,40 @@ quadratic in the number of samples. with a large number of training samples (> 10,000) for which the SGD variant can be several orders of magnitude faster. -Its implementation is based on the implementation of the stochastic -gradient descent. Indeed, the original optimization problem of the One-Class -SVM is given by +.. dropdown:: Mathematical details -.. math:: + Its implementation is based on the implementation of the stochastic + gradient descent. Indeed, the original optimization problem of the One-Class + SVM is given by - \begin{aligned} - \min_{w, \rho, \xi} & \quad \frac{1}{2}\Vert w \Vert^2 - \rho + \frac{1}{\nu n} \sum_{i=1}^n \xi_i \\ - \text{s.t.} & \quad \langle w, x_i \rangle \geq \rho - \xi_i \quad 1 \leq i \leq n \\ - & \quad \xi_i \geq 0 \quad 1 \leq i \leq n - \end{aligned} + .. math:: -where :math:`\nu \in (0, 1]` is the user-specified parameter controlling the -proportion of outliers and the proportion of support vectors. 
Getting rid of -the slack variables :math:`\xi_i` this problem is equivalent to + \begin{aligned} + \min_{w, \rho, \xi} & \quad \frac{1}{2}\Vert w \Vert^2 - \rho + \frac{1}{\nu n} \sum_{i=1}^n \xi_i \\ + \text{s.t.} & \quad \langle w, x_i \rangle \geq \rho - \xi_i \quad 1 \leq i \leq n \\ + & \quad \xi_i \geq 0 \quad 1 \leq i \leq n + \end{aligned} -.. math:: + where :math:`\nu \in (0, 1]` is the user-specified parameter controlling the + proportion of outliers and the proportion of support vectors. Getting rid of + the slack variables :math:`\xi_i` this problem is equivalent to - \min_{w, \rho} \frac{1}{2}\Vert w \Vert^2 - \rho + \frac{1}{\nu n} \sum_{i=1}^n \max(0, \rho - \langle w, x_i \rangle) \, . + .. math:: -Multiplying by the constant :math:`\nu` and introducing the intercept -:math:`b = 1 - \rho` we obtain the following equivalent optimization problem + \min_{w, \rho} \frac{1}{2}\Vert w \Vert^2 - \rho + \frac{1}{\nu n} \sum_{i=1}^n \max(0, \rho - \langle w, x_i \rangle) \, . -.. math:: + Multiplying by the constant :math:`\nu` and introducing the intercept + :math:`b = 1 - \rho` we obtain the following equivalent optimization problem + + .. math:: - \min_{w, b} \frac{\nu}{2}\Vert w \Vert^2 + b\nu + \frac{1}{n} \sum_{i=1}^n \max(0, 1 - (\langle w, x_i \rangle + b)) \, . + \min_{w, b} \frac{\nu}{2}\Vert w \Vert^2 + b\nu + \frac{1}{n} \sum_{i=1}^n \max(0, 1 - (\langle w, x_i \rangle + b)) \, . -This is similar to the optimization problems studied in section -:ref:`sgd_mathematical_formulation` with :math:`y_i = 1, 1 \leq i \leq n` and -:math:`\alpha = \nu/2`, :math:`L` being the hinge loss function and :math:`R` -being the L2 norm. We just need to add the term :math:`b\nu` in the -optimization loop. + This is similar to the optimization problems studied in section + :ref:`sgd_mathematical_formulation` with :math:`y_i = 1, 1 \leq i \leq n` and + :math:`\alpha = \nu/2`, :math:`L` being the hinge loss function and :math:`R` + being the L2 norm. We just need to add the term :math:`b\nu` in the + optimization loop. As :class:`SGDClassifier` and :class:`SGDRegressor`, :class:`SGDOneClassSVM` supports averaged SGD. Averaging can be enabled by setting ``average=True``. @@ -299,9 +301,9 @@ efficiency, however, use the CSR matrix format as defined in `scipy.sparse.csr_matrix `_. -.. topic:: Examples: +.. rubric:: Examples - - :ref:`sphx_glr_auto_examples_text_plot_document_classification_20newsgroups.py` +- :ref:`sphx_glr_auto_examples_text_plot_document_classification_20newsgroups.py` Complexity ========== @@ -321,14 +323,14 @@ Stopping criterion The classes :class:`SGDClassifier` and :class:`SGDRegressor` provide two criteria to stop the algorithm when a given level of convergence is reached: - * With ``early_stopping=True``, the input data is split into a training set - and a validation set. The model is then fitted on the training set, and the - stopping criterion is based on the prediction score (using the `score` - method) computed on the validation set. The size of the validation set - can be changed with the parameter ``validation_fraction``. - * With ``early_stopping=False``, the model is fitted on the entire input data - and the stopping criterion is based on the objective function computed on - the training data. +* With ``early_stopping=True``, the input data is split into a training set + and a validation set. The model is then fitted on the training set, and the + stopping criterion is based on the prediction score (using the `score` + method) computed on the validation set. 
The size of the validation set + can be changed with the parameter ``validation_fraction``. +* With ``early_stopping=False``, the model is fitted on the entire input data + and the stopping criterion is based on the objective function computed on + the training data. In both cases, the criterion is evaluated once by epoch, and the algorithm stops when the criterion does not improve ``n_iter_no_change`` times in a row. The @@ -339,51 +341,51 @@ stops in any case after a maximum number of iteration ``max_iter``. Tips on Practical Use ===================== - * Stochastic Gradient Descent is sensitive to feature scaling, so it - is highly recommended to scale your data. For example, scale each - attribute on the input vector X to [0,1] or [-1,+1], or standardize - it to have mean 0 and variance 1. Note that the *same* scaling - must be applied to the test vector to obtain meaningful - results. This can be easily done using :class:`StandardScaler`:: +* Stochastic Gradient Descent is sensitive to feature scaling, so it + is highly recommended to scale your data. For example, scale each + attribute on the input vector X to [0,1] or [-1,+1], or standardize + it to have mean 0 and variance 1. Note that the *same* scaling must be + applied to the test vector to obtain meaningful results. This can be easily + done using :class:`~sklearn.preprocessing.StandardScaler`:: - from sklearn.preprocessing import StandardScaler - scaler = StandardScaler() - scaler.fit(X_train) # Don't cheat - fit only on training data - X_train = scaler.transform(X_train) - X_test = scaler.transform(X_test) # apply same transformation to test data + from sklearn.preprocessing import StandardScaler + scaler = StandardScaler() + scaler.fit(X_train) # Don't cheat - fit only on training data + X_train = scaler.transform(X_train) + X_test = scaler.transform(X_test) # apply same transformation to test data - # Or better yet: use a pipeline! - from sklearn.pipeline import make_pipeline - est = make_pipeline(StandardScaler(), SGDClassifier()) - est.fit(X_train) - est.predict(X_test) + # Or better yet: use a pipeline! + from sklearn.pipeline import make_pipeline + est = make_pipeline(StandardScaler(), SGDClassifier()) + est.fit(X_train) + est.predict(X_test) - If your attributes have an intrinsic scale (e.g. word frequencies or - indicator features) scaling is not needed. + If your attributes have an intrinsic scale (e.g. word frequencies or + indicator features) scaling is not needed. - * Finding a reasonable regularization term :math:`\alpha` is - best done using automatic hyper-parameter search, e.g. - :class:`~sklearn.model_selection.GridSearchCV` or - :class:`~sklearn.model_selection.RandomizedSearchCV`, usually in the - range ``10.0**-np.arange(1,7)``. +* Finding a reasonable regularization term :math:`\alpha` is + best done using automatic hyper-parameter search, e.g. + :class:`~sklearn.model_selection.GridSearchCV` or + :class:`~sklearn.model_selection.RandomizedSearchCV`, usually in the + range ``10.0**-np.arange(1,7)``. - * Empirically, we found that SGD converges after observing - approximately 10^6 training samples. Thus, a reasonable first guess - for the number of iterations is ``max_iter = np.ceil(10**6 / n)``, - where ``n`` is the size of the training set. +* Empirically, we found that SGD converges after observing + approximately 10^6 training samples. Thus, a reasonable first guess + for the number of iterations is ``max_iter = np.ceil(10**6 / n)``, + where ``n`` is the size of the training set. 
- * If you apply SGD to features extracted using PCA we found that - it is often wise to scale the feature values by some constant `c` - such that the average L2 norm of the training data equals one. +* If you apply SGD to features extracted using PCA we found that + it is often wise to scale the feature values by some constant `c` + such that the average L2 norm of the training data equals one. - * We found that Averaged SGD works best with a larger number of features - and a higher eta0 +* We found that Averaged SGD works best with a larger number of features + and a higher eta0. -.. topic:: References: +.. rubric:: References - * `"Efficient BackProp" `_ - Y. LeCun, L. Bottou, G. Orr, K. MÃŧller - In Neural Networks: Tricks - of the Trade 1998. +* `"Efficient BackProp" `_ + Y. LeCun, L. Bottou, G. Orr, K. MÃŧller - In Neural Networks: Tricks + of the Trade 1998. .. _sgd_mathematical_formulation: @@ -410,26 +412,28 @@ where :math:`L` is a loss function that measures model (mis)fit and complexity; :math:`\alpha > 0` is a non-negative hyperparameter that controls the regularization strength. -Different choices for :math:`L` entail different classifiers or regressors: - -- Hinge (soft-margin): equivalent to Support Vector Classification. - :math:`L(y_i, f(x_i)) = \max(0, 1 - y_i f(x_i))`. -- Perceptron: - :math:`L(y_i, f(x_i)) = \max(0, - y_i f(x_i))`. -- Modified Huber: - :math:`L(y_i, f(x_i)) = \max(0, 1 - y_i f(x_i))^2` if :math:`y_i f(x_i) > - 1`, and :math:`L(y_i, f(x_i)) = -4 y_i f(x_i)` otherwise. -- Log Loss: equivalent to Logistic Regression. - :math:`L(y_i, f(x_i)) = \log(1 + \exp (-y_i f(x_i)))`. -- Squared Error: Linear regression (Ridge or Lasso depending on - :math:`R`). - :math:`L(y_i, f(x_i)) = \frac{1}{2}(y_i - f(x_i))^2`. -- Huber: less sensitive to outliers than least-squares. It is equivalent to - least squares when :math:`|y_i - f(x_i)| \leq \varepsilon`, and - :math:`L(y_i, f(x_i)) = \varepsilon |y_i - f(x_i)| - \frac{1}{2} - \varepsilon^2` otherwise. -- Epsilon-Insensitive: (soft-margin) equivalent to Support Vector Regression. - :math:`L(y_i, f(x_i)) = \max(0, |y_i - f(x_i)| - \varepsilon)`. +.. dropdown:: Loss functions details + + Different choices for :math:`L` entail different classifiers or regressors: + + - Hinge (soft-margin): equivalent to Support Vector Classification. + :math:`L(y_i, f(x_i)) = \max(0, 1 - y_i f(x_i))`. + - Perceptron: + :math:`L(y_i, f(x_i)) = \max(0, - y_i f(x_i))`. + - Modified Huber: + :math:`L(y_i, f(x_i)) = \max(0, 1 - y_i f(x_i))^2` if :math:`y_i f(x_i) > + -1`, and :math:`L(y_i, f(x_i)) = -4 y_i f(x_i)` otherwise. + - Log Loss: equivalent to Logistic Regression. + :math:`L(y_i, f(x_i)) = \log(1 + \exp (-y_i f(x_i)))`. + - Squared Error: Linear regression (Ridge or Lasso depending on + :math:`R`). + :math:`L(y_i, f(x_i)) = \frac{1}{2}(y_i - f(x_i))^2`. + - Huber: less sensitive to outliers than least-squares. It is equivalent to + least squares when :math:`|y_i - f(x_i)| \leq \varepsilon`, and + :math:`L(y_i, f(x_i)) = \varepsilon |y_i - f(x_i)| - \frac{1}{2} + \varepsilon^2` otherwise. + - Epsilon-Insensitive: (soft-margin) equivalent to Support Vector Regression. + :math:`L(y_i, f(x_i)) = \max(0, |y_i - f(x_i)| - \varepsilon)`. All of the above loss functions can be regarded as an upper bound on the misclassification error (Zero-one loss) as shown in the Figure below. @@ -442,12 +446,12 @@ misclassification error (Zero-one loss) as shown in the Figure below. 
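A small numerical sketch of that upper-bound remark, comparing the hinge loss
with the zero-one loss on a few arbitrary margins :math:`y_i f(x_i)` (the
values are made up)::

    >>> import numpy as np
    >>> margins = np.array([-2.0, -0.5, 0.5, 2.0])  # arbitrary values of y_i * f(x_i)
    >>> hinge = np.maximum(0, 1 - margins)          # max(0, 1 - y_i f(x_i))
    >>> zero_one = (margins <= 0).astype(float)     # 1 for a misclassification, else 0
    >>> bool(np.all(hinge >= zero_one))
    True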
Popular choices for the regularization term :math:`R` (the `penalty` parameter) include: - - L2 norm: :math:`R(w) := \frac{1}{2} \sum_{j=1}^{m} w_j^2 = ||w||_2^2`, - - L1 norm: :math:`R(w) := \sum_{j=1}^{m} |w_j|`, which leads to sparse - solutions. - - Elastic Net: :math:`R(w) := \frac{\rho}{2} \sum_{j=1}^{n} w_j^2 + - (1-\rho) \sum_{j=1}^{m} |w_j|`, a convex combination of L2 and L1, where - :math:`\rho` is given by ``1 - l1_ratio``. +- L2 norm: :math:`R(w) := \frac{1}{2} \sum_{j=1}^{m} w_j^2 = ||w||_2^2`, +- L1 norm: :math:`R(w) := \sum_{j=1}^{m} |w_j|`, which leads to sparse + solutions. +- Elastic Net: :math:`R(w) := \frac{\rho}{2} \sum_{j=1}^{n} w_j^2 + + (1-\rho) \sum_{j=1}^{m} |w_j|`, a convex combination of L2 and L1, where + :math:`\rho` is given by ``1 - l1_ratio``. The Figure below shows the contours of the different regularization terms in a 2-dimensional parameter space (:math:`m=2`) when :math:`R(w) = 1`. @@ -491,7 +495,7 @@ where :math:`t` is the time step (there are a total of `n_samples * n_iter` time steps), :math:`t_0` is determined based on a heuristic proposed by LÊon Bottou such that the expected initial updates are comparable with the expected size of the weights (this assuming that the norm of the training samples is -approx. 1). The exact definition can be found in ``_init_t`` in :class:`BaseSGD`. +approx. 1). The exact definition can be found in ``_init_t`` in `BaseSGD`. For regression the default learning rate schedule is inverse scaling @@ -541,32 +545,29 @@ We use the truncated gradient algorithm proposed in [#3]_ for L1 regularization (and the Elastic Net). The code is written in Cython. -.. topic:: References: +.. rubric:: References - .. [#1] `"Stochastic Gradient Descent" - `_ L. Bottou - Website, 2010. +.. [#1] `"Stochastic Gradient Descent" + `_ L. Bottou - Website, 2010. - .. [#2] :doi:`"Pegasos: Primal estimated sub-gradient solver for svm" - <10.1145/1273496.1273598>` - S. Shalev-Shwartz, Y. Singer, N. Srebro - In Proceedings of ICML '07. +.. [#2] :doi:`"Pegasos: Primal estimated sub-gradient solver for svm" + <10.1145/1273496.1273598>` + S. Shalev-Shwartz, Y. Singer, N. Srebro - In Proceedings of ICML '07. - .. [#3] `"Stochastic gradient descent training for l1-regularized - log-linear models with cumulative penalty" - `_ - Y. Tsuruoka, J. Tsujii, S. Ananiadou - In Proceedings of the AFNLP/ACL - '09. +.. [#3] `"Stochastic gradient descent training for l1-regularized + log-linear models with cumulative penalty" + `_ + Y. Tsuruoka, J. Tsujii, S. Ananiadou - In Proceedings of the AFNLP/ACL'09. - .. [#4] :arxiv:`"Towards Optimal One Pass Large Scale Learning with - Averaged Stochastic Gradient Descent" - <1107.2490v2>` - Xu, Wei (2011) +.. [#4] :arxiv:`"Towards Optimal One Pass Large Scale Learning with + Averaged Stochastic Gradient Descent" + <1107.2490v2>`. Xu, Wei (2011) - .. [#5] :doi:`"Regularization and variable selection via the elastic net" - <10.1111/j.1467-9868.2005.00503.x>` - H. Zou, T. Hastie - Journal of the Royal Statistical Society Series B, - 67 (2), 301-320. +.. [#5] :doi:`"Regularization and variable selection via the elastic net" + <10.1111/j.1467-9868.2005.00503.x>` + H. Zou, T. Hastie - Journal of the Royal Statistical Society Series B, + 67 (2), 301-320. - .. [#6] :doi:`"Solving large scale linear prediction problems using stochastic - gradient descent algorithms" - <10.1145/1015330.1015332>` - T. Zhang - In Proceedings of ICML '04. +.. 
[#6] :doi:`"Solving large scale linear prediction problems using stochastic + gradient descent algorithms" <10.1145/1015330.1015332>` + T. Zhang - In Proceedings of ICML '04. diff --git a/doc/modules/svm.rst b/doc/modules/svm.rst index c5b998e48707a..47115e43a89e0 100644 --- a/doc/modules/svm.rst +++ b/doc/modules/svm.rst @@ -16,27 +16,27 @@ methods used for :ref:`classification `, The advantages of support vector machines are: - - Effective in high dimensional spaces. +- Effective in high dimensional spaces. - - Still effective in cases where number of dimensions is greater - than the number of samples. +- Still effective in cases where number of dimensions is greater + than the number of samples. - - Uses a subset of training points in the decision function (called - support vectors), so it is also memory efficient. +- Uses a subset of training points in the decision function (called + support vectors), so it is also memory efficient. - - Versatile: different :ref:`svm_kernels` can be - specified for the decision function. Common kernels are - provided, but it is also possible to specify custom kernels. +- Versatile: different :ref:`svm_kernels` can be + specified for the decision function. Common kernels are + provided, but it is also possible to specify custom kernels. The disadvantages of support vector machines include: - - If the number of features is much greater than the number of - samples, avoid over-fitting in choosing :ref:`svm_kernels` and regularization - term is crucial. +- If the number of features is much greater than the number of + samples, avoid over-fitting in choosing :ref:`svm_kernels` and regularization + term is crucial. - - SVMs do not directly provide probability estimates, these are - calculated using an expensive five-fold cross-validation - (see :ref:`Scores and probabilities `, below). +- SVMs do not directly provide probability estimates, these are + calculated using an expensive five-fold cross-validation + (see :ref:`Scores and probabilities `, below). The support vector machines in scikit-learn support both dense (``numpy.ndarray`` and convertible to that by ``numpy.asarray``) and @@ -60,14 +60,19 @@ capable of performing binary and multi-class classification on a dataset. :align: center -:class:`SVC` and :class:`NuSVC` are similar methods, but accept -slightly different sets of parameters and have different mathematical -formulations (see section :ref:`svm_mathematical_formulation`). On the -other hand, :class:`LinearSVC` is another (faster) implementation of Support -Vector Classification for the case of a linear kernel. Note that -:class:`LinearSVC` does not accept parameter ``kernel``, as this is -assumed to be linear. It also lacks some of the attributes of -:class:`SVC` and :class:`NuSVC`, like ``support_``. +:class:`SVC` and :class:`NuSVC` are similar methods, but accept slightly +different sets of parameters and have different mathematical formulations (see +section :ref:`svm_mathematical_formulation`). On the other hand, +:class:`LinearSVC` is another (faster) implementation of Support Vector +Classification for the case of a linear kernel. It also +lacks some of the attributes of :class:`SVC` and :class:`NuSVC`, like +`support_`. :class:`LinearSVC` uses `squared_hinge` loss and due to its +implementation in `liblinear` it also regularizes the intercept, if considered. 
+This effect can however be reduced by carefully fine tuning its +`intercept_scaling` parameter, which allows the intercept term to have a +different regularization behavior compared to the other features. The +classification results and score can therefore differ from the other two +classifiers. As other classifiers, :class:`SVC`, :class:`NuSVC` and :class:`LinearSVC` take as input two arrays: an array `X` of shape @@ -103,11 +108,10 @@ properties of these support vectors can be found in attributes >>> clf.n_support_ array([1, 1]...) -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_svm_plot_separating_hyperplane.py`, - * :ref:`sphx_glr_auto_examples_svm_plot_svm_nonlinear.py` - * :ref:`sphx_glr_auto_examples_svm_plot_svm_anova.py`, +* :ref:`sphx_glr_auto_examples_svm_plot_separating_hyperplane.py` +* :ref:`sphx_glr_auto_examples_svm_plot_svm_anova.py` .. _svm_multi_class: @@ -129,7 +133,7 @@ function of shape ``(n_samples, n_classes)``. >>> clf.fit(X, Y) SVC(decision_function_shape='ovo') >>> dec = clf.decision_function([[1]]) - >>> dec.shape[1] # 4 classes: 4*3/2 = 6 + >>> dec.shape[1] # 6 classes: 4*3/2 = 6 6 >>> clf.decision_function_shape = "ovr" >>> dec = clf.decision_function([[1]]) @@ -139,9 +143,9 @@ function of shape ``(n_samples, n_classes)``. On the other hand, :class:`LinearSVC` implements "one-vs-the-rest" multi-class strategy, thus training `n_classes` models. - >>> lin_clf = svm.LinearSVC(dual="auto") + >>> lin_clf = svm.LinearSVC() >>> lin_clf.fit(X, Y) - LinearSVC(dual='auto') + LinearSVC() >>> dec = lin_clf.decision_function([[1]]) >>> dec.shape[1] 4 @@ -149,59 +153,61 @@ multi-class strategy, thus training `n_classes` models. See :ref:`svm_mathematical_formulation` for a complete description of the decision function. -Note that the :class:`LinearSVC` also implements an alternative multi-class -strategy, the so-called multi-class SVM formulated by Crammer and Singer -[#8]_, by using the option ``multi_class='crammer_singer'``. In practice, -one-vs-rest classification is usually preferred, since the results are mostly -similar, but the runtime is significantly less. - -For "one-vs-rest" :class:`LinearSVC` the attributes ``coef_`` and ``intercept_`` -have the shape ``(n_classes, n_features)`` and ``(n_classes,)`` respectively. -Each row of the coefficients corresponds to one of the ``n_classes`` -"one-vs-rest" classifiers and similar for the intercepts, in the -order of the "one" class. - -In the case of "one-vs-one" :class:`SVC` and :class:`NuSVC`, the layout of -the attributes is a little more involved. In the case of a linear -kernel, the attributes ``coef_`` and ``intercept_`` have the shape -``(n_classes * (n_classes - 1) / 2, n_features)`` and ``(n_classes * -(n_classes - 1) / 2)`` respectively. This is similar to the layout for -:class:`LinearSVC` described above, with each row now corresponding -to a binary classifier. The order for classes -0 to n is "0 vs 1", "0 vs 2" , ... "0 vs n", "1 vs 2", "1 vs 3", "1 vs n", . . -. "n-1 vs n". - -The shape of ``dual_coef_`` is ``(n_classes-1, n_SV)`` with -a somewhat hard to grasp layout. -The columns correspond to the support vectors involved in any -of the ``n_classes * (n_classes - 1) / 2`` "one-vs-one" classifiers. -Each support vector ``v`` has a dual coefficient in each of the -``n_classes - 1`` classifiers comparing the class of ``v`` against another class. -Note that some, but not all, of these dual coefficients, may be zero. 
-The ``n_classes - 1`` entries in each column are these dual coefficients, -ordered by the opposing class. - -This might be clearer with an example: consider a three class problem with -class 0 having three support vectors -:math:`v^{0}_0, v^{1}_0, v^{2}_0` and class 1 and 2 having two support vectors -:math:`v^{0}_1, v^{1}_1` and :math:`v^{0}_2, v^{1}_2` respectively. For each -support vector :math:`v^{j}_i`, there are two dual coefficients. Let's call -the coefficient of support vector :math:`v^{j}_i` in the classifier between -classes :math:`i` and :math:`k` :math:`\alpha^{j}_{i,k}`. -Then ``dual_coef_`` looks like this: - -+------------------------+------------------------+------------------------+------------------------+------------------------+------------------------+------------------------+ -|:math:`\alpha^{0}_{0,1}`|:math:`\alpha^{1}_{0,1}`|:math:`\alpha^{2}_{0,1}`|:math:`\alpha^{0}_{1,0}`|:math:`\alpha^{1}_{1,0}`|:math:`\alpha^{0}_{2,0}`|:math:`\alpha^{1}_{2,0}`| -+------------------------+------------------------+------------------------+------------------------+------------------------+------------------------+------------------------+ -|:math:`\alpha^{0}_{0,2}`|:math:`\alpha^{1}_{0,2}`|:math:`\alpha^{2}_{0,2}`|:math:`\alpha^{0}_{1,2}`|:math:`\alpha^{1}_{1,2}`|:math:`\alpha^{0}_{2,1}`|:math:`\alpha^{1}_{2,1}`| -+------------------------+------------------------+------------------------+------------------------+------------------------+------------------------+------------------------+ -|Coefficients |Coefficients |Coefficients | -|for SVs of class 0 |for SVs of class 1 |for SVs of class 2 | -+--------------------------------------------------------------------------+-------------------------------------------------+-------------------------------------------------+ - -.. topic:: Examples: - - * :ref:`sphx_glr_auto_examples_svm_plot_iris_svc.py`, +.. dropdown:: Details on multi-class strategies + + Note that the :class:`LinearSVC` also implements an alternative multi-class + strategy, the so-called multi-class SVM formulated by Crammer and Singer + [#8]_, by using the option ``multi_class='crammer_singer'``. In practice, + one-vs-rest classification is usually preferred, since the results are mostly + similar, but the runtime is significantly less. + + For "one-vs-rest" :class:`LinearSVC` the attributes ``coef_`` and ``intercept_`` + have the shape ``(n_classes, n_features)`` and ``(n_classes,)`` respectively. + Each row of the coefficients corresponds to one of the ``n_classes`` + "one-vs-rest" classifiers and similar for the intercepts, in the + order of the "one" class. + + In the case of "one-vs-one" :class:`SVC` and :class:`NuSVC`, the layout of + the attributes is a little more involved. In the case of a linear + kernel, the attributes ``coef_`` and ``intercept_`` have the shape + ``(n_classes * (n_classes - 1) / 2, n_features)`` and ``(n_classes * + (n_classes - 1) / 2)`` respectively. This is similar to the layout for + :class:`LinearSVC` described above, with each row now corresponding + to a binary classifier. The order for classes + 0 to n is "0 vs 1", "0 vs 2" , ... "0 vs n", "1 vs 2", "1 vs 3", "1 vs n", . . + . "n-1 vs n". + + The shape of ``dual_coef_`` is ``(n_classes-1, n_SV)`` with + a somewhat hard to grasp layout. + The columns correspond to the support vectors involved in any + of the ``n_classes * (n_classes - 1) / 2`` "one-vs-one" classifiers. 
+ Each support vector ``v`` has a dual coefficient in each of the + ``n_classes - 1`` classifiers comparing the class of ``v`` against another class. + Note that some, but not all, of these dual coefficients, may be zero. + The ``n_classes - 1`` entries in each column are these dual coefficients, + ordered by the opposing class. + + This might be clearer with an example: consider a three class problem with + class 0 having three support vectors + :math:`v^{0}_0, v^{1}_0, v^{2}_0` and class 1 and 2 having two support vectors + :math:`v^{0}_1, v^{1}_1` and :math:`v^{0}_2, v^{1}_2` respectively. For each + support vector :math:`v^{j}_i`, there are two dual coefficients. Let's call + the coefficient of support vector :math:`v^{j}_i` in the classifier between + classes :math:`i` and :math:`k` :math:`\alpha^{j}_{i,k}`. + Then ``dual_coef_`` looks like this: + + +------------------------+------------------------+------------------------+------------------------+------------------------+------------------------+------------------------+ + |:math:`\alpha^{0}_{0,1}`|:math:`\alpha^{1}_{0,1}`|:math:`\alpha^{2}_{0,1}`|:math:`\alpha^{0}_{1,0}`|:math:`\alpha^{1}_{1,0}`|:math:`\alpha^{0}_{2,0}`|:math:`\alpha^{1}_{2,0}`| + +------------------------+------------------------+------------------------+------------------------+------------------------+------------------------+------------------------+ + |:math:`\alpha^{0}_{0,2}`|:math:`\alpha^{1}_{0,2}`|:math:`\alpha^{2}_{0,2}`|:math:`\alpha^{0}_{1,2}`|:math:`\alpha^{1}_{1,2}`|:math:`\alpha^{0}_{2,1}`|:math:`\alpha^{1}_{2,1}`| + +------------------------+------------------------+------------------------+------------------------+------------------------+------------------------+------------------------+ + |Coefficients |Coefficients |Coefficients | + |for SVs of class 0 |for SVs of class 1 |for SVs of class 2 | + +--------------------------------------------------------------------------+-------------------------------------------------+-------------------------------------------------+ + +.. rubric:: Examples + +* :ref:`sphx_glr_auto_examples_svm_plot_iris_svc.py` .. _scores_probabilities: @@ -284,10 +290,10 @@ to the sample weights: :align: center :scale: 75 -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_svm_plot_separating_hyperplane_unbalanced.py` - * :ref:`sphx_glr_auto_examples_svm_plot_weighted_samples.py`, +* :ref:`sphx_glr_auto_examples_svm_plot_separating_hyperplane_unbalanced.py` +* :ref:`sphx_glr_auto_examples_svm_plot_weighted_samples.py` .. _svm_regression: @@ -308,10 +314,15 @@ target. There are three different implementations of Support Vector Regression: :class:`SVR`, :class:`NuSVR` and :class:`LinearSVR`. :class:`LinearSVR` -provides a faster implementation than :class:`SVR` but only considers -the linear kernel, while :class:`NuSVR` implements a slightly different -formulation than :class:`SVR` and :class:`LinearSVR`. See -:ref:`svm_implementation_details` for further details. +provides a faster implementation than :class:`SVR` but only considers the +linear kernel, while :class:`NuSVR` implements a slightly different formulation +than :class:`SVR` and :class:`LinearSVR`. Due to its implementation in +`liblinear` :class:`LinearSVR` also regularizes the intercept, if considered. +This effect can however be reduced by carefully fine tuning its +`intercept_scaling` parameter, which allows the intercept term to have a +different regularization behavior compared to the other features. 
The +classification results and score can therefore differ from the other two +classifiers. See :ref:`svm_implementation_details` for further details. As with classification classes, the fit method will take as argument vectors X, y, only that in this case y is expected to have @@ -327,9 +338,9 @@ floating point values instead of integer values:: array([1.5]) -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_svm_plot_svm_regression.py` +* :ref:`sphx_glr_auto_examples_svm_plot_svm_regression.py` .. _svm_outlier_detection: @@ -365,95 +376,95 @@ Tips on Practical Use ===================== - * **Avoiding data copy**: For :class:`SVC`, :class:`SVR`, :class:`NuSVC` and - :class:`NuSVR`, if the data passed to certain methods is not C-ordered - contiguous and double precision, it will be copied before calling the - underlying C implementation. You can check whether a given numpy array is - C-contiguous by inspecting its ``flags`` attribute. - - For :class:`LinearSVC` (and :class:`LogisticRegression - `) any input passed as a numpy - array will be copied and converted to the `liblinear`_ internal sparse data - representation (double precision floats and int32 indices of non-zero - components). If you want to fit a large-scale linear classifier without - copying a dense numpy C-contiguous double precision array as input, we - suggest to use the :class:`SGDClassifier - ` class instead. The objective - function can be configured to be almost the same as the :class:`LinearSVC` - model. - - * **Kernel cache size**: For :class:`SVC`, :class:`SVR`, :class:`NuSVC` and - :class:`NuSVR`, the size of the kernel cache has a strong impact on run - times for larger problems. If you have enough RAM available, it is - recommended to set ``cache_size`` to a higher value than the default of - 200(MB), such as 500(MB) or 1000(MB). - - - * **Setting C**: ``C`` is ``1`` by default and it's a reasonable default - choice. If you have a lot of noisy observations you should decrease it: - decreasing C corresponds to more regularization. - - :class:`LinearSVC` and :class:`LinearSVR` are less sensitive to ``C`` when - it becomes large, and prediction results stop improving after a certain - threshold. Meanwhile, larger ``C`` values will take more time to train, - sometimes up to 10 times longer, as shown in [#3]_. - - * Support Vector Machine algorithms are not scale invariant, so **it - is highly recommended to scale your data**. For example, scale each - attribute on the input vector X to [0,1] or [-1,+1], or standardize it - to have mean 0 and variance 1. Note that the *same* scaling must be - applied to the test vector to obtain meaningful results. This can be done - easily by using a :class:`~sklearn.pipeline.Pipeline`:: - - >>> from sklearn.pipeline import make_pipeline - >>> from sklearn.preprocessing import StandardScaler - >>> from sklearn.svm import SVC - - >>> clf = make_pipeline(StandardScaler(), SVC()) - - See section :ref:`preprocessing` for more details on scaling and - normalization. - - .. _shrinking_svm: - - * Regarding the `shrinking` parameter, quoting [#4]_: *We found that if the - number of iterations is large, then shrinking can shorten the training - time. However, if we loosely solve the optimization problem (e.g., by - using a large stopping tolerance), the code without using shrinking may - be much faster* - - * Parameter ``nu`` in :class:`NuSVC`/:class:`OneClassSVM`/:class:`NuSVR` - approximates the fraction of training errors and support vectors. 
- - * In :class:`SVC`, if the data is unbalanced (e.g. many - positive and few negative), set ``class_weight='balanced'`` and/or try - different penalty parameters ``C``. - - * **Randomness of the underlying implementations**: The underlying - implementations of :class:`SVC` and :class:`NuSVC` use a random number - generator only to shuffle the data for probability estimation (when - ``probability`` is set to ``True``). This randomness can be controlled - with the ``random_state`` parameter. If ``probability`` is set to ``False`` - these estimators are not random and ``random_state`` has no effect on the - results. The underlying :class:`OneClassSVM` implementation is similar to - the ones of :class:`SVC` and :class:`NuSVC`. As no probability estimation - is provided for :class:`OneClassSVM`, it is not random. - - The underlying :class:`LinearSVC` implementation uses a random number - generator to select features when fitting the model with a dual coordinate - descent (i.e when ``dual`` is set to ``True``). It is thus not uncommon - to have slightly different results for the same input data. If that - happens, try with a smaller `tol` parameter. This randomness can also be - controlled with the ``random_state`` parameter. When ``dual`` is - set to ``False`` the underlying implementation of :class:`LinearSVC` is - not random and ``random_state`` has no effect on the results. - - * Using L1 penalization as provided by ``LinearSVC(penalty='l1', - dual=False)`` yields a sparse solution, i.e. only a subset of feature - weights is different from zero and contribute to the decision function. - Increasing ``C`` yields a more complex model (more features are selected). - The ``C`` value that yields a "null" model (all weights equal to zero) can - be calculated using :func:`l1_min_c`. +* **Avoiding data copy**: For :class:`SVC`, :class:`SVR`, :class:`NuSVC` and + :class:`NuSVR`, if the data passed to certain methods is not C-ordered + contiguous and double precision, it will be copied before calling the + underlying C implementation. You can check whether a given numpy array is + C-contiguous by inspecting its ``flags`` attribute. + + For :class:`LinearSVC` (and :class:`LogisticRegression + `) any input passed as a numpy + array will be copied and converted to the `liblinear`_ internal sparse data + representation (double precision floats and int32 indices of non-zero + components). If you want to fit a large-scale linear classifier without + copying a dense numpy C-contiguous double precision array as input, we + suggest to use the :class:`SGDClassifier + ` class instead. The objective + function can be configured to be almost the same as the :class:`LinearSVC` + model. + +* **Kernel cache size**: For :class:`SVC`, :class:`SVR`, :class:`NuSVC` and + :class:`NuSVR`, the size of the kernel cache has a strong impact on run + times for larger problems. If you have enough RAM available, it is + recommended to set ``cache_size`` to a higher value than the default of + 200(MB), such as 500(MB) or 1000(MB). + + +* **Setting C**: ``C`` is ``1`` by default and it's a reasonable default + choice. If you have a lot of noisy observations you should decrease it: + decreasing C corresponds to more regularization. + + :class:`LinearSVC` and :class:`LinearSVR` are less sensitive to ``C`` when + it becomes large, and prediction results stop improving after a certain + threshold. Meanwhile, larger ``C`` values will take more time to train, + sometimes up to 10 times longer, as shown in [#3]_. 
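+
+  A log-spaced grid is a common starting point when tuning ``C``
+  (an illustrative sketch; the exact range is arbitrary)::
+
+    >>> import numpy as np
+    >>> from sklearn.model_selection import GridSearchCV
+    >>> from sklearn.svm import SVC
+    >>> search = GridSearchCV(SVC(), {"C": np.logspace(-2, 2, 5)})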
+ +* Support Vector Machine algorithms are not scale invariant, so **it + is highly recommended to scale your data**. For example, scale each + attribute on the input vector X to [0,1] or [-1,+1], or standardize it + to have mean 0 and variance 1. Note that the *same* scaling must be + applied to the test vector to obtain meaningful results. This can be done + easily by using a :class:`~sklearn.pipeline.Pipeline`:: + + >>> from sklearn.pipeline import make_pipeline + >>> from sklearn.preprocessing import StandardScaler + >>> from sklearn.svm import SVC + + >>> clf = make_pipeline(StandardScaler(), SVC()) + + See section :ref:`preprocessing` for more details on scaling and + normalization. + +.. _shrinking_svm: + +* Regarding the `shrinking` parameter, quoting [#4]_: *We found that if the + number of iterations is large, then shrinking can shorten the training + time. However, if we loosely solve the optimization problem (e.g., by + using a large stopping tolerance), the code without using shrinking may + be much faster* + +* Parameter ``nu`` in :class:`NuSVC`/:class:`OneClassSVM`/:class:`NuSVR` + approximates the fraction of training errors and support vectors. + +* In :class:`SVC`, if the data is unbalanced (e.g. many + positive and few negative), set ``class_weight='balanced'`` and/or try + different penalty parameters ``C``. + +* **Randomness of the underlying implementations**: The underlying + implementations of :class:`SVC` and :class:`NuSVC` use a random number + generator only to shuffle the data for probability estimation (when + ``probability`` is set to ``True``). This randomness can be controlled + with the ``random_state`` parameter. If ``probability`` is set to ``False`` + these estimators are not random and ``random_state`` has no effect on the + results. The underlying :class:`OneClassSVM` implementation is similar to + the ones of :class:`SVC` and :class:`NuSVC`. As no probability estimation + is provided for :class:`OneClassSVM`, it is not random. + + The underlying :class:`LinearSVC` implementation uses a random number + generator to select features when fitting the model with a dual coordinate + descent (i.e. when ``dual`` is set to ``True``). It is thus not uncommon + to have slightly different results for the same input data. If that + happens, try with a smaller `tol` parameter. This randomness can also be + controlled with the ``random_state`` parameter. When ``dual`` is + set to ``False`` the underlying implementation of :class:`LinearSVC` is + not random and ``random_state`` has no effect on the results. + +* Using L1 penalization as provided by ``LinearSVC(penalty='l1', + dual=False)`` yields a sparse solution, i.e. only a subset of feature + weights is different from zero and contribute to the decision function. + Increasing ``C`` yields a more complex model (more features are selected). + The ``C`` value that yields a "null" model (all weights equal to zero) can + be calculated using :func:`l1_min_c`. .. _svm_kernels: @@ -463,16 +474,16 @@ Kernel functions The *kernel function* can be any of the following: - * linear: :math:`\langle x, x'\rangle`. +* linear: :math:`\langle x, x'\rangle`. - * polynomial: :math:`(\gamma \langle x, x'\rangle + r)^d`, where - :math:`d` is specified by parameter ``degree``, :math:`r` by ``coef0``. +* polynomial: :math:`(\gamma \langle x, x'\rangle + r)^d`, where + :math:`d` is specified by parameter ``degree``, :math:`r` by ``coef0``. 
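+
+  For instance, a degree-3 polynomial kernel with :math:`r = 1` could be
+  requested as follows (an illustrative sketch; the values are arbitrary)::
+
+    >>> from sklearn.svm import SVC
+    >>> poly_clf = SVC(kernel='poly', degree=3, coef0=1)
+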
- * rbf: :math:`\exp(-\gamma \|x-x'\|^2)`, where :math:`\gamma` is - specified by parameter ``gamma``, must be greater than 0. +* rbf: :math:`\exp(-\gamma \|x-x'\|^2)`, where :math:`\gamma` is + specified by parameter ``gamma``, must be greater than 0. - * sigmoid :math:`\tanh(\gamma \langle x,x'\rangle + r)`, - where :math:`r` is specified by ``coef0``. +* sigmoid :math:`\tanh(\gamma \langle x,x'\rangle + r)`, + where :math:`r` is specified by ``coef0``. Different kernels are specified by the `kernel` parameter:: @@ -500,11 +511,10 @@ Proper choice of ``C`` and ``gamma`` is critical to the SVM's performance. One is advised to use :class:`~sklearn.model_selection.GridSearchCV` with ``C`` and ``gamma`` spaced exponentially far apart to choose good values. -.. topic:: Examples: - - * :ref:`sphx_glr_auto_examples_svm_plot_rbf_parameters.py` - * :ref:`sphx_glr_auto_examples_svm_plot_svm_nonlinear.py` +.. rubric:: Examples +* :ref:`sphx_glr_auto_examples_svm_plot_rbf_parameters.py` +* :ref:`sphx_glr_auto_examples_svm_plot_svm_scale_c.py` Custom Kernels -------------- @@ -515,62 +525,60 @@ python function or by precomputing the Gram matrix. Classifiers with custom kernels behave the same way as any other classifiers, except that: - * Field ``support_vectors_`` is now empty, only indices of support - vectors are stored in ``support_`` +* Field ``support_vectors_`` is now empty, only indices of support + vectors are stored in ``support_`` - * A reference (and not a copy) of the first argument in the ``fit()`` - method is stored for future reference. If that array changes between the - use of ``fit()`` and ``predict()`` you will have unexpected results. +* A reference (and not a copy) of the first argument in the ``fit()`` + method is stored for future reference. If that array changes between the + use of ``fit()`` and ``predict()`` you will have unexpected results. -Using Python functions as kernels -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. dropdown:: Using Python functions as kernels -You can use your own defined kernels by passing a function to the -``kernel`` parameter. + You can use your own defined kernels by passing a function to the + ``kernel`` parameter. -Your kernel must take as arguments two matrices of shape -``(n_samples_1, n_features)``, ``(n_samples_2, n_features)`` -and return a kernel matrix of shape ``(n_samples_1, n_samples_2)``. + Your kernel must take as arguments two matrices of shape + ``(n_samples_1, n_features)``, ``(n_samples_2, n_features)`` + and return a kernel matrix of shape ``(n_samples_1, n_samples_2)``. -The following code defines a linear kernel and creates a classifier -instance that will use that kernel:: + The following code defines a linear kernel and creates a classifier + instance that will use that kernel:: - >>> import numpy as np - >>> from sklearn import svm - >>> def my_kernel(X, Y): - ... return np.dot(X, Y.T) - ... - >>> clf = svm.SVC(kernel=my_kernel) + >>> import numpy as np + >>> from sklearn import svm + >>> def my_kernel(X, Y): + ... return np.dot(X, Y.T) + ... + >>> clf = svm.SVC(kernel=my_kernel) -.. topic:: Examples: - * :ref:`sphx_glr_auto_examples_svm_plot_custom_kernel.py`. +.. dropdown:: Using the Gram matrix -Using the Gram matrix -~~~~~~~~~~~~~~~~~~~~~ + You can pass pre-computed kernels by using the ``kernel='precomputed'`` + option. You should then pass Gram matrix instead of X to the `fit` and + `predict` methods. 
The kernel values between *all* training vectors and the + test vectors must be provided: -You can pass pre-computed kernels by using the ``kernel='precomputed'`` -option. You should then pass Gram matrix instead of X to the `fit` and -`predict` methods. The kernel values between *all* training vectors and the -test vectors must be provided: + >>> import numpy as np + >>> from sklearn.datasets import make_classification + >>> from sklearn.model_selection import train_test_split + >>> from sklearn import svm + >>> X, y = make_classification(n_samples=10, random_state=0) + >>> X_train , X_test , y_train, y_test = train_test_split(X, y, random_state=0) + >>> clf = svm.SVC(kernel='precomputed') + >>> # linear kernel computation + >>> gram_train = np.dot(X_train, X_train.T) + >>> clf.fit(gram_train, y_train) + SVC(kernel='precomputed') + >>> # predict on training examples + >>> gram_test = np.dot(X_test, X_train.T) + >>> clf.predict(gram_test) + array([0, 1, 0]) - >>> import numpy as np - >>> from sklearn.datasets import make_classification - >>> from sklearn.model_selection import train_test_split - >>> from sklearn import svm - >>> X, y = make_classification(n_samples=10, random_state=0) - >>> X_train , X_test , y_train, y_test = train_test_split(X, y, random_state=0) - >>> clf = svm.SVC(kernel='precomputed') - >>> # linear kernel computation - >>> gram_train = np.dot(X_train, X_train.T) - >>> clf.fit(gram_train, y_train) - SVC(kernel='precomputed') - >>> # predict on training examples - >>> gram_test = np.dot(X_test, X_train.T) - >>> clf.predict(gram_test) - array([0, 1, 0]) +.. rubric:: Examples +* :ref:`sphx_glr_auto_examples_svm_plot_custom_kernel.py` .. _svm_mathematical_formulation: @@ -667,38 +675,35 @@ term :math:`b` estimator used is :class:`~sklearn.linear_model.Ridge` regression, the relation between them is given as :math:`C = \frac{1}{alpha}`. -LinearSVC ---------- +.. dropdown:: LinearSVC -The primal problem can be equivalently formulated as + The primal problem can be equivalently formulated as -.. math:: + .. math:: - \min_ {w, b} \frac{1}{2} w^T w + C \sum_{i=1}^{n}\max(0, 1 - y_i (w^T \phi(x_i) + b)), + \min_ {w, b} \frac{1}{2} w^T w + C \sum_{i=1}^{n}\max(0, 1 - y_i (w^T \phi(x_i) + b)), -where we make use of the `hinge loss -`_. This is the form that is -directly optimized by :class:`LinearSVC`, but unlike the dual form, this one -does not involve inner products between samples, so the famous kernel trick -cannot be applied. This is why only the linear kernel is supported by -:class:`LinearSVC` (:math:`\phi` is the identity function). + where we make use of the `hinge loss + `_. This is the form that is + directly optimized by :class:`LinearSVC`, but unlike the dual form, this one + does not involve inner products between samples, so the famous kernel trick + cannot be applied. This is why only the linear kernel is supported by + :class:`LinearSVC` (:math:`\phi` is the identity function). .. _nu_svc: -NuSVC ------ - -The :math:`\nu`-SVC formulation [#7]_ is a reparameterization of the -:math:`C`-SVC and therefore mathematically equivalent. +.. dropdown:: NuSVC -We introduce a new parameter :math:`\nu` (instead of :math:`C`) which -controls the number of support vectors and *margin errors*: -:math:`\nu \in (0, 1]` is an upper bound on the fraction of margin errors and -a lower bound of the fraction of support vectors. 
A margin error corresponds -to a sample that lies on the wrong side of its margin boundary: it is either -misclassified, or it is correctly classified but does not lie beyond the -margin. + The :math:`\nu`-SVC formulation [#7]_ is a reparameterization of the + :math:`C`-SVC and therefore mathematically equivalent. + We introduce a new parameter :math:`\nu` (instead of :math:`C`) which + controls the number of support vectors and *margin errors*: + :math:`\nu \in (0, 1]` is an upper bound on the fraction of margin errors and + a lower bound of the fraction of support vectors. A margin error corresponds + to a sample that lies on the wrong side of its margin boundary: it is either + misclassified, or it is correctly classified but does not lie beyond the + margin. SVR --- @@ -747,18 +752,17 @@ which holds the difference :math:`\alpha_i - \alpha_i^*`, ``support_vectors_`` w holds the support vectors, and ``intercept_`` which holds the independent term :math:`b` -LinearSVR ---------- +.. dropdown:: LinearSVR -The primal problem can be equivalently formulated as + The primal problem can be equivalently formulated as -.. math:: + .. math:: - \min_ {w, b} \frac{1}{2} w^T w + C \sum_{i=1}^{n}\max(0, |y_i - (w^T \phi(x_i) + b)| - \varepsilon), + \min_ {w, b} \frac{1}{2} w^T w + C \sum_{i=1}^{n}\max(0, |y_i - (w^T \phi(x_i) + b)| - \varepsilon), -where we make use of the epsilon-insensitive loss, i.e. errors of less than -:math:`\varepsilon` are ignored. This is the form that is directly optimized -by :class:`LinearSVR`. + where we make use of the epsilon-insensitive loss, i.e. errors of less than + :math:`\varepsilon` are ignored. This is the form that is directly optimized + by :class:`LinearSVR`. .. _svm_implementation_details: @@ -774,38 +778,37 @@ used, please refer to their respective papers. .. _`libsvm`: https://www.csie.ntu.edu.tw/~cjlin/libsvm/ .. _`liblinear`: https://www.csie.ntu.edu.tw/~cjlin/liblinear/ -.. topic:: References: +.. rubric:: References - .. [#1] Platt `"Probabilistic outputs for SVMs and comparisons to - regularized likelihood methods" - `_. +.. [#1] Platt `"Probabilistic outputs for SVMs and comparisons to + regularized likelihood methods" + `_. - .. [#2] Wu, Lin and Weng, `"Probability estimates for multi-class - classification by pairwise coupling" - `_, JMLR - 5:975-1005, 2004. +.. [#2] Wu, Lin and Weng, `"Probability estimates for multi-class + classification by pairwise coupling" + `_, + JMLR 5:975-1005, 2004. - .. [#3] Fan, Rong-En, et al., - `"LIBLINEAR: A library for large linear classification." - `_, - Journal of machine learning research 9.Aug (2008): 1871-1874. +.. [#3] Fan, Rong-En, et al., + `"LIBLINEAR: A library for large linear classification." + `_, + Journal of machine learning research 9.Aug (2008): 1871-1874. - .. [#4] Chang and Lin, `LIBSVM: A Library for Support Vector Machines - `_. +.. [#4] Chang and Lin, `LIBSVM: A Library for Support Vector Machines + `_. - .. [#5] Bishop, `Pattern recognition and machine learning - `_, - chapter 7 Sparse Kernel Machines +.. [#5] Bishop, `Pattern recognition and machine learning + `_, + chapter 7 Sparse Kernel Machines - .. [#6] :doi:`"A Tutorial on Support Vector Regression" - <10.1023/B:STCO.0000035301.49549.88>` - Alex J. Smola, Bernhard SchÃļlkopf - Statistics and Computing archive - Volume 14 Issue 3, August 2004, p. 199-222. +.. [#6] :doi:`"A Tutorial on Support Vector Regression" + <10.1023/B:STCO.0000035301.49549.88>` + Alex J. 
Smola, Bernhard Schölkopf - Statistics and Computing archive
+ Volume 14 Issue 3, August 2004, p. 199-222.
- .. [#7] Schölkopf et. al `New Support Vector Algorithms
- `_
+.. [#7] Schölkopf et al. `New Support Vector Algorithms
+ `_
- .. [#8] Crammer and Singer `On the Algorithmic Implementation ofMulticlass
- Kernel-based Vector Machines
- `_,
- JMLR 2001.
+.. [#8] Crammer and Singer `On the Algorithmic Implementation of Multiclass
+ Kernel-based Vector Machines
+ `_, JMLR 2001.
diff --git a/doc/modules/tree.rst b/doc/modules/tree.rst
index f7d43c5a3d7da..9b475d6c09f5f 100644
--- a/doc/modules/tree.rst
+++ b/doc/modules/tree.rst
@@ -23,68 +23,68 @@ the tree, the more complex the decision rules and the fitter the model.
 Some advantages of decision trees are:
- - Simple to understand and to interpret. Trees can be visualized.
+- Simple to understand and to interpret. Trees can be visualized.
- - Requires little data preparation. Other techniques often require data
- normalization, dummy variables need to be created and blank values to
- be removed. Note however that this module does not support missing
- values.
+- Requires little data preparation. Other techniques often require data
+ normalization, dummy variables need to be created and blank values to
+ be removed. Some tree and algorithm combinations support
+ :ref:`missing values `.
- - The cost of using the tree (i.e., predicting data) is logarithmic in the
- number of data points used to train the tree.
+- The cost of using the tree (i.e., predicting data) is logarithmic in the
+ number of data points used to train the tree.
- - Able to handle both numerical and categorical data. However, the scikit-learn
- implementation does not support categorical variables for now. Other
- techniques are usually specialized in analyzing datasets that have only one type
- of variable. See :ref:`algorithms ` for more
- information.
+- Able to handle both numerical and categorical data. However, the scikit-learn
+ implementation does not support categorical variables for now. Other
+ techniques are usually specialized in analyzing datasets that have only one type
+ of variable. See :ref:`algorithms ` for more
+ information.
- - Able to handle multi-output problems.
+- Able to handle multi-output problems.
- - Uses a white box model. If a given situation is observable in a model,
- the explanation for the condition is easily explained by boolean logic.
- By contrast, in a black box model (e.g., in an artificial neural
- network), results may be more difficult to interpret.
+- Uses a white box model. If a given situation is observable in a model,
+ the explanation for the condition is easily explained by boolean logic.
+ By contrast, in a black box model (e.g., in an artificial neural
+ network), results may be more difficult to interpret.
- - Possible to validate a model using statistical tests. That makes it
- possible to account for the reliability of the model.
+- Possible to validate a model using statistical tests. That makes it
+ possible to account for the reliability of the model.
- - Performs well even if its assumptions are somewhat violated by
- the true model from which the data were generated.
+- Performs well even if its assumptions are somewhat violated by
+ the true model from which the data were generated.
 The disadvantages of decision trees include:
- - Decision-tree learners can create over-complex trees that do not
- generalize the data well. This is called overfitting.
Mechanisms - such as pruning, setting the minimum number of samples required - at a leaf node or setting the maximum depth of the tree are - necessary to avoid this problem. +- Decision-tree learners can create over-complex trees that do not + generalize the data well. This is called overfitting. Mechanisms + such as pruning, setting the minimum number of samples required + at a leaf node or setting the maximum depth of the tree are + necessary to avoid this problem. - - Decision trees can be unstable because small variations in the - data might result in a completely different tree being generated. - This problem is mitigated by using decision trees within an - ensemble. +- Decision trees can be unstable because small variations in the + data might result in a completely different tree being generated. + This problem is mitigated by using decision trees within an + ensemble. - - Predictions of decision trees are neither smooth nor continuous, but - piecewise constant approximations as seen in the above figure. Therefore, - they are not good at extrapolation. +- Predictions of decision trees are neither smooth nor continuous, but + piecewise constant approximations as seen in the above figure. Therefore, + they are not good at extrapolation. - - The problem of learning an optimal decision tree is known to be - NP-complete under several aspects of optimality and even for simple - concepts. Consequently, practical decision-tree learning algorithms - are based on heuristic algorithms such as the greedy algorithm where - locally optimal decisions are made at each node. Such algorithms - cannot guarantee to return the globally optimal decision tree. This - can be mitigated by training multiple trees in an ensemble learner, - where the features and samples are randomly sampled with replacement. +- The problem of learning an optimal decision tree is known to be + NP-complete under several aspects of optimality and even for simple + concepts. Consequently, practical decision-tree learning algorithms + are based on heuristic algorithms such as the greedy algorithm where + locally optimal decisions are made at each node. Such algorithms + cannot guarantee to return the globally optimal decision tree. This + can be mitigated by training multiple trees in an ensemble learner, + where the features and samples are randomly sampled with replacement. - - There are concepts that are hard to learn because decision trees - do not express them easily, such as XOR, parity or multiplexer problems. +- There are concepts that are hard to learn because decision trees + do not express them easily, such as XOR, parity or multiplexer problems. - - Decision tree learners create biased trees if some classes dominate. - It is therefore recommended to balance the dataset prior to fitting - with the decision tree. +- Decision tree learners create biased trees if some classes dominate. + It is therefore recommended to balance the dataset prior to fitting + with the decision tree. .. _tree_classification: @@ -146,76 +146,78 @@ Once trained, you can plot the tree with the :func:`plot_tree` function:: :scale: 75 :align: center -We can also export the tree in `Graphviz -`_ format using the :func:`export_graphviz` -exporter. If you use the `conda `_ package manager, the graphviz binaries -and the python package can be installed with `conda install python-graphviz`. +.. 
dropdown:: Alternative ways to export trees -Alternatively binaries for graphviz can be downloaded from the graphviz project homepage, -and the Python wrapper installed from pypi with `pip install graphviz`. + We can also export the tree in `Graphviz + `_ format using the :func:`export_graphviz` + exporter. If you use the `conda `_ package manager, the graphviz binaries + and the python package can be installed with `conda install python-graphviz`. -Below is an example graphviz export of the above tree trained on the entire -iris dataset; the results are saved in an output file `iris.pdf`:: + Alternatively binaries for graphviz can be downloaded from the graphviz project homepage, + and the Python wrapper installed from pypi with `pip install graphviz`. + Below is an example graphviz export of the above tree trained on the entire + iris dataset; the results are saved in an output file `iris.pdf`:: - >>> import graphviz # doctest: +SKIP - >>> dot_data = tree.export_graphviz(clf, out_file=None) # doctest: +SKIP - >>> graph = graphviz.Source(dot_data) # doctest: +SKIP - >>> graph.render("iris") # doctest: +SKIP -The :func:`export_graphviz` exporter also supports a variety of aesthetic -options, including coloring nodes by their class (or value for regression) and -using explicit variable and class names if desired. Jupyter notebooks also -render these plots inline automatically:: + >>> import graphviz # doctest: +SKIP + >>> dot_data = tree.export_graphviz(clf, out_file=None) # doctest: +SKIP + >>> graph = graphviz.Source(dot_data) # doctest: +SKIP + >>> graph.render("iris") # doctest: +SKIP - >>> dot_data = tree.export_graphviz(clf, out_file=None, # doctest: +SKIP - ... feature_names=iris.feature_names, # doctest: +SKIP - ... class_names=iris.target_names, # doctest: +SKIP - ... filled=True, rounded=True, # doctest: +SKIP - ... special_characters=True) # doctest: +SKIP - >>> graph = graphviz.Source(dot_data) # doctest: +SKIP - >>> graph # doctest: +SKIP + The :func:`export_graphviz` exporter also supports a variety of aesthetic + options, including coloring nodes by their class (or value for regression) and + using explicit variable and class names if desired. Jupyter notebooks also + render these plots inline automatically:: -.. only:: html + >>> dot_data = tree.export_graphviz(clf, out_file=None, # doctest: +SKIP + ... feature_names=iris.feature_names, # doctest: +SKIP + ... class_names=iris.target_names, # doctest: +SKIP + ... filled=True, rounded=True, # doctest: +SKIP + ... special_characters=True) # doctest: +SKIP + >>> graph = graphviz.Source(dot_data) # doctest: +SKIP + >>> graph # doctest: +SKIP - .. figure:: ../images/iris.svg - :align: center + .. only:: html -.. only:: latex + .. figure:: ../images/iris.svg + :align: center - .. figure:: ../images/iris.pdf - :align: center + .. only:: latex -.. figure:: ../auto_examples/tree/images/sphx_glr_plot_iris_dtc_001.png - :target: ../auto_examples/tree/plot_iris_dtc.html - :align: center - :scale: 75 + .. figure:: ../images/iris.pdf + :align: center -Alternatively, the tree can also be exported in textual format with the -function :func:`export_text`. This method doesn't require the installation -of external libraries and is more compact: + .. 
figure:: ../auto_examples/tree/images/sphx_glr_plot_iris_dtc_001.png + :target: ../auto_examples/tree/plot_iris_dtc.html + :align: center + :scale: 75 - >>> from sklearn.datasets import load_iris - >>> from sklearn.tree import DecisionTreeClassifier - >>> from sklearn.tree import export_text - >>> iris = load_iris() - >>> decision_tree = DecisionTreeClassifier(random_state=0, max_depth=2) - >>> decision_tree = decision_tree.fit(iris.data, iris.target) - >>> r = export_text(decision_tree, feature_names=iris['feature_names']) - >>> print(r) - |--- petal width (cm) <= 0.80 - | |--- class: 0 - |--- petal width (cm) > 0.80 - | |--- petal width (cm) <= 1.75 - | | |--- class: 1 - | |--- petal width (cm) > 1.75 - | | |--- class: 2 - - -.. topic:: Examples: - - * :ref:`sphx_glr_auto_examples_tree_plot_iris_dtc.py` - * :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py` + Alternatively, the tree can also be exported in textual format with the + function :func:`export_text`. This method doesn't require the installation + of external libraries and is more compact: + + >>> from sklearn.datasets import load_iris + >>> from sklearn.tree import DecisionTreeClassifier + >>> from sklearn.tree import export_text + >>> iris = load_iris() + >>> decision_tree = DecisionTreeClassifier(random_state=0, max_depth=2) + >>> decision_tree = decision_tree.fit(iris.data, iris.target) + >>> r = export_text(decision_tree, feature_names=iris['feature_names']) + >>> print(r) + |--- petal width (cm) <= 0.80 + | |--- class: 0 + |--- petal width (cm) > 0.80 + | |--- petal width (cm) <= 1.75 + | | |--- class: 1 + | |--- petal width (cm) > 1.75 + | | |--- class: 2 + + +.. rubric:: Examples + +* :ref:`sphx_glr_auto_examples_tree_plot_iris_dtc.py` +* :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py` .. _tree_regression: @@ -242,9 +244,9 @@ instead of integer values:: >>> clf.predict([[1, 1]]) array([0.5]) -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_tree_plot_tree_regression.py` +* :ref:`sphx_glr_auto_examples_tree_plot_tree_regression.py` .. _tree_multioutput: @@ -267,20 +269,19 @@ generalization accuracy of the resulting estimator may often be increased. With regard to decision trees, this strategy can readily be used to support multi-output problems. This requires the following changes: - - Store n output values in leaves, instead of 1; - - Use splitting criteria that compute the average reduction across all - n outputs. +- Store n output values in leaves, instead of 1; +- Use splitting criteria that compute the average reduction across all + n outputs. This module offers support for multi-output problems by implementing this strategy in both :class:`DecisionTreeClassifier` and :class:`DecisionTreeRegressor`. If a decision tree is fit on an output array Y of shape ``(n_samples, n_outputs)`` then the resulting estimator will: - * Output n_output values upon ``predict``; - - * Output a list of n_output arrays of class probabilities upon - ``predict_proba``. +* Output n_output values upon ``predict``; +* Output a list of n_output arrays of class probabilities upon + ``predict_proba``. The use of multi-output trees for regression is demonstrated in :ref:`sphx_glr_auto_examples_tree_plot_tree_regression_multioutput.py`. In this example, the input @@ -301,17 +302,17 @@ the lower half of those faces. :scale: 75 :align: center -.. topic:: Examples: +.. 
rubric:: Examples - * :ref:`sphx_glr_auto_examples_tree_plot_tree_regression_multioutput.py` - * :ref:`sphx_glr_auto_examples_miscellaneous_plot_multioutput_face_completion.py` +* :ref:`sphx_glr_auto_examples_tree_plot_tree_regression_multioutput.py` +* :ref:`sphx_glr_auto_examples_miscellaneous_plot_multioutput_face_completion.py` -.. topic:: References: +.. rubric:: References - * M. Dumont et al, `Fast multi-class image annotation with random subwindows - and multiple output randomized trees - `_, International Conference on - Computer Vision Theory and Applications 2009 +* M. Dumont et al, `Fast multi-class image annotation with random subwindows + and multiple output randomized trees + `_, + International Conference on Computer Vision Theory and Applications 2009 .. _tree_complexity: @@ -334,65 +335,65 @@ total cost over the entire trees (by summing the cost at each node) of Tips on practical use ===================== - * Decision trees tend to overfit on data with a large number of features. - Getting the right ratio of samples to number of features is important, since - a tree with few samples in high dimensional space is very likely to overfit. - - * Consider performing dimensionality reduction (:ref:`PCA `, - :ref:`ICA `, or :ref:`feature_selection`) beforehand to - give your tree a better chance of finding features that are discriminative. - - * :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py` will help - in gaining more insights about how the decision tree makes predictions, which is - important for understanding the important features in the data. - - * Visualize your tree as you are training by using the ``export`` - function. Use ``max_depth=3`` as an initial tree depth to get a feel for - how the tree is fitting to your data, and then increase the depth. - - * Remember that the number of samples required to populate the tree doubles - for each additional level the tree grows to. Use ``max_depth`` to control - the size of the tree to prevent overfitting. - - * Use ``min_samples_split`` or ``min_samples_leaf`` to ensure that multiple - samples inform every decision in the tree, by controlling which splits will - be considered. A very small number will usually mean the tree will overfit, - whereas a large number will prevent the tree from learning the data. Try - ``min_samples_leaf=5`` as an initial value. If the sample size varies - greatly, a float number can be used as percentage in these two parameters. - While ``min_samples_split`` can create arbitrarily small leaves, - ``min_samples_leaf`` guarantees that each leaf has a minimum size, avoiding - low-variance, over-fit leaf nodes in regression problems. For - classification with few classes, ``min_samples_leaf=1`` is often the best - choice. - - Note that ``min_samples_split`` considers samples directly and independent of - ``sample_weight``, if provided (e.g. a node with m weighted samples is still - treated as having exactly m samples). Consider ``min_weight_fraction_leaf`` or - ``min_impurity_decrease`` if accounting for sample weights is required at splits. - - * Balance your dataset before training to prevent the tree from being biased - toward the classes that are dominant. Class balancing can be done by - sampling an equal number of samples from each class, or preferably by - normalizing the sum of the sample weights (``sample_weight``) for each - class to the same value. 
Also note that weight-based pre-pruning criteria, - such as ``min_weight_fraction_leaf``, will then be less biased toward - dominant classes than criteria that are not aware of the sample weights, - like ``min_samples_leaf``. - - * If the samples are weighted, it will be easier to optimize the tree - structure using weight-based pre-pruning criterion such as - ``min_weight_fraction_leaf``, which ensure that leaf nodes contain at least - a fraction of the overall sum of the sample weights. - - * All decision trees use ``np.float32`` arrays internally. - If training data is not in this format, a copy of the dataset will be made. - - * If the input matrix X is very sparse, it is recommended to convert to sparse - ``csc_matrix`` before calling fit and sparse ``csr_matrix`` before calling - predict. Training time can be orders of magnitude faster for a sparse - matrix input compared to a dense matrix when features have zero values in - most of the samples. +* Decision trees tend to overfit on data with a large number of features. + Getting the right ratio of samples to number of features is important, since + a tree with few samples in high dimensional space is very likely to overfit. + +* Consider performing dimensionality reduction (:ref:`PCA `, + :ref:`ICA `, or :ref:`feature_selection`) beforehand to + give your tree a better chance of finding features that are discriminative. + +* :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py` will help + in gaining more insights about how the decision tree makes predictions, which is + important for understanding the important features in the data. + +* Visualize your tree as you are training by using the ``export`` + function. Use ``max_depth=3`` as an initial tree depth to get a feel for + how the tree is fitting to your data, and then increase the depth. + +* Remember that the number of samples required to populate the tree doubles + for each additional level the tree grows to. Use ``max_depth`` to control + the size of the tree to prevent overfitting. + +* Use ``min_samples_split`` or ``min_samples_leaf`` to ensure that multiple + samples inform every decision in the tree, by controlling which splits will + be considered. A very small number will usually mean the tree will overfit, + whereas a large number will prevent the tree from learning the data. Try + ``min_samples_leaf=5`` as an initial value. If the sample size varies + greatly, a float number can be used as percentage in these two parameters. + While ``min_samples_split`` can create arbitrarily small leaves, + ``min_samples_leaf`` guarantees that each leaf has a minimum size, avoiding + low-variance, over-fit leaf nodes in regression problems. For + classification with few classes, ``min_samples_leaf=1`` is often the best + choice. + + Note that ``min_samples_split`` considers samples directly and independent of + ``sample_weight``, if provided (e.g. a node with m weighted samples is still + treated as having exactly m samples). Consider ``min_weight_fraction_leaf`` or + ``min_impurity_decrease`` if accounting for sample weights is required at splits. + +* Balance your dataset before training to prevent the tree from being biased + toward the classes that are dominant. Class balancing can be done by + sampling an equal number of samples from each class, or preferably by + normalizing the sum of the sample weights (``sample_weight``) for each + class to the same value. 
Also note that weight-based pre-pruning criteria, + such as ``min_weight_fraction_leaf``, will then be less biased toward + dominant classes than criteria that are not aware of the sample weights, + like ``min_samples_leaf``. + +* If the samples are weighted, it will be easier to optimize the tree + structure using weight-based pre-pruning criterion such as + ``min_weight_fraction_leaf``, which ensure that leaf nodes contain at least + a fraction of the overall sum of the sample weights. + +* All decision trees use ``np.float32`` arrays internally. + If training data is not in this format, a copy of the dataset will be made. + +* If the input matrix X is very sparse, it is recommended to convert to sparse + ``csc_matrix`` before calling fit and sparse ``csr_matrix`` before calling + predict. Training time can be orders of magnitude faster for a sparse + matrix input compared to a dense matrix when features have zero values in + most of the samples. .. _tree_algorithms: @@ -403,30 +404,32 @@ Tree algorithms: ID3, C4.5, C5.0 and CART What are all the various decision tree algorithms and how do they differ from each other? Which one is implemented in scikit-learn? -ID3_ (Iterative Dichotomiser 3) was developed in 1986 by Ross Quinlan. -The algorithm creates a multiway tree, finding for each node (i.e. in -a greedy manner) the categorical feature that will yield the largest -information gain for categorical targets. Trees are grown to their -maximum size and then a pruning step is usually applied to improve the -ability of the tree to generalize to unseen data. - -C4.5 is the successor to ID3 and removed the restriction that features -must be categorical by dynamically defining a discrete attribute (based -on numerical variables) that partitions the continuous attribute value -into a discrete set of intervals. C4.5 converts the trained trees -(i.e. the output of the ID3 algorithm) into sets of if-then rules. -The accuracy of each rule is then evaluated to determine the order -in which they should be applied. Pruning is done by removing a rule's -precondition if the accuracy of the rule improves without it. - -C5.0 is Quinlan's latest version release under a proprietary license. -It uses less memory and builds smaller rulesets than C4.5 while being -more accurate. - -CART (Classification and Regression Trees) is very similar to C4.5, but -it differs in that it supports numerical target variables (regression) and -does not compute rule sets. CART constructs binary trees using the feature -and threshold that yield the largest information gain at each node. +.. dropdown:: Various decision tree algorithms + + ID3_ (Iterative Dichotomiser 3) was developed in 1986 by Ross Quinlan. + The algorithm creates a multiway tree, finding for each node (i.e. in + a greedy manner) the categorical feature that will yield the largest + information gain for categorical targets. Trees are grown to their + maximum size and then a pruning step is usually applied to improve the + ability of the tree to generalize to unseen data. + + C4.5 is the successor to ID3 and removed the restriction that features + must be categorical by dynamically defining a discrete attribute (based + on numerical variables) that partitions the continuous attribute value + into a discrete set of intervals. C4.5 converts the trained trees + (i.e. the output of the ID3 algorithm) into sets of if-then rules. + The accuracy of each rule is then evaluated to determine the order + in which they should be applied. 
Pruning is done by removing a rule's + precondition if the accuracy of the rule improves without it. + + C5.0 is Quinlan's latest version release under a proprietary license. + It uses less memory and builds smaller rulesets than C4.5 while being + more accurate. + + CART (Classification and Regression Trees) is very similar to C4.5, but + it differs in that it supports numerical target variables (regression) and + does not compute rule sets. CART constructs binary trees using the feature + and threshold that yield the largest information gain at each node. scikit-learn uses an optimized version of the CART algorithm; however, the scikit-learn implementation does not support categorical variables for now. @@ -500,8 +503,7 @@ Log Loss or Entropy: H(Q_m) = - \sum_k p_{mk} \log(p_{mk}) - -.. note:: +.. dropdown:: Shannon entropy The entropy criterion computes the Shannon entropy of the possible classes. It takes the class frequencies of the training data points that reached a given @@ -577,7 +579,7 @@ Note that it fits much slower than the MSE criterion. Missing Values Support ====================== -:class:`~tree.DecisionTreeClassifier` and :class:`~tree.DecisionTreeRegressor` +:class:`DecisionTreeClassifier` and :class:`DecisionTreeRegressor` have built-in support for missing values when `splitter='best'` and criterion is `'gini'`, `'entropy`', or `'log_loss'`, for classification or `'squared_error'`, `'friedman_mse'`, or `'poisson'` for regression. @@ -587,50 +589,50 @@ the split with all the missing values going to the left node or the right node. Decisions are made as follows: - - By default when predicting, the samples with missing values are classified - with the class used in the split found during training:: +- By default when predicting, the samples with missing values are classified + with the class used in the split found during training:: - >>> from sklearn.tree import DecisionTreeClassifier - >>> import numpy as np + >>> from sklearn.tree import DecisionTreeClassifier + >>> import numpy as np - >>> X = np.array([0, 1, 6, np.nan]).reshape(-1, 1) - >>> y = [0, 0, 1, 1] + >>> X = np.array([0, 1, 6, np.nan]).reshape(-1, 1) + >>> y = [0, 0, 1, 1] - >>> tree = DecisionTreeClassifier(random_state=0).fit(X, y) - >>> tree.predict(X) - array([0, 0, 1, 1]) + >>> tree = DecisionTreeClassifier(random_state=0).fit(X, y) + >>> tree.predict(X) + array([0, 0, 1, 1]) - - If the the criterion evaluation is the same for both nodes, - then the tie for missing value at predict time is broken by going to the - right node. The splitter also checks the split where all the missing - values go to one child and non-missing values go to the other:: +- If the criterion evaluation is the same for both nodes, + then the tie for missing value at predict time is broken by going to the + right node. 
The splitter also checks the split where all the missing + values go to one child and non-missing values go to the other:: - >>> from sklearn.tree import DecisionTreeClassifier - >>> import numpy as np + >>> from sklearn.tree import DecisionTreeClassifier + >>> import numpy as np - >>> X = np.array([np.nan, -1, np.nan, 1]).reshape(-1, 1) - >>> y = [0, 0, 1, 1] + >>> X = np.array([np.nan, -1, np.nan, 1]).reshape(-1, 1) + >>> y = [0, 0, 1, 1] - >>> tree = DecisionTreeClassifier(random_state=0).fit(X, y) + >>> tree = DecisionTreeClassifier(random_state=0).fit(X, y) - >>> X_test = np.array([np.nan]).reshape(-1, 1) - >>> tree.predict(X_test) - array([1]) + >>> X_test = np.array([np.nan]).reshape(-1, 1) + >>> tree.predict(X_test) + array([1]) - - If no missing values are seen during training for a given feature, then during - prediction missing values are mapped to the child with the most samples:: +- If no missing values are seen during training for a given feature, then during + prediction missing values are mapped to the child with the most samples:: - >>> from sklearn.tree import DecisionTreeClassifier - >>> import numpy as np + >>> from sklearn.tree import DecisionTreeClassifier + >>> import numpy as np - >>> X = np.array([0, 1, 2, 3]).reshape(-1, 1) - >>> y = [0, 1, 1, 1] + >>> X = np.array([0, 1, 2, 3]).reshape(-1, 1) + >>> y = [0, 1, 1, 1] - >>> tree = DecisionTreeClassifier(random_state=0).fit(X, y) + >>> tree = DecisionTreeClassifier(random_state=0).fit(X, y) - >>> X_test = np.array([np.nan]).reshape(-1, 1) - >>> tree.predict(X_test) - array([1]) + >>> X_test = np.array([np.nan]).reshape(-1, 1) + >>> tree.predict(X_test) + array([1]) .. _minimal_cost_complexity_pruning: @@ -667,21 +669,21 @@ with the smallest value of :math:`\alpha_{eff}` is the weakest link and will be pruned. This process stops when the pruned tree's minimal :math:`\alpha_{eff}` is greater than the ``ccp_alpha`` parameter. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_tree_plot_cost_complexity_pruning.py` +* :ref:`sphx_glr_auto_examples_tree_plot_cost_complexity_pruning.py` -.. topic:: References: +.. rubric:: References - .. [BRE] L. Breiman, J. Friedman, R. Olshen, and C. Stone. Classification - and Regression Trees. Wadsworth, Belmont, CA, 1984. +.. [BRE] L. Breiman, J. Friedman, R. Olshen, and C. Stone. Classification + and Regression Trees. Wadsworth, Belmont, CA, 1984. - * https://en.wikipedia.org/wiki/Decision_tree_learning +* https://en.wikipedia.org/wiki/Decision_tree_learning - * https://en.wikipedia.org/wiki/Predictive_analytics +* https://en.wikipedia.org/wiki/Predictive_analytics - * J.R. Quinlan. C4. 5: programs for machine learning. Morgan - Kaufmann, 1993. +* J.R. Quinlan. C4. 5: programs for machine learning. Morgan + Kaufmann, 1993. - * T. Hastie, R. Tibshirani and J. Friedman. Elements of Statistical - Learning, Springer, 2009. +* T. Hastie, R. Tibshirani and J. Friedman. Elements of Statistical + Learning, Springer, 2009. diff --git a/doc/modules/unsupervised_reduction.rst b/doc/modules/unsupervised_reduction.rst index 6e16886064cfc..f94d6ac301e47 100644 --- a/doc/modules/unsupervised_reduction.rst +++ b/doc/modules/unsupervised_reduction.rst @@ -24,20 +24,20 @@ PCA: principal component analysis :class:`decomposition.PCA` looks for a combination of features that capture well the variance of the original features. See :ref:`decompositions`. -.. topic:: **Examples** +.. 
rubric:: Examples - * :ref:`sphx_glr_auto_examples_applications_plot_face_recognition.py` +* :ref:`sphx_glr_auto_examples_applications_plot_face_recognition.py` Random projections ------------------- -The module: :mod:`random_projection` provides several tools for data +The module: :mod:`~sklearn.random_projection` provides several tools for data reduction by random projections. See the relevant section of the documentation: :ref:`random_projection`. -.. topic:: **Examples** +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_miscellaneous_plot_johnson_lindenstrauss_bound.py` +* :ref:`sphx_glr_auto_examples_miscellaneous_plot_johnson_lindenstrauss_bound.py` Feature agglomeration ------------------------ @@ -46,15 +46,14 @@ Feature agglomeration :ref:`hierarchical_clustering` to group together features that behave similarly. -.. topic:: **Examples** +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_cluster_plot_feature_agglomeration_vs_univariate_selection.py` - * :ref:`sphx_glr_auto_examples_cluster_plot_digits_agglomeration.py` +* :ref:`sphx_glr_auto_examples_cluster_plot_feature_agglomeration_vs_univariate_selection.py` +* :ref:`sphx_glr_auto_examples_cluster_plot_digits_agglomeration.py` .. topic:: **Feature scaling** Note that if features have very different scaling or statistical properties, :class:`cluster.FeatureAgglomeration` may not be able to - capture the links between related features. Using a + capture the links between related features. Using a :class:`preprocessing.StandardScaler` can be useful in these settings. - diff --git a/doc/preface.rst b/doc/preface.rst deleted file mode 100644 index 447083a3a8136..0000000000000 --- a/doc/preface.rst +++ /dev/null @@ -1,32 +0,0 @@ -.. This helps define the TOC ordering for "about us" sections. Particularly - useful for PDF output as this section is not linked from elsewhere. - -.. Places global toc into the sidebar - -:globalsidebartoc: True - -.. _preface_menu: - -.. include:: includes/big_toc_css.rst -.. include:: tune_toc.rst - -======================= -Welcome to scikit-learn -======================= - -| - -.. toctree:: - :maxdepth: 2 - - install - faq - support - related_projects - about - testimonials/testimonials - whats_new - roadmap - governance - -| diff --git a/doc/presentations.rst b/doc/presentations.rst index 47b7f16bd74a0..92f23c0fa26cb 100644 --- a/doc/presentations.rst +++ b/doc/presentations.rst @@ -2,9 +2,6 @@ External Resources, Videos and Talks =========================================== -For written tutorials, see the :ref:`Tutorial section ` of -the documentation. - New to Scientific Python? ========================== For those that are still new to the scientific Python ecosystem, we highly @@ -37,40 +34,38 @@ Videos `_ by `Gael Varoquaux`_ at ICML 2010 - A three minute video from a very early stage of scikit-learn, explaining the - basic idea and approach we are following. + A three minute video from a very early stage of scikit-learn, explaining the + basic idea and approach we are following. - `Introduction to statistical learning with scikit-learn `_ by `Gael Varoquaux`_ at SciPy 2011 - An extensive tutorial, consisting of four sessions of one hour. - The tutorial covers the basics of machine learning, - many algorithms and how to apply them using scikit-learn. The - material corresponding is now in the scikit-learn documentation - section :ref:`stat_learn_tut_index`. + An extensive tutorial, consisting of four sessions of one hour. 
+ The tutorial covers the basics of machine learning, + many algorithms and how to apply them using scikit-learn. - `Statistical Learning for Text Classification with scikit-learn and NLTK `_ (and `slides `_) by `Olivier Grisel`_ at PyCon 2011 - Thirty minute introduction to text classification. Explains how to - use NLTK and scikit-learn to solve real-world text classification - tasks and compares against cloud-based solutions. + Thirty minute introduction to text classification. Explains how to + use NLTK and scikit-learn to solve real-world text classification + tasks and compares against cloud-based solutions. - `Introduction to Interactive Predictive Analytics in Python with scikit-learn `_ by `Olivier Grisel`_ at PyCon 2012 - 3-hours long introduction to prediction tasks using scikit-learn. + 3-hours long introduction to prediction tasks using scikit-learn. - `scikit-learn - Machine Learning in Python `_ by `Jake Vanderplas`_ at the 2012 PyData workshop at Google - Interactive demonstration of some scikit-learn features. 75 minutes. + Interactive demonstration of some scikit-learn features. 75 minutes. - `scikit-learn tutorial `_ by `Jake Vanderplas`_ at PyData NYC 2012 - Presentation using the online tutorial, 45 minutes. + Presentation using the online tutorial, 45 minutes. .. _Gael Varoquaux: https://gael-varoquaux.info diff --git a/doc/related_projects.rst b/doc/related_projects.rst index 9cc70ad89ffff..e6d0bd83f0a16 100644 --- a/doc/related_projects.rst +++ b/doc/related_projects.rst @@ -21,9 +21,6 @@ enhance the functionality of scikit-learn's estimators. **Data formats** -- `Fast svmlight / libsvm file loader `_ - Fast and memory-efficient svmlight / libsvm file loader for Python. - - `sklearn_pandas `_ bridge for scikit-learn pipelines and pandas data frame with dedicated transformers. @@ -64,19 +61,20 @@ enhance the functionality of scikit-learn's estimators. It incorporates multiple modeling libraries under one API, and the objects that EvalML creates use an sklearn-compatible API. -**Experimentation frameworks** +**Experimentation and model registry frameworks** + +- `MLFlow `_ MLflow is an open source platform to manage the ML + lifecycle, including experimentation, reproducibility, deployment, and a central + model registry. - `Neptune `_ Metadata store for MLOps, - built for teams that run a lot of experiments.‌ It gives you a single + built for teams that run a lot of experiments. It gives you a single place to log, store, display, organize, compare, and query all your model building metadata. - `Sacred `_ Tool to help you configure, organize, log and reproduce experiments -- `REP `_ Environment for conducting data-driven - research in a consistent and reproducible way - - `Scikit-Learn Laboratory `_ A command-line wrapper around scikit-learn that makes it easy to run machine learning @@ -91,10 +89,7 @@ enhance the functionality of scikit-learn's estimators. debugging/inspecting machine learning models and explaining their predictions. -- `mlxtend `_ Includes model visualization - utilities. - -- `sklearn-evaluation `_ +- `sklearn-evaluation `_ Machine learning model evaluation made easy: plots, tables, HTML reports, experiment tracking and Jupyter notebook analysis. Visual analysis, model selection, evaluation and diagnostics. @@ -140,7 +135,15 @@ enhance the functionality of scikit-learn's estimators. - `treelite `_ Compiles tree-based ensemble models into C code for minimizing prediction latency. 
- + +- `micromlgen `_ + MicroML brings Machine Learning algorithms to microcontrollers. + Supports several scikit-learn classifiers by transpiling them to C code. + +- `emlearn `_ + Implements scikit-learn estimators in C99 for embedded devices and microcontrollers. + Supports several classifier, regression and outlier detection models. + **Model throughput** - `Intel(R) Extension for scikit-learn `_ @@ -161,12 +164,40 @@ project. The following are projects providing interfaces similar to scikit-learn for additional learning algorithms, infrastructures and tasks. -**Structured learning** +**Time series and forecasting** + +- `Darts `_ Darts is a Python library for + user-friendly forecasting and anomaly detection on time series. It contains a variety + of models, from classics such as ARIMA to deep neural networks. The forecasting + models can all be used in the same way, using fit() and predict() functions, similar + to scikit-learn. + +- `sktime `_ A scikit-learn compatible + toolbox for machine learning with time series including time series + classification/regression and (supervised/panel) forecasting. + +- `skforecast `_ A python library + that eases using scikit-learn regressors as multi-step forecasters. It also works + with any regressor compatible with the scikit-learn API. + +- `tslearn `_ A machine learning library for + time series that offers tools for pre-processing and feature extraction as well as + dedicated models for clustering, classification and regression. + +**Gradient (tree) boosting** -- `tslearn `_ A machine learning library for time series - that offers tools for pre-processing and feature extraction as well as dedicated models for clustering, classification and regression. +Note scikit-learn own modern gradient boosting estimators +:class:`~sklearn.ensemble.HistGradientBoostingClassifier` and +:class:`~sklearn.ensemble.HistGradientBoostingRegressor`. -- `sktime `_ A scikit-learn compatible toolbox for machine learning with time series including time series classification/regression and (supervised/panel) forecasting. +- `XGBoost `_ XGBoost is an optimized distributed + gradient boosting library designed to be highly efficient, flexible and portable. + +- `LightGBM `_ LightGBM is a gradient boosting + framework that uses tree based learning algorithms. It is designed to be distributed + and efficient. + +**Structured learning** - `HMMLearn `_ Implementation of hidden markov models that was previously part of scikit-learn. @@ -182,21 +213,9 @@ and tasks. (`CRFsuite `_ wrapper with sklearn-like API). -- `skforecast `_ A python library - that eases using scikit-learn regressors as multi-step forecasters. It also works - with any regressor compatible with the scikit-learn API. **Deep neural networks etc.** -- `nolearn `_ A number of wrappers and - abstractions around existing neural network libraries - -- `Keras `_ High-level API for - TensorFlow with a scikit-learn inspired API. - -- `lasagne `_ A lightweight library to - build and train neural networks in Theano. - - `skorch `_ A scikit-learn compatible neural network library that wraps PyTorch. @@ -209,6 +228,14 @@ and tasks. - `Flower `_ A friendly federated learning framework with a unified approach that can federate any workload, any ML framework, and any programming language. +**Privacy Preserving Machine Learning** + +- `Concrete ML `_ A privacy preserving + ML framework built on top of `Concrete + `_, with bindings to traditional ML + frameworks, thanks to fully homomorphic encryption. 
APIs of so-called + Concrete ML built-in models are very close to scikit-learn APIs. + **Broad scope** - `mlxtend `_ Includes a number of additional @@ -219,9 +246,6 @@ and tasks. **Other regression and classification** -- `xgboost `_ Optimised gradient boosted decision - tree library. - - `ML-Ensemble `_ Generalized ensemble learning (stacking, blending, subsemble, deep ensembles, etc.). @@ -232,10 +256,6 @@ and tasks. - `py-earth `_ Multivariate adaptive regression splines -- `Kernel Regression `_ - Implementation of Nadaraya-Watson kernel regression with automatic bandwidth - selection - - `gplearn `_ Genetic Programming for symbolic regression tasks. @@ -245,8 +265,6 @@ and tasks. - `seglearn `_ Time series and sequence learning using sliding window segmentation. -- `libOPF `_ Optimal path forest classifier - - `fastFM `_ Fast factorization machine implementation compatible with scikit-learn @@ -266,6 +284,7 @@ and tasks. - `hdbscan `_ HDBSCAN and Robust Single Linkage clustering algorithms for robust variable density clustering. + As of scikit-learn version 1.3.0, there is :class:`~sklearn.cluster.HDBSCAN`. - `spherecluster `_ Spherical K-means and mixture of von Mises Fisher clustering routines for data on the @@ -276,6 +295,8 @@ and tasks. - `categorical-encoding `_ A library of sklearn compatible categorical variable encoders. + As of scikit-learn version 1.3.0, there is + :class:`~sklearn.preprocessing.TargetEncoder`. - `imbalanced-learn `_ Various @@ -331,9 +352,6 @@ Recommendation Engine packages - `OpenRec `_ TensorFlow-based neural-network inspired recommendation algorithms. -- `Spotlight `_ Pytorch-based - implementation of deep recommender models. - - `Surprise Lib `_ Library for explicit feedback datasets. @@ -355,9 +373,6 @@ Domain specific packages - `AstroML `_ Machine learning for astronomy. -- `MSMBuilder `_ Machine learning for protein - conformational dynamics time series. - Translations of scikit-learn documentation ------------------------------------------ diff --git a/doc/roadmap.rst b/doc/roadmap.rst index be3607cf542fb..3d6cda2d6c969 100644 --- a/doc/roadmap.rst +++ b/doc/roadmap.rst @@ -1,5 +1,3 @@ -īģŋ.. _roadmap: - .. |ss| raw:: html @@ -8,6 +6,8 @@ +.. _roadmap: + Roadmap ======= diff --git a/doc/scss/api-search.scss b/doc/scss/api-search.scss new file mode 100644 index 0000000000000..51cf15f92c1cb --- /dev/null +++ b/doc/scss/api-search.scss @@ -0,0 +1,111 @@ +/** + * This is the styling for the API index page (`api/index`), in particular for the API + * search table. 
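A minimal sketch of the ``fit``/``predict`` estimator convention that the related_projects.rst entries above keep referring to, using :class:`~sklearn.ensemble.HistGradientBoostingClassifier`, which the new gradient boosting note points readers to; the dataset and parameter values here are arbitrary illustration only::

    from sklearn.datasets import make_classification
    from sklearn.ensemble import HistGradientBoostingClassifier
    from sklearn.model_selection import train_test_split

    # Small synthetic dataset; any estimator following the scikit-learn API
    # (including the third-party ones listed above) is used the same way.
    X, y = make_classification(n_samples=200, n_features=10, random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    clf = HistGradientBoostingClassifier(max_iter=100, random_state=0)
    clf.fit(X_train, y_train)          # learn from the training split
    print(clf.predict(X_test[:5]))     # predict on unseen samples
    print(clf.score(X_test, y_test))   # mean accuracy on the test split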
It involves overriding the style sheet of DataTables which does not + * fit well into the theme, especially in dark theme; see https://datatables.net/ + */ + +.dt-container { + margin-bottom: 2rem; + + // Fix the selection box for entries per page + select.dt-input { + padding: 0 !important; + margin-right: 0.4rem !important; + + > option { + color: var(--pst-color-text-base); + background-color: var(--pst-color-background); + } + } + + // Fix the search box + input.dt-input { + width: 50%; + line-height: normal; + padding: 0.1rem 0.3rem !important; + margin-left: 0.4rem !important; + } + + table.dataTable { + th { + // Avoid table header being too tall + p { + margin-bottom: 0; + } + + // Fix the ascending/descending order buttons in the header + span.dt-column-order { + &::before, + &::after { + color: var(--pst-color-text-base); + line-height: 0.7rem !important; + } + } + } + + td { + // Fix color of text warning no records found + &.dt-empty { + color: var(--pst-color-text-base) !important; + } + } + + // Unset bottom border of the last row + tr:last-child > * { + border-bottom: unset !important; + } + } + + div.dt-paging button.dt-paging-button { + padding: 0 0.5rem; + + &.disabled { + color: var(--pst-color-border) !important; + + // Overwrite the !important color assigned by DataTables because we must keep + // the color of disabled buttons consistent with and without hovering + &:hover { + color: var(--pst-color-border) !important; + } + } + + // Fix colors of paging buttons + &.current, + &:not(.disabled):not(.current):hover { + color: var(--pst-color-on-surface) !important; + border-color: var(--pst-color-surface) !important; + background: var(--pst-color-surface) !important; + } + + // Highlight the border of the current selected paging button + &.current { + border-color: var(--pst-color-text-base) !important; + } + } +} + +// Styling the object description cells in the table +div.sk-apisearch-desc { + p { + margin-bottom: 0; + } + + div.caption > p { + a, + code { + color: var(--pst-color-text-muted); + } + + code { + padding: 0; + font-size: 0.7rem; + font-weight: var(--pst-font-weight-caption); + background-color: transparent; + } + + .sd-badge { + font-size: 0.7rem; + margin-left: 0.3rem; + } + } +} diff --git a/doc/scss/api.scss b/doc/scss/api.scss new file mode 100644 index 0000000000000..d7110def4ac09 --- /dev/null +++ b/doc/scss/api.scss @@ -0,0 +1,52 @@ +/** + * This is the styling for API reference pages, currently under `modules/generated`. + * Note that it should be applied *ONLY* to API reference pages, as the selectors are + * designed based on how `autodoc` and `autosummary` generate the stuff. + */ + +// Make the admonitions more compact +div.versionadded, +div.versionchanged, +div.deprecated { + margin: 1rem auto; + + > p { + margin: 0.3rem auto; + } +} + +// Make docstrings more compact +dd { + p:not(table *) { + margin-bottom: 0.5rem !important; + } + + ul { + margin-bottom: 0.5rem !important; + padding-left: 2rem !important; + } +} + +// The first method is too close the the docstring above +dl.py.method:first-of-type { + margin-top: 2rem; +} + +// https://github.com/pydata/pydata-sphinx-theme/blob/8cf45f835bfdafc5f3821014a18f3b7e0fc2d44b/src/pydata_sphinx_theme/assets/styles/content/_api.scss +dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.glossary):not(.simple) { + margin-bottom: 1.5rem; + + dd { + margin-left: 1.2rem; + } + + // "Parameters", "Returns", etc. 
in the docstring + dt.field-odd, + dt.field-even { + margin: 0.5rem 0; + + + dd > dl { + margin-bottom: 0.5rem; + } + } +} diff --git a/doc/scss/colors.scss b/doc/scss/colors.scss new file mode 100644 index 0000000000000..bbc6aa6c2a3d6 --- /dev/null +++ b/doc/scss/colors.scss @@ -0,0 +1,51 @@ +/** + * This is the style sheet for customized colors of scikit-learn. + * Tints and shades are generated by https://colorkit.co/color-shades-generator/ + * + * This file is compiled into styles/colors.css by sphinxcontrib.sass, see: + * https://sass-lang.com/guide/ + */ + +:root { + /* scikit-learn cyan */ + --sk-cyan-tint-9: #edf7fd; + --sk-cyan-tint-8: #daeffa; + --sk-cyan-tint-7: #c8e6f8; + --sk-cyan-tint-6: #b5def5; + --sk-cyan-tint-5: #a2d6f2; + --sk-cyan-tint-4: #8fcdef; + --sk-cyan-tint-3: #7ac5ec; + --sk-cyan-tint-2: #64bce9; + --sk-cyan-tint-1: #4bb4e5; + --sk-cyan: #29abe2; + --sk-cyan-shades-1: #2294c4; + --sk-cyan-shades-2: #1c7ea8; + --sk-cyan-shades-3: #15688c; + --sk-cyan-shades-4: #0f5471; + --sk-cyan-shades-5: #094057; + --sk-cyan-shades-6: #052d3e; + --sk-cyan-shades-7: #021b27; + --sk-cyan-shades-8: #010b12; + --sk-cyan-shades-9: #000103; + + /* scikit-learn orange */ + --sk-orange-tint-9: #fff5ec; + --sk-orange-tint-8: #ffead9; + --sk-orange-tint-7: #ffe0c5; + --sk-orange-tint-6: #ffd5b2; + --sk-orange-tint-5: #fecb9e; + --sk-orange-tint-4: #fdc08a; + --sk-orange-tint-3: #fcb575; + --sk-orange-tint-2: #fbaa5e; + --sk-orange-tint-1: #f99f44; + --sk-orange: #f7931e; + --sk-orange-shades-1: #d77f19; + --sk-orange-shades-2: #b76c13; + --sk-orange-shades-3: #99590e; + --sk-orange-shades-4: #7c4709; + --sk-orange-shades-5: #603605; + --sk-orange-shades-6: #452503; + --sk-orange-shades-7: #2c1601; + --sk-orange-shades-8: #150800; + --sk-orange-shades-9: #030100; +} diff --git a/doc/scss/custom.scss b/doc/scss/custom.scss new file mode 100644 index 0000000000000..ce4451fce4467 --- /dev/null +++ b/doc/scss/custom.scss @@ -0,0 +1,192 @@ +/** + * This is a general styling sheet. + * It should be used for customizations that affect multiple pages. 
+ * + * This file is compiled into styles/custom.css by sphinxcontrib.sass, see: + * https://sass-lang.com/guide/ + */ + +/* Global */ + +code.literal { + border: 0; +} + +/* Version switcher */ + +.version-switcher__menu a.list-group-item.sk-avail-docs-link { + display: flex; + align-items: center; + + &:after { + content: var(--pst-icon-external-link); + font: var(--fa-font-solid); + font-size: 0.75rem; + margin-left: 0.5rem; + } +} + +/* Primary sidebar */ + +.bd-sidebar-primary { + width: 22.5%; + min-width: 16rem; + + // The version switcher button in the sidebar is ill-styled + button.version-switcher__button { + margin-bottom: unset; + margin-left: 0.3rem; + font-size: 1rem; + } + + // The section navigation part is to close to the right boundary (originally an even + // larger negative right margin was used) + nav.bd-links { + margin-right: -0.5rem; + } +} + +/* Article content */ + +.bd-article { + h1 { + font-weight: 500; + margin-bottom: 2rem; + } + + h2 { + font-weight: 500; + margin-bottom: 1.5rem; + } + + // Avoid changing the aspect ratio of images; add some padding so that at least + // there is some space between image and background in dark mode + img { + height: unset !important; + padding: 1%; + } + + // Resize table of contents to make the top few levels of headings more visible + li.toctree-l1 { + padding-bottom: 0.5em; + + > a { + font-size: 150%; + font-weight: bold; + } + } + + li.toctree-l2, + li.toctree-l3, + li.toctree-l4 { + margin-left: 15px; + } +} + +/* Dropdowns (sphinx-design) */ + +details.sd-dropdown { + &:hover > summary.sd-summary-title > a.headerlink { + visibility: visible; + } + + > summary.sd-summary-title { + > a.headerlink { + font-size: 1rem; + } + + // See `js/scripts/dropdown.js`: this is styling the "expand/collapse all" button + > button.sk-toggle-all { + color: var(--pst-sd-dropdown-color); + top: 0.9rem !important; + right: 3rem !important; + pointer-events: auto !important; + display: none; + border: none; + background: transparent; + } + } + + &[open] > summary.sd-summary-title:hover > .sd-summary-up.sk-toggle-all, + &:not([open]) + > summary.sd-summary-title:hover + > .sd-summary-down.sk-toggle-all { + display: block; + } +} + +/* scikit-learn buttons */ + +a.btn { + &.sk-btn-orange { + background-color: var(--sk-orange-tint-1); + color: black !important; + + &:hover { + background-color: var(--sk-orange-tint-3); + } + } + + &.sk-btn-cyan { + background-color: var(--sk-cyan-shades-2); + color: white !important; + + &:hover { + background-color: var(--sk-cyan-shades-1); + } + } +} + +/* scikit-learn avatar grid, see build_tools/generate_authors_table.py */ + +div.sk-authors-container { + display: flex; + flex-wrap: wrap; + justify-content: center; + + > div { + width: 6rem; + margin: 0.5rem; + font-size: 0.9rem; + } +} + +/* scikit-learn text-image grid, used in testimonials and sponsors pages */ + +@mixin sk-text-image-grid($img-max-height) { + display: flex; + align-items: center; + flex-wrap: wrap; + + div.text-box, + div.image-box { + width: 50%; + + @media screen and (max-width: 500px) { + width: 100%; + } + } + + div.text-box .annotation { + font-size: 0.9rem; + font-style: italic; + color: var(--pst-color-text-muted); + } + + div.image-box { + text-align: center; + + img { + max-height: $img-max-height; + max-width: 50%; + } + } +} + +div.sk-text-image-grid-small { + @include sk-text-image-grid(60px); +} + +div.sk-text-image-grid-large { + @include sk-text-image-grid(100px); +} diff --git a/doc/scss/index.scss 
b/doc/scss/index.scss new file mode 100644 index 0000000000000..c3bb8e86b41c6 --- /dev/null +++ b/doc/scss/index.scss @@ -0,0 +1,176 @@ +/** + * Styling sheet for the scikit-learn landing page. This should be loaded only for the + * landing page. + * + * This file is compiled into styles/index.css by sphinxcontrib.sass, see: + * https://sass-lang.com/guide/ + */ + +/* Theme-aware colors for the landing page */ + +html { + &[data-theme="light"] { + --sk-landing-bg-1: var(--sk-cyan-shades-3); + --sk-landing-bg-2: var(--sk-cyan); + --sk-landing-bg-3: var(--sk-orange-tint-8); + --sk-landing-bg-4: var(--sk-orange-tint-3); + } + + &[data-theme="dark"] { + --sk-landing-bg-1: var(--sk-cyan-shades-5); + --sk-landing-bg-2: var(--sk-cyan-shades-2); + --sk-landing-bg-3: var(--sk-orange-tint-4); + --sk-landing-bg-4: var(--sk-orange-tint-1); + } +} + +/* General */ + +div.sk-landing-container { + max-width: 1400px; +} + +/* Top bar */ + +div.sk-landing-top-bar { + background-image: linear-gradient( + 160deg, + var(--sk-landing-bg-1) 0%, + var(--sk-landing-bg-2) 17%, + var(--sk-landing-bg-3) 59%, + var(--sk-landing-bg-4) 100% + ); + + .sk-landing-header, + .sk-landing-subheader { + color: white; + text-shadow: 0px 0px 8px var(--sk-landing-bg-1); + } + + .sk-landing-header { + font-size: 3.2rem; + margin-bottom: 0.5rem; + } + + .sk-landing-subheader { + letter-spacing: 0.17rem; + margin-top: 0; + font-weight: 500; + } + + a.sk-btn-orange { + font-size: 1.1rem; + font-weight: 500; + } + + ul.sk-landing-header-body { + margin-top: auto; + margin-bottom: auto; + font-size: 1.2rem; + font-weight: 500; + color: black; + } +} + +/* Body */ + +div.sk-landing-body { + div.card { + background-color: var(--pst-color-background); + border-color: var(--pst-color-border); + } + + .sk-px-xl-4 { + @media screen and (min-width: 1200px) { + padding-left: 1.3rem !important; + padding-right: 1.3rem !important; + } + } + + .card-body { + p { + margin-bottom: 0.8rem; + color: var(--pst-color-text-base); + } + + .sk-card-title { + font-weight: 700; + margin: 0 0 1rem 0; + } + } + + .sk-card-img-container { + display: flex; + justify-content: center; + align-items: end; + margin-bottom: 1rem; + + img { + max-width: unset; + height: 15rem; + } + } +} + +/* More info */ + +div.sk-landing-more-info { + font-size: 0.96rem; + background-color: var(--pst-color-surface); + + .sk-landing-call-header { + font-weight: 700; + margin-top: 0; + + html[data-theme="light"] & { + color: var(--sk-orange-shades-1); + } + + html[data-theme="dark"] & { + color: var(--sk-orange); + } + } + + ul.sk-landing-call-list > li { + margin-bottom: 0.25rem; + } + + .sk-who-uses-carousel { + min-height: 200px; + + .carousel-item img { + max-height: 100px; + max-width: 50%; + margin: 0.5rem; + } + } + + .sk-more-testimonials { + text-align: right !important; + } +} + +/* Footer */ + +div.sk-landing-footer { + a.sk-footer-funding-link { + text-decoration: none; + + p.sk-footer-funding-text { + color: var(--pst-color-link); + + &:hover { + color: var(--pst-color-secondary); + } + } + + div.sk-footer-funding-logos > img { + max-height: 40px; + max-width: 85px; + margin: 0 8px 8px 8px; + padding: 5px; + border-radius: 3px; + background-color: white; + } + } +} diff --git a/doc/scss/install.scss b/doc/scss/install.scss new file mode 100644 index 0000000000000..92e201f00a107 --- /dev/null +++ b/doc/scss/install.scss @@ -0,0 +1,20 @@ +/** + * Styling for the installation page, including overriding some default styling of + * sphinx-design. 
This style sheet should be included only for the install page. + * + * This file is compiled into styles/install.css by sphinxcontrib.sass, see: + * https://sass-lang.com/guide/ + */ + +.install-instructions .sd-tab-set > label.sd-tab-label { + margin: 0; + text-align: center; + + &.tab-6 { + width: 50% !important; + } + + &.tab-4 { + width: calc(100% / 3) !important; + } +} diff --git a/doc/sphinxext/add_toctree_functions.py b/doc/sphinxext/add_toctree_functions.py deleted file mode 100644 index 4459ab971f4c4..0000000000000 --- a/doc/sphinxext/add_toctree_functions.py +++ /dev/null @@ -1,160 +0,0 @@ -"""Inspired by https://github.com/pandas-dev/pydata-sphinx-theme - -BSD 3-Clause License - -Copyright (c) 2018, pandas -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -""" - -import docutils - - -def add_toctree_functions(app, pagename, templatename, context, doctree): - """Add functions so Jinja templates can add toctree objects. - - This converts the docutils nodes into a nested dictionary that Jinja can - use in our templating. - """ - from sphinx.environment.adapters.toctree import TocTree - - def get_nav_object(maxdepth=None, collapse=True, numbered=False, **kwargs): - """Return a list of nav links that can be accessed from Jinja. - - Parameters - ---------- - maxdepth: int - How many layers of TocTree will be returned - collapse: bool - Whether to only include sub-pages of the currently-active page, - instead of sub-pages of all top-level pages of the site. - numbered: bool - Whether to add section number to title - kwargs: key/val pairs - Passed to the `TocTree.get_toctree_for` Sphinx method - """ - # The TocTree will contain the full site TocTree including sub-pages. - # "collapse=True" collapses sub-pages of non-active TOC pages. 
- # maxdepth controls how many TOC levels are returned - toctree = TocTree(app.env).get_toctree_for( - pagename, app.builder, collapse=collapse, maxdepth=maxdepth, **kwargs - ) - # If no toctree is defined (AKA a single-page site), skip this - if toctree is None: - return [] - - # toctree has this structure - # - # - # - # - # `list_item`s are the actual TOC links and are the only thing we want - toc_items = [ - item - for child in toctree.children - for item in child - if isinstance(item, docutils.nodes.list_item) - ] - - # Now convert our docutils nodes into dicts that Jinja can use - nav = [ - docutils_node_to_jinja(child, only_pages=True, numbered=numbered) - for child in toc_items - ] - - return nav - - context["get_nav_object"] = get_nav_object - - -def docutils_node_to_jinja(list_item, only_pages=False, numbered=False): - """Convert a docutils node to a structure that can be read by Jinja. - - Parameters - ---------- - list_item : docutils list_item node - A parent item, potentially with children, corresponding to the level - of a TocTree. - only_pages : bool - Only include items for full pages in the output dictionary. Exclude - anchor links (TOC items with a URL that starts with #) - numbered: bool - Whether to add section number to title - - Returns - ------- - nav : dict - The TocTree, converted into a dictionary with key/values that work - within Jinja. - """ - if not list_item.children: - return None - - # We assume this structure of a list item: - # - # - # <-- the thing we want - reference = list_item.children[0].children[0] - title = reference.astext() - url = reference.attributes["refuri"] - active = "current" in list_item.attributes["classes"] - - secnumber = reference.attributes.get("secnumber", None) - if numbered and secnumber is not None: - secnumber = ".".join(str(n) for n in secnumber) - title = f"{secnumber}. {title}" - - # If we've got an anchor link, skip it if we wish - if only_pages and "#" in url: - return None - - # Converting the docutils attributes into jinja-friendly objects - nav = {} - nav["title"] = title - nav["url"] = url - nav["active"] = active - - # Recursively convert children as well - # If there are sub-pages for this list_item, there should be two children: - # a paragraph, and a bullet_list. - nav["children"] = [] - if len(list_item.children) > 1: - # The `.children` of the bullet_list has the nodes of the sub-pages. 
- subpage_list = list_item.children[1].children - for sub_page in subpage_list: - child_nav = docutils_node_to_jinja( - sub_page, only_pages=only_pages, numbered=numbered - ) - if child_nav is not None: - nav["children"].append(child_nav) - return nav - - -def setup(app): - app.connect("html-page-context", add_toctree_functions) - - return {"parallel_read_safe": True, "parallel_write_safe": True} diff --git a/doc/sphinxext/allow_nan_estimators.py b/doc/sphinxext/allow_nan_estimators.py index e8f94506daaa5..89d7077bce2b5 100755 --- a/doc/sphinxext/allow_nan_estimators.py +++ b/doc/sphinxext/allow_nan_estimators.py @@ -1,11 +1,12 @@ -from sklearn.utils import all_estimators -from sklearn.utils.estimator_checks import _construct_instance -from sklearn.utils._testing import SkipTest -from docutils import nodes from contextlib import suppress +from docutils import nodes from docutils.parsers.rst import Directive +from sklearn.utils import all_estimators +from sklearn.utils._testing import SkipTest +from sklearn.utils.estimator_checks import _construct_instance + class AllowNanEstimators(Directive): @staticmethod diff --git a/doc/sphinxext/autoshortsummary.py b/doc/sphinxext/autoshortsummary.py new file mode 100644 index 0000000000000..8451f3133d05b --- /dev/null +++ b/doc/sphinxext/autoshortsummary.py @@ -0,0 +1,53 @@ +from sphinx.ext.autodoc import ModuleLevelDocumenter + + +class ShortSummaryDocumenter(ModuleLevelDocumenter): + """An autodocumenter that only renders the short summary of the object.""" + + # Defines the usage: .. autoshortsummary:: {{ object }} + objtype = "shortsummary" + + # Disable content indentation + content_indent = "" + + # Avoid being selected as the default documenter for some objects, because we are + # returning `can_document_member` as True for all objects + priority = -99 + + @classmethod + def can_document_member(cls, member, membername, isattr, parent): + """Allow documenting any object.""" + return True + + def get_object_members(self, want_all): + """Document no members.""" + return (False, []) + + def add_directive_header(self, sig): + """Override default behavior to add no directive header or options.""" + pass + + def add_content(self, more_content): + """Override default behavior to add only the first line of the docstring. + + Modified based on the part of processing docstrings in the original + implementation of this method. + + https://github.com/sphinx-doc/sphinx/blob/faa33a53a389f6f8bc1f6ae97d6015fa92393c4a/sphinx/ext/autodoc/__init__.py#L609-L622 + """ + sourcename = self.get_sourcename() + docstrings = self.get_doc() + + if docstrings is not None: + if not docstrings: + docstrings.append([]) + # Get the first non-empty line of the processed docstring; this could lead + # to unexpected results if the object does not have a short summary line. + short_summary = next( + (s for s in self.process_doc(docstrings) if s), "" + ) + self.add_line(short_summary, sourcename, 0) + + +def setup(app): + app.add_autodocumenter(ShortSummaryDocumenter) diff --git a/doc/sphinxext/doi_role.py b/doc/sphinxext/doi_role.py index 7d188969bb778..9f117b07fa6a3 100644 --- a/doc/sphinxext/doi_role.py +++ b/doc/sphinxext/doi_role.py @@ -1,21 +1,20 @@ """ - doilinks - ~~~~~~~~ - Extension to add links to DOIs. With this extension you can use e.g. - :doi:`10.1016/S0022-2836(05)80360-2` in your documents. This will - create a link to a DOI resolver - (``https://doi.org/10.1016/S0022-2836(05)80360-2``). - The link caption will be the raw DOI. 
- You can also give an explicit caption, e.g. - :doi:`Basic local alignment search tool <10.1016/S0022-2836(05)80360-2>`. - - :copyright: Copyright 2015 Jon Lund Steffensen. Based on extlinks by - the Sphinx team. - :license: BSD. +doilinks +~~~~~~~~ +Extension to add links to DOIs. With this extension you can use e.g. +:doi:`10.1016/S0022-2836(05)80360-2` in your documents. This will +create a link to a DOI resolver +(``https://doi.org/10.1016/S0022-2836(05)80360-2``). +The link caption will be the raw DOI. +You can also give an explicit caption, e.g. +:doi:`Basic local alignment search tool <10.1016/S0022-2836(05)80360-2>`. + +:copyright: Copyright 2015 Jon Lund Steffensen. Based on extlinks by + the Sphinx team. +:license: BSD. """ from docutils import nodes, utils - from sphinx.util.nodes import split_explicit_title diff --git a/doc/sphinxext/dropdown_anchors.py b/doc/sphinxext/dropdown_anchors.py new file mode 100644 index 0000000000000..eb0b414de6ae8 --- /dev/null +++ b/doc/sphinxext/dropdown_anchors.py @@ -0,0 +1,78 @@ +import re + +from docutils import nodes +from sphinx.transforms.post_transforms import SphinxPostTransform +from sphinx_design.dropdown import dropdown_main, dropdown_title + + +class DropdownAnchorAdder(SphinxPostTransform): + """Insert anchor links to the sphinx-design dropdowns. + + Some of the dropdowns were originally headers that had automatic anchors, so we + need to make sure that the old anchors still work. See the original implementation + (in JS): https://github.com/scikit-learn/scikit-learn/pull/27409 + + The structure of each sphinx-design dropdown node is expected to be: + + + + ...icon <-- This exists if the "icon" option of the sphinx-design + dropdown is set; we do not use it in our documentation + + ...title <-- This may contain multiple nodes, e.g. literal nodes if + there are inline codes; we use the concatenated text of + all these nodes to generate the anchor ID + + Here we insert the anchor link! 
+ + <-- The "dropdown closed" marker + <-- The "dropdown open" marker + + + ...main contents + + + """ + + default_priority = 9999 # Apply later than everything else + formats = ["html"] + + def run(self): + """Run the post transformation.""" + # Counter to store the duplicated summary text to add it as a suffix in the + # anchor ID + anchor_id_counters = {} + + for sd_dropdown in self.document.findall(dropdown_main): + # Grab the dropdown title + sd_dropdown_title = sd_dropdown.next_node(dropdown_title) + + # Concatenate the text of relevant nodes as the title text + # Since we do not have the prefix icon, the relevant nodes are the very + # first child node until the third last node (last two are markers) + title_text = "".join( + node.astext() for node in sd_dropdown_title.children[:-2] + ) + + # The ID uses the first line, lowercased, with spaces replaced by dashes; + # suffix the anchor ID with a counter if it already exists + anchor_id = re.sub(r"\s+", "-", title_text.strip().split("\n")[0]).lower() + if anchor_id in anchor_id_counters: + anchor_id_counters[anchor_id] += 1 + anchor_id = f"{anchor_id}-{anchor_id_counters[anchor_id]}" + else: + anchor_id_counters[anchor_id] = 1 + sd_dropdown["ids"].append(anchor_id) + + # Create the anchor element and insert after the title text; we do this + # directly with raw HTML + anchor_html = ( + f'#' + ) + anchor_node = nodes.raw("", anchor_html, format="html") + sd_dropdown_title.insert(-2, anchor_node) # before the two markers + + +def setup(app): + app.add_post_transform(DropdownAnchorAdder) diff --git a/doc/sphinxext/github_link.py b/doc/sphinxext/github_link.py index 3992d814b825e..2cd1fbd83af47 100644 --- a/doc/sphinxext/github_link.py +++ b/doc/sphinxext/github_link.py @@ -1,9 +1,9 @@ -from operator import attrgetter import inspect -import subprocess import os +import subprocess import sys from functools import partial +from operator import attrgetter REVISION_CMD = "git rev-parse --short HEAD" @@ -26,10 +26,10 @@ def _linkcode_resolve(domain, info, package, url_fmt, revision): >>> _linkcode_resolve('py', {'module': 'tty', ... 'fullname': 'setraw'}, ... package='tty', - ... url_fmt='http://hg.python.org/cpython/file/' + ... url_fmt='https://hg.python.org/cpython/file/' ... '{revision}/Lib/{package}/{path}#L{lineno}', ... revision='xxxx') - 'http://hg.python.org/cpython/file/xxxx/Lib/tty/tty.py#L18' + 'https://hg.python.org/cpython/file/xxxx/Lib/tty/tty.py#L18' """ if revision is None: diff --git a/doc/sphinxext/move_gallery_links.py b/doc/sphinxext/move_gallery_links.py new file mode 100644 index 0000000000000..dff27f7358c7f --- /dev/null +++ b/doc/sphinxext/move_gallery_links.py @@ -0,0 +1,193 @@ +""" +This script intends to better integrate sphinx-gallery into pydata-sphinx-theme. In +particular, it moves the download links and badge links in the footer of each generated +example page into the secondary sidebar, then removes the footer and the top note +pointing to the footer. + +The download links are for Python source code and Jupyter notebook respectively, and +the badge links are for JupyterLite and Binder. + +Currently this is achieved via post-processing the HTML generated by sphinx-gallery. 
+This hack can be removed if the following upstream issue is resolved: +https://github.com/sphinx-gallery/sphinx-gallery/issues/1258 +""" + +from pathlib import Path + +from bs4 import BeautifulSoup +from sphinx.util.display import status_iterator +from sphinx.util.logging import getLogger + +logger = getLogger(__name__) + + +def move_gallery_links(app, exception): + if exception is not None: + return + + for gallery_dir in app.config.sphinx_gallery_conf["gallery_dirs"]: + html_gallery_dir = Path(app.builder.outdir, gallery_dir) + + # Get all gallery example files to be tweaked; tuples (file, docname) + flat = [] + for file in html_gallery_dir.rglob("*.html"): + if file.name in ("index.html", "sg_execution_times.html"): + # These are not gallery example pages, skip + continue + + # Extract the documentation name from the path + docname = file.relative_to(app.builder.outdir).with_suffix("").as_posix() + if docname in app.config.html_context["redirects"]: + # This is a redirected page, skip + continue + if docname not in app.project.docnames: + # This should not happen, warn + logger.warning(f"Document {docname} not found but {file} exists") + continue + flat.append((file, docname)) + + for html_file, _ in status_iterator( + flat, + length=len(flat), + summary="Tweaking gallery links... ", + verbosity=app.verbosity, + stringify_func=lambda x: x[1], # display docname + ): + with html_file.open("r", encoding="utf-8") as f: + html = f.read() + soup = BeautifulSoup(html, "html.parser") + + # Find the secondary sidebar; it should exist in all gallery example pages + secondary_sidebar = soup.find("div", class_="sidebar-secondary-items") + if secondary_sidebar is None: + logger.warning(f"Secondary sidebar not found in {html_file}") + continue + + def _create_secondary_sidebar_component(items): + """Create a new component in the secondary sidebar. + + `items` should be a list of dictionaries with "element" being the bs4 + tag of the component and "title" being the title (None if not needed). + """ + component = soup.new_tag("div", **{"class": "sidebar-secondary-item"}) + for item in items: + item_wrapper = soup.new_tag("div") + item_wrapper.append(item["element"]) + if item["title"]: + item_wrapper["title"] = item["title"] + component.append(item_wrapper) + secondary_sidebar.append(component) + + def _create_download_link(link, is_jupyter=False): + """Create a download link to be appended to a component. + + `link` should be the bs4 tag of the original download link, either for + the Python source code (is_jupyter=False) of for the Jupyter notebook + (is_jupyter=True). `link` will not be removed; instead the whole + footnote would be removed where `link` is located. + + This returns a dictionary with "element" being the bs4 tag of the new + download link and "title" being the name of the file to download. + """ + new_link = soup.new_tag("a", href=link["href"], download="") + + # Place a download icon at the beginning of the new link + download_icon = soup.new_tag("i", **{"class": "fa-solid fa-download"}) + new_link.append(download_icon) + + # Create the text of the new link; it is shortend to fit better into + # the secondary sidebar. The leading space before "Download ..." 
is + # intentional to create a small gap between the icon and the text, + # being consistent with the other pydata-sphinx-theme components + link_type = "Jupyter notebook" if is_jupyter else "source code" + new_text = soup.new_string(f" Download {link_type}") + new_link.append(new_text) + + # Get the file name to download and use it as the title of the new link + # which will show up when hovering over the link; the file name is + # expected to be in the last span of `link` + link_spans = link.find_all("span") + title = link_spans[-1].text if link_spans else None + + return {"element": new_link, "title": title} + + def _create_badge_link(link): + """Create a badge link to be appended to a component. + + `link` should be the bs4 tag of the original badge link, either for + binder or JupyterLite. `link` will not be removed; instead the whole + footnote would be removed where `link` is located. + + This returns a dictionary with "element" being the bs4 tag of the new + download link and "title" being `None` (no need). + """ + new_link = soup.new_tag("a", href=link["href"]) + + # The link would essentially be an anchor wrapper outside the image of + # the badge; we get the src and alt attributes by finding the original + # image and limit the height to 20px (fixed) so that the secondary + # sidebar will appear neater + badge_img = link.find("img") + new_img = soup.new_tag( + "img", src=badge_img["src"], alt=badge_img["alt"], height=20 + ) + new_link.append(new_img) + + return {"element": new_link, "title": None} + + try: + # `sg_note` is the "go to the end" note at the top of the page + # `sg_footer` is the footer with the download links and badge links + # These will be removed at the end if new links are successfully created + sg_note = soup.find("div", class_="sphx-glr-download-link-note") + sg_footer = soup.find("div", class_="sphx-glr-footer") + + # If any one of these two is not found, we directly give up tweaking + if sg_note is None or sg_footer is None: + continue + + # Move the download links into the secondary sidebar + py_link_div = sg_footer.find("div", class_="sphx-glr-download-python") + ipy_link_div = sg_footer.find("div", class_="sphx-glr-download-jupyter") + _create_secondary_sidebar_component( + [ + _create_download_link(py_link_div.a, is_jupyter=False), + _create_download_link(ipy_link_div.a, is_jupyter=True), + ] + ) + + # Move the badge links into the secondary sidebar + lite_link_div = sg_footer.find("div", class_="lite-badge") + binder_link_div = sg_footer.find("div", class_="binder-badge") + _create_secondary_sidebar_component( + [ + _create_badge_link(lite_link_div.a), + _create_badge_link(binder_link_div.a), + ] + ) + + # Remove the sourcelink component from the secondary sidebar; the reason + # we do not remove it by configuration is that we need the secondary + # sidebar to be present for this script to work, while in-page toc alone + # could have been empty + sourcelink = secondary_sidebar.find("div", class_="sourcelink") + if sourcelink is not None: + sourcelink.parent.extract() # because sourcelink has a wrapper div + + # Remove the the top note and the whole footer + sg_note.extract() + sg_footer.extract() + + except Exception: + # If any step fails we directly skip the file + continue + + # Write the modified file back + with html_file.open("w", encoding="utf-8") as f: + f.write(str(soup)) + + +def setup(app): + # Default priority is 500 which sphinx-gallery uses for its build-finished events; + # we need a larger priority to run after sphinx-gallery (larger 
is later) + app.connect("build-finished", move_gallery_links, priority=900) diff --git a/doc/sphinxext/override_pst_pagetoc.py b/doc/sphinxext/override_pst_pagetoc.py new file mode 100644 index 0000000000000..f5697de8ef155 --- /dev/null +++ b/doc/sphinxext/override_pst_pagetoc.py @@ -0,0 +1,84 @@ +from functools import cache + +from sphinx.util.logging import getLogger + +logger = getLogger(__name__) + + +def override_pst_pagetoc(app, pagename, templatename, context, doctree): + """Overrides the `generate_toc_html` function of pydata-sphinx-theme for API.""" + + @cache + def generate_api_toc_html(kind="html"): + """Generate the in-page toc for an API page. + + This relies on the `generate_toc_html` function added by pydata-sphinx-theme + into the context. We save the original function into `pst_generate_toc_html` + and override `generate_toc_html` with this function for generated API pages. + + The pagetoc of an API page would look like the following: + +
+        <ul class="visible ...">                          <-- Unwrap
+          <li class="toc-h1">                             <-- Unwrap
+            <a ...>{{obj}}</a>                            <-- Decompose
+
+            <ul class="visible ...">
+              <li class="toc-h2">
+                ...object
+                <ul class="visible ...">                  <-- Set visible if exists
+                  <li class="toc-h3">...method 1</li>     <-- Shorten
+                  <li class="toc-h3">...method 2</li>     <-- Shorten
+                  ...more methods                         <-- Shorten
+                </ul>
+              </li>
+              <li class="toc-h2">...gallery examples</li>
+            </ul>
+
+          </li>                                           <-- Unwrapped
+        </ul>
<-- Unwrapped + """ + soup = context["pst_generate_toc_html"](kind="soup") + + try: + # Unwrap the outermost level + soup.ul.unwrap() + soup.li.unwrap() + soup.a.decompose() + + # Get all toc-h2 level entries, where the first one should be the function + # or class, and the second one, if exists, should be the examples; there + # should be no more than two entries at this level for generated API pages + lis = soup.ul.select("li.toc-h2") + main_li = lis[0] + meth_list = main_li.ul + + if meth_list is not None: + # This is a class API page, we remove the class name from the method + # names to make them better fit into the secondary sidebar; also we + # make the toc-h3 level entries always visible to more easily navigate + # through the methods + meth_list["class"].append("visible") + for meth in meth_list.find_all("li", {"class": "toc-h3"}): + target = meth.a.code.span + target.string = target.string.split(".", 1)[1] + + # This corresponds to the behavior of `generate_toc_html` + return str(soup) if kind == "html" else soup + + except Exception as e: + # Upon any failure we return the original pagetoc + logger.warning( + f"Failed to generate API pagetoc for {pagename}: {e}; falling back" + ) + return context["pst_generate_toc_html"](kind=kind) + + # Override the pydata-sphinx-theme implementation for generate API pages + if pagename.startswith("modules/generated/"): + context["pst_generate_toc_html"] = context["generate_toc_html"] + context["generate_toc_html"] = generate_api_toc_html + + +def setup(app): + # Need to be triggered after `pydata_sphinx_theme.toctree.add_toctree_functions`, + # and since default priority is 500 we set 900 for safety + app.connect("html-page-context", override_pst_pagetoc, priority=900) diff --git a/doc/sphinxext/sphinx_issues.py b/doc/sphinxext/sphinx_issues.py index 5cd532319cbd7..206359a1bd703 100644 --- a/doc/sphinxext/sphinx_issues.py +++ b/doc/sphinxext/sphinx_issues.py @@ -18,6 +18,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ + import re from docutils import nodes, utils diff --git a/doc/supervised_learning.rst b/doc/supervised_learning.rst index 71fb3007c2e3c..ba24e8ee23c6f 100644 --- a/doc/supervised_learning.rst +++ b/doc/supervised_learning.rst @@ -1,9 +1,3 @@ -.. Places parent toc into the sidebar - -:parenttoc: True - -.. include:: includes/big_toc_css.rst - .. _supervised-learning: Supervised learning diff --git a/doc/support.rst b/doc/support.rst index 520bd015ff6da..be9b32b60a9c8 100644 --- a/doc/support.rst +++ b/doc/support.rst @@ -2,96 +2,120 @@ Support ======= -There are several ways to get in touch with the developers. +There are several channels to connect with scikit-learn developers for assistance, feedback, or contributions. +**Note**: Communications on all channels should respect our `Code of Conduct `_. -.. _mailing_lists: -Mailing List -============ +.. _announcements_and_notification: -- The main mailing list is `scikit-learn - `_. +Mailing Lists +============= -- There is also a commit list `scikit-learn-commits - `_, - where updates to the main repository and test failures get notified. +- **Main Mailing List**: Join the primary discussion + platform for scikit-learn at `scikit-learn Mailing List + `_. +- **Commit Updates**: Stay informed about repository + updates and test failures on the `scikit-learn-commits list + `_. .. 
_user_questions: -User questions +User Questions ============== -- Some scikit-learn developers support users on StackOverflow using - the `[scikit-learn] `_ +If you have questions, this is our general workflow. + +- **Stack Overflow**: Some scikit-learn developers support users using the + `[scikit-learn] `_ tag. -- For general theoretical or methodological Machine Learning questions - `stack exchange `_ is probably a more - suitable venue. +- **General Machine Learning Queries**: For broader machine learning + discussions, visit `Stack Exchange `_. + +When posting questions: + +- Please use a descriptive question in the title field (e.g. no "Please + help with scikit-learn!" as this is not a question) + +- Provide detailed context, expected results, and actual observations. + +- Include code and data snippets (preferably minimalistic scripts, + up to ~20 lines). -In both cases please use a descriptive question in the title field (e.g. -no "Please help with scikit-learn!" as this is not a question) and put -details on what you tried to achieve, what were the expected results and -what you observed instead in the details field. +- Describe your data and preprocessing steps, including sample size, + feature types (categorical or numerical), and the target for supervised + learning tasks (classification type or regression). -Code and data snippets are welcome. Minimalistic (up to ~20 lines long) -reproduction script very helpful. +**Note**: Avoid asking user questions on the bug tracker to keep +the focus on development. -Please describe the nature of your data and how you preprocessed it: -what is the number of samples, what is the number and type of features -(i.d. categorical or numerical) and for supervised learning tasks, -what target are your trying to predict: binary, multiclass (1 out of -``n_classes``) or multilabel (``k`` out of ``n_classes``) classification -or continuous variable regression. +- `GitHub Discussions `_ + Usage questions such as methodological -User questions should **not be asked on the bug tracker**, as it crowds -the list of issues and makes the development of the project harder. +- `Stack Overflow `_ + Programming/user questions with `[scikit-learn]` tag + +- `GitHub Bug Tracker `_ + Bug reports - Please do not ask usage questions on the issue tracker. + +- `Discord Server `_ + Current pull requests - Post any specific PR-related questions on your PR, + and you can share a link to your PR on this server. .. _bug_tracker: -Bug tracker +Bug Tracker =========== -If you think you've encountered a bug, please report it to the issue tracker: +Encountered a bug? Report it on our `issue tracker +`_ + +Include in your report: -https://github.com/scikit-learn/scikit-learn/issues +- Steps or scripts to reproduce the bug. -Don't forget to include: +- Expected and observed outcomes. - - steps (or better script) to reproduce, +- Python or gdb tracebacks, if applicable. - - expected outcome, +- The ideal bug report contains a :ref:`short reproducible code snippet + `, this way anyone can try to reproduce the bug easily. - - observed outcome or Python (or gdb) tracebacks +- If your snippet is longer than around 50 lines, please link to a + `gist `_ or a github repo. -To help developers fix your bug faster, please link to a https://gist.github.com -holding a standalone minimalistic python script that reproduces your bug and -optionally a minimalistic subsample of your dataset (for instance, exported -as CSV files using ``numpy.savetxt``). 
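The bug tracker guidance above asks for a standalone, minimal reproduction script with a small dataset. A sketch of what such a script might look like; the data, the estimator, and the expected/observed comments below are placeholders for whatever the actual report concerns::

    # Standalone reproduction script (~15 lines), runnable as-is.
    import numpy as np
    from sklearn.linear_model import LogisticRegression

    rng = np.random.RandomState(0)
    X = rng.normal(size=(100, 3))             # 100 samples, 3 numerical features
    y = (X[:, 0] + X[:, 1] > 0).astype(int)   # binary classification target

    # Optionally share the data alongside the script, e.g. in a gist:
    # np.savetxt("X.csv", X, delimiter=",")

    clf = LogisticRegression().fit(X, y)
    print(clf.score(X, y))
    # Expected: <describe the expected outcome here>
    # Observed: <paste the actual output or traceback here>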
+**Tip**: Gists are Git repositories; you can push data files to them using Git. -Note: Gists are Git cloneable repositories and thus you can use Git to -push datafiles to them. +.. _social_media: +Social Media +============ + +scikit-learn has presence on various social media platforms to share +updates with the community. The platforms are not monitored for user +questions. .. _gitter: Gitter ====== -Some developers like to hang out on scikit-learn Gitter room: -https://gitter.im/scikit-learn/scikit-learn. - +**Note**: The scikit-learn Gitter room is no longer an active community. +For live discussions and support, please refer to the other channels +mentioned in this document. .. _documentation_resources: -Documentation resources +Documentation Resources ======================= -This documentation is relative to |release|. Documentation for -other versions can be found `here -`__. +This documentation is for |release|. Find documentation for other versions +`here `__. -Printable pdf documentation for old versions can be found `here +Older versions' printable PDF documentation is available `here `_. +Building the PDF documentation is no longer supported in the website, +but you can still generate it locally by following the +:ref:`building documentation instructions `. diff --git a/doc/templates/base.rst b/doc/templates/base.rst new file mode 100644 index 0000000000000..ee86bd8a18dbe --- /dev/null +++ b/doc/templates/base.rst @@ -0,0 +1,36 @@ +{{ objname | escape | underline(line="=") }} + +{% if objtype == "module" -%} + +.. automodule:: {{ fullname }} + +{%- elif objtype == "function" -%} + +.. currentmodule:: {{ module }} + +.. autofunction:: {{ objname }} + +.. minigallery:: {{ module }}.{{ objname }} + :add-heading: Gallery examples + :heading-level: - + +{%- elif objtype == "class" -%} + +.. currentmodule:: {{ module }} + +.. autoclass:: {{ objname }} + :members: + :inherited-members: + :special-members: __call__ + +.. minigallery:: {{ module }}.{{ objname }} {% for meth in methods %}{{ module }}.{{ objname }}.{{ meth }} {% endfor %} + :add-heading: Gallery examples + :heading-level: - + +{%- else -%} + +.. currentmodule:: {{ module }} + +.. auto{{ objtype }}:: {{ objname }} + +{%- endif -%} diff --git a/doc/templates/class.rst b/doc/templates/class.rst deleted file mode 100644 index 79ff2cf807794..0000000000000 --- a/doc/templates/class.rst +++ /dev/null @@ -1,12 +0,0 @@ -:mod:`{{module}}`.{{objname}} -{{ underline }}============== - -.. currentmodule:: {{ module }} - -.. autoclass:: {{ objname }} - -.. include:: {{module}}.{{objname}}.examples - -.. raw:: html - -
diff --git a/doc/templates/class_with_call.rst b/doc/templates/class_with_call.rst deleted file mode 100644 index f98b7dbbf6578..0000000000000 --- a/doc/templates/class_with_call.rst +++ /dev/null @@ -1,16 +0,0 @@ -:mod:`{{module}}`.{{objname}} -{{ underline }}=============== - -.. currentmodule:: {{ module }} - -.. autoclass:: {{ objname }} - - {% block methods %} - .. automethod:: __call__ - {% endblock %} - -.. include:: {{module}}.{{objname}}.examples - -.. raw:: html - -
diff --git a/doc/templates/deprecated_class.rst b/doc/templates/deprecated_class.rst deleted file mode 100644 index 857e2c28ce1da..0000000000000 --- a/doc/templates/deprecated_class.rst +++ /dev/null @@ -1,23 +0,0 @@ -:mod:`{{module}}`.{{objname}} -{{ underline }}============== - -.. meta:: - :robots: noindex - -.. warning:: - **DEPRECATED** - - -.. currentmodule:: {{ module }} - -.. autoclass:: {{ objname }} - - {% block methods %} - .. automethod:: __init__ - {% endblock %} - -.. include:: {{module}}.{{objname}}.examples - -.. raw:: html - -
diff --git a/doc/templates/deprecated_class_with_call.rst b/doc/templates/deprecated_class_with_call.rst deleted file mode 100644 index a04efcb80be07..0000000000000 --- a/doc/templates/deprecated_class_with_call.rst +++ /dev/null @@ -1,24 +0,0 @@ -:mod:`{{module}}`.{{objname}} -{{ underline }}=============== - -.. meta:: - :robots: noindex - -.. warning:: - **DEPRECATED** - - -.. currentmodule:: {{ module }} - -.. autoclass:: {{ objname }} - - {% block methods %} - .. automethod:: __init__ - .. automethod:: __call__ - {% endblock %} - -.. include:: {{module}}.{{objname}}.examples - -.. raw:: html - -
diff --git a/doc/templates/deprecated_class_without_init.rst b/doc/templates/deprecated_class_without_init.rst deleted file mode 100644 index c019992493610..0000000000000 --- a/doc/templates/deprecated_class_without_init.rst +++ /dev/null @@ -1,19 +0,0 @@ -:mod:`{{module}}`.{{objname}} -{{ underline }}============== - -.. meta:: - :robots: noindex - -.. warning:: - **DEPRECATED** - - -.. currentmodule:: {{ module }} - -.. autoclass:: {{ objname }} - -.. include:: {{module}}.{{objname}}.examples - -.. raw:: html - -
diff --git a/doc/templates/deprecated_function.rst b/doc/templates/deprecated_function.rst deleted file mode 100644 index 6d13ac6aca2de..0000000000000 --- a/doc/templates/deprecated_function.rst +++ /dev/null @@ -1,19 +0,0 @@ -:mod:`{{module}}`.{{objname}} -{{ underline }}==================== - -.. meta:: - :robots: noindex - -.. warning:: - **DEPRECATED** - - -.. currentmodule:: {{ module }} - -.. autofunction:: {{ objname }} - -.. include:: {{module}}.{{objname}}.examples - -.. raw:: html - -
diff --git a/doc/templates/display_all_class_methods.rst b/doc/templates/display_all_class_methods.rst deleted file mode 100644 index 1211296bb57ce..0000000000000 --- a/doc/templates/display_all_class_methods.rst +++ /dev/null @@ -1,14 +0,0 @@ -:mod:`{{module}}`.{{objname}} -{{ underline }}============== - -.. currentmodule:: {{ module }} - -.. autoclass:: {{ objname }} - -.. include:: {{module}}.{{objname}}.examples -.. include:: {{module}}.{{objname}}.from_estimator.examples -.. include:: {{module}}.{{objname}}.from_predictions.examples - -.. raw:: html - -
diff --git a/doc/templates/display_only_from_estimator.rst b/doc/templates/display_only_from_estimator.rst deleted file mode 100644 index 6d064133fc5e2..0000000000000 --- a/doc/templates/display_only_from_estimator.rst +++ /dev/null @@ -1,13 +0,0 @@ -:mod:`{{module}}`.{{objname}} -{{ underline }}============== - -.. currentmodule:: {{ module }} - -.. autoclass:: {{ objname }} - -.. include:: {{module}}.{{objname}}.examples -.. include:: {{module}}.{{objname}}.from_estimator.examples - -.. raw:: html - -
diff --git a/doc/templates/function.rst b/doc/templates/function.rst deleted file mode 100644 index f4b11eda770e4..0000000000000 --- a/doc/templates/function.rst +++ /dev/null @@ -1,12 +0,0 @@ -:mod:`{{module}}`.{{objname}} -{{ underline }}==================== - -.. currentmodule:: {{ module }} - -.. autofunction:: {{ objname }} - -.. include:: {{module}}.{{objname}}.examples - -.. raw:: html - -
diff --git a/doc/templates/generate_deprecated.sh b/doc/templates/generate_deprecated.sh deleted file mode 100755 index a7301fb5dc419..0000000000000 --- a/doc/templates/generate_deprecated.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash -for f in [^d]*; do (head -n2 < $f; echo ' -.. meta:: - :robots: noindex - -.. warning:: - **DEPRECATED** -'; tail -n+3 $f) > deprecated_$f; done diff --git a/doc/templates/index.html b/doc/templates/index.html index db5d02ab9d9ab..a440a69354d0a 100644 --- a/doc/templates/index.html +++ b/doc/templates/index.html @@ -1,25 +1,27 @@ {% extends "layout.html" %} {% set title = 'scikit-learn: machine learning in Python' %} -{% if theme_link_to_live_contributing_page|tobool %} +{% if is_devrelease|tobool %} + {%- set contributing_link = pathto("developers/contributing") %} + {%- set contributing_attrs = "" %} +{%- else %} {%- set contributing_link = "https://scikit-learn.org/dev/developers/contributing.html" %} {%- set contributing_attrs = 'target="_blank" rel="noopener noreferrer"' %} -{%- else %} - {%- set contributing_link = pathto('developers/contributing') %} - {%- set contributing_attrs = '' %} {%- endif %} +{%- import "static/webpack-macros.html" as _webpack with context %} -{% block content %} -
[The remaining doc/templates/index.html hunks lost their HTML markup in extraction; only the text content is recoverable. In summary:

- The landing page is reorganized from a single ``content`` block into ``docs_navbar``, ``docs_main`` and ``footer`` template blocks, and the top bar keeps only the "Getting Started" and "Release Highlights for {{ release_highlights_version }}" buttons (the "GitHub" button is dropped).
- The six feature cards (Classification, Regression, Clustering, Dimensionality reduction, Model selection, Preprocessing) keep their one-line descriptions but get updated "Algorithms" lists: gradient boosting and logistic regression are added for classification, ridge for regression, and HDBSCAN plus hierarchical clustering replace spectral clustering and mean-shift for clustering.
- The "News" list drops the December 2019 through March 2023 entries (releases 0.22 through 1.2.2) in favor of the on-going scikit-learn 1.6 development note and the 1.3.0 through 1.5.1 releases, closing with a single "All releases: What's new (Changelog)" link.
- The "Community" links ("Help us, donate!", "Cite us!") and the "Who uses scikit-learn?" heading are retained.

The hunk is truncated at this point in the source.]