diff --git a/.binder/postBuild b/.binder/postBuild
old mode 100644
new mode 100755
index c33605a68456c..00e8d39b93549
--- a/.binder/postBuild
+++ b/.binder/postBuild
@@ -6,9 +6,9 @@ set -e
 # inside a git checkout of the scikit-learn/scikit-learn repo. This script is
 # generating notebooks from the scikit-learn python examples.

-if [[ ! -f /.dockerenv ]]; then
-    echo "This script was written for repo2docker and is supposed to run inside a docker container."
-    echo "Exiting because this script can delete data if run outside of a docker container."
+if [[ -z "${REPO_DIR}" ]]; then
+    echo "This script was written for repo2docker and the REPO_DIR environment variable is supposed to be set."
+    echo "Exiting because this script can delete data if run outside of a repo2docker context."
     exit 1
 fi

@@ -23,7 +23,7 @@ find . -delete
 GENERATED_NOTEBOOKS_DIR=.generated-notebooks
 cp -r $TMP_CONTENT_DIR/examples $GENERATED_NOTEBOOKS_DIR

 find $GENERATED_NOTEBOOKS_DIR -name '*.py' -exec sphx_glr_python_to_jupyter.py '{}' +

 NON_NOTEBOOKS=$(find $GENERATED_NOTEBOOKS_DIR -type f | grep -v '\.ipynb')
 rm -f $NON_NOTEBOOKS
diff --git a/.circleci/config.yml b/.circleci/config.yml
index 1f9a1a02e0f62..bd4914056fe10 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -3,7 +3,7 @@ version: 2.1
 jobs:
   lint:
     docker:
-      - image: cimg/python:3.9.18
+      - image: cimg/python:3.10.16
     steps:
       - checkout
       - run:
@@ -11,14 +11,14 @@ jobs:
           command: |
             source build_tools/shared.sh
             # Include pytest compatibility with mypy
-            pip install pytest ruff $(get_dep mypy min) $(get_dep black min) cython-lint
+            pip install pytest $(get_dep ruff min) $(get_dep mypy min) cython-lint
       - run:
           name: linting
           command: ./build_tools/linting.sh

   doc-min-dependencies:
     docker:
-      - image: cimg/python:3.9.18
+      - image: cimg/base:current-22.04
     environment:
       - MKL_NUM_THREADS: 2
       - OPENBLAS_NUM_THREADS: 2
@@ -56,7 +56,7 @@ jobs:

   doc:
     docker:
-      - image: cimg/python:3.9.18
+      - image: cimg/base:current-22.04
     environment:
       - MKL_NUM_THREADS: 2
       - OPENBLAS_NUM_THREADS: 2
@@ -98,7 +98,7 @@ jobs:

   deploy:
     docker:
-      - image: cimg/python:3.9.18
+      - image: cimg/base:current-22.04
     steps:
       - checkout
       - run: ./build_tools/circle/checkout_merge_commit.sh
@@ -107,7 +107,7 @@ jobs:
       - attach_workspace:
           at: doc/_build/html
       - run: ls -ltrh doc/_build/html/stable
-      - deploy:
+      - run:
           command: |
             if [[ "${CIRCLE_BRANCH}" =~ ^main$|^[0-9]+\.[0-9]+\.X$ ]]; then
               bash build_tools/circle/push_doc.sh doc/_build/html/stable
diff --git a/.cirrus.star b/.cirrus.star
deleted file mode 100644
index f0b458d74289a..0000000000000
--- a/.cirrus.star
+++ /dev/null
@@ -1,37 +0,0 @@
-# This script uses starlark for configuring when a cirrus CI job runs:
-# https://cirrus-ci.org/guide/programming-tasks/
-
-load("cirrus", "env", "fs", "http")
-
-def main(ctx):
-    # Only run for scikit-learn/scikit-learn. For debugging on a fork, you can
-    # comment out the following condition.
-    if env.get("CIRRUS_REPO_FULL_NAME") != "scikit-learn/scikit-learn":
-        return []
-
-    arm_wheel_yaml = "build_tools/cirrus/arm_wheel.yml"
-    arm_tests_yaml = "build_tools/cirrus/arm_tests.yml"
-
-    # Nightly jobs always run
-    if env.get("CIRRUS_CRON", "") == "nightly":
-        return fs.read(arm_wheel_yaml) + fs.read(arm_tests_yaml)
-
-    # Get commit message for event. We can not use `git` here because there is
-    # no command line access in starlark. Thus we need to query the GitHub API
-    # for the commit message.
Note that `CIRRUS_CHANGE_MESSAGE` can not be used - # because it is set to the PR's title and not the latest commit message. - SHA = env.get("CIRRUS_CHANGE_IN_REPO") - REPO = env.get("CIRRUS_REPO_FULL_NAME") - url = "https://api.github.com/repos/" + REPO + "/git/commits/" + SHA - response = http.get(url).json() - commit_msg = response["message"] - - jobs_to_run = "" - - if "[cd build]" in commit_msg or "[cd build cirrus]" in commit_msg: - jobs_to_run += fs.read(arm_wheel_yaml) - - if "[cirrus arm]" in commit_msg: - jobs_to_run += fs.read(arm_tests_yaml) - - return jobs_to_run diff --git a/.codecov.yml b/.codecov.yml index 54ce77b9c1b0e..f4ecd6e7d8fee 100644 --- a/.codecov.yml +++ b/.codecov.yml @@ -30,5 +30,4 @@ ignore: - "sklearn/_build_utils" - "sklearn/__check_build" - "sklearn/_min_dependencies.py" -- "**/setup.py" - "**/conftest.py" diff --git a/.coveragerc b/.coveragerc index a8601458a0b07..0d5f02b3edafc 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,9 +1,11 @@ [run] -branch = True +# Use statement coverage rather than branch coverage because +# COVERAGE_CORE=sysmon can make branch coverage slower rather than faster. See +# https://github.com/nedbat/coveragepy/issues/1812 for more details. +branch = False source = sklearn parallel = True omit = */sklearn/externals/* */sklearn/_build_utils/* */benchmarks/* - **/setup.py diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index b261320543fa7..77fb878ee8fe7 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -32,5 +32,17 @@ d4aad64b1eb2e42e76f49db2ccfbe4b4660d092b # PR 26649: Add isort and ruff rules 42173fdb34b5aded79664e045cada719dfbe39dc -# PR #28802: Update black to 24.3.0 +# PR 28802: Update black to 24.3.0 c4c546355667b070edd5c892b206aa4a97af9a0b + +# PR 30694: Enforce ruff rules (RUF) +fe7c4176828af5231f526e76683fb9bdb9ea0367 + +# PR 30695: Apply ruff/flake8-implicit-str-concat rules (ISC) +5cdbbf15e3fade7cc2462ef66dc4ea0f37f390e3 + +# PR 31015: black -> ruff format +ff78e258ccf11068e2b3a433c51517ae56234f88 + +# PR 31226: Enforce ruff/pygrep-hooks rules +b98dc797c480b1b9495f918e201d45ee07f29feb diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 8d9c592ccdc13..0ebed8c85161b 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,4 +1,4 @@ -blank_issues_enabled: true +blank_issues_enabled: false contact_links: - name: Discussions url: https://github.com/scikit-learn/scikit-learn/discussions/new @@ -13,5 +13,5 @@ contact_links: url: https://discord.gg/h9qyrK8Jc8 about: Developers and users can be found on the Discord server - name: Blank issue - url: https://github.com/scikit-learn/scikit-learn/issues/new + url: https://github.com/scikit-learn/scikit-learn/issues/new?template=BLANK_ISSUE about: Please note that GitHub Discussions should be used in most cases instead diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000000000..7ac17eb0442ad --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,21 @@ +version: 2 +updates: + # Maintain dependencies for GitHub Actions as recommended in SPEC8: + # https://github.com/scientific-python/specs/pull/325 + # At the time of writing, release critical workflows such as + # pypa/gh-action-pypi-publish should use hash-based versioning for security + # reasons. This strategy may be generalized to all other github actions + # in the future. 
+ - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "monthly" + groups: + actions: + patterns: + - "*" + labels: + - "Build / CI" + - "dependencies" + reviewers: + - "scikit-learn/core-devs" diff --git a/.github/workflows/arm-unit-tests.yml b/.github/workflows/arm-unit-tests.yml new file mode 100644 index 0000000000000..e7636d55d7945 --- /dev/null +++ b/.github/workflows/arm-unit-tests.yml @@ -0,0 +1,54 @@ +name: Unit test for ARM +permissions: + contents: read + +on: + push: + pull_request: + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + lint: + name: Lint + runs-on: ubuntu-latest + if: github.repository == 'scikit-learn/scikit-learn' + + steps: + - name: Checkout + uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.12' + cache: 'pip' + - name: Install linters + run: | + source build_tools/shared.sh + # Include pytest compatibility with mypy + pip install pytest $(get_dep ruff min) $(get_dep mypy min) cython-lint + - name: Run linters + run: ./build_tools/linting.sh + - name: Run Meson OpenMP checks + run: | + pip install ninja meson scipy + python build_tools/check-meson-openmp-dependencies.py + + run-unit-tests: + name: Run unit tests + runs-on: ubuntu-24.04-arm + if: github.repository == 'scikit-learn/scikit-learn' + needs: [lint] + steps: + - name: Checkout + uses: actions/checkout@v4 + - uses: mamba-org/setup-micromamba@v2 + with: + environment-file: build_tools/github/pymin_conda_forge_arm_linux-aarch64_conda.lock + environment-name: ci + cache-environment: true + + - name: Build and run tests + shell: bash -el {0} + run: bash build_tools/github/build_test_arm.sh diff --git a/.github/workflows/artifact-redirector.yml b/.github/workflows/artifact-redirector.yml index 3fdbc06fac386..690cacefda935 100644 --- a/.github/workflows/artifact-redirector.yml +++ b/.github/workflows/artifact-redirector.yml @@ -15,7 +15,7 @@ jobs: name: Run CircleCI artifacts redirector steps: - name: GitHub Action step - uses: larsoner/circleci-artifacts-redirector-action@master + uses: scientific-python/circleci-artifacts-redirector-action@v1 with: repo-token: ${{ secrets.GITHUB_TOKEN }} api-token: ${{ secrets.CIRCLECI_TOKEN }} diff --git a/.github/workflows/assign.yml b/.github/workflows/assign.yml index fa3b6f95a5e95..a69b60ee0f0a0 100644 --- a/.github/workflows/assign.yml +++ b/.github/workflows/assign.yml @@ -19,8 +19,11 @@ jobs: && !github.event.issue.assignee steps: - run: | + # Using REST API directly because assigning through gh has some severe limitations. 
For more details, see + # https://github.com/scikit-learn/scikit-learn/issues/29395#issuecomment-2206776963 echo "Assigning issue ${{ github.event.issue.number }} to ${{ github.event.comment.user.login }}" - gh issue edit $ISSUE --add-assignee ${{ github.event.comment.user.login }} + curl -H "Authorization: token $GH_TOKEN" -d '{"assignees": ["${{ github.event.comment.user.login }}"]}' \ + https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/assignees gh issue edit $ISSUE --remove-label "help wanted" env: GH_TOKEN: ${{ github.token }} diff --git a/.github/workflows/check-changelog.yml b/.github/workflows/check-changelog.yml index d5bfc8ef0f430..00e6a81f8cd0b 100644 --- a/.github/workflows/check-changelog.yml +++ b/.github/workflows/check-changelog.yml @@ -1,68 +1,36 @@ name: Check Changelog +permissions: + contents: read + # This check makes sure that the changelog is properly updated # when a PR introduces a change in a test file. # To bypass this check, label the PR with "No Changelog Needed". on: pull_request: - types: [opened, edited, labeled, unlabeled, synchronize] + types: [opened, synchronize, labeled, unlabeled] jobs: check: name: A reviewer will let you know if it is required or can be bypassed runs-on: ubuntu-latest - if: ${{ contains(github.event.pull_request.labels.*.name, 'No Changelog Needed') == 0 }} steps: - - name: Get PR number and milestone - run: | - echo "PR_NUMBER=${{ github.event.pull_request.number }}" >> $GITHUB_ENV - echo "TAGGED_MILESTONE=${{ github.event.pull_request.milestone.title }}" >> $GITHUB_ENV - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: '0' - - name: Check the changelog entry + - name: Check if tests have changed + id: tests_changed run: | set -xe changed_files=$(git diff --name-only origin/main) # Changelog should be updated only if tests have been modified - if [[ ! "$changed_files" =~ tests ]] + if [[ "$changed_files" =~ tests ]] then - exit 0 - fi - all_changelogs=$(cat ./doc/whats_new/v*.rst) - if [[ "$all_changelogs" =~ :pr:\`$PR_NUMBER\` ]] - then - echo "Changelog has been updated." - # If the pull request is milestoned check the correspondent changelog - if exist -f ./doc/whats_new/v${TAGGED_MILESTONE:0:4}.rst - then - expected_changelog=$(cat ./doc/whats_new/v${TAGGED_MILESTONE:0:4}.rst) - if [[ "$expected_changelog" =~ :pr:\`$PR_NUMBER\` ]] - then - echo "Changelog and milestone correspond." - else - echo "Changelog and milestone do not correspond." - echo "If you see this error make sure that the tagged milestone for the PR" - echo "and the edited changelog filename properly match." - exit 1 - fi - fi - else - echo "A Changelog entry is missing." - echo "" - echo "Please add an entry to the changelog at 'doc/whats_new/v*.rst'" - echo "to document your change assuming that the PR will be merged" - echo "in time for the next release of scikit-learn." - echo "" - echo "Look at other entries in that file for inspiration and please" - echo "reference this pull request using the ':pr:' directive and" - echo "credit yourself (and other contributors if applicable) with" - echo "the ':user:' directive." - echo "" - echo "If you see this error and there is already a changelog entry," - echo "check that the PR number is correct." - echo "" - echo "If you believe that this PR does not warrant a changelog" - echo "entry, say so in a comment so that a maintainer will label" - echo "the PR with 'No Changelog Needed' to bypass this check." 
-          exit 1
+            echo "check_changelog=true" >> $GITHUB_OUTPUT
           fi
+
+      - name: Check changelog entry
+        if: steps.tests_changed.outputs.check_changelog == 'true'
+        uses: scientific-python/action-towncrier-changelog@v1
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          BOT_USERNAME: changelog-bot
diff --git a/.github/workflows/check-sdist.yml b/.github/workflows/check-sdist.yml
index c02af711bdb6c..d97236dae1e40 100644
--- a/.github/workflows/check-sdist.yml
+++ b/.github/workflows/check-sdist.yml
@@ -1,4 +1,6 @@
 name: "Check sdist"
+permissions:
+  contents: read

 on:
   schedule:
@@ -11,10 +13,10 @@ jobs:
     runs-on: ubuntu-latest

     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - uses: actions/setup-python@v5
         with:
-          python-version: '3.9'
+          python-version: '3.10'
       - name: Install dependencies
         # scipy and cython are required to build sdist
         run: |
diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
index 4d38b22d71ab8..58b8fbf5c4ce7 100644
--- a/.github/workflows/codeql.yml
+++ b/.github/workflows/codeql.yml
@@ -29,7 +29,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        language: [ 'javascript-typescript', 'python' ]
+        language: [ 'javascript-typescript', 'python', 'actions' ]
        # CodeQL supports [ 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' ]
        # Use only 'java-kotlin' to analyze code written in Java, Kotlin or both
        # Use only 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both
diff --git a/.github/workflows/cuda-ci.yml b/.github/workflows/cuda-ci.yml
new file mode 100644
index 0000000000000..028ff06903e8a
--- /dev/null
+++ b/.github/workflows/cuda-ci.yml
@@ -0,0 +1,78 @@
+name: CUDA GPU
+permissions:
+  contents: read
+
+# Only run this workflow when a Pull Request is labeled with the
+# 'CUDA CI' label.
+on:
+  pull_request:
+    types:
+      - labeled
+
+jobs:
+  build_wheel:
+    if: contains(github.event.pull_request.labels.*.name, 'CUDA CI')
+    runs-on: "ubuntu-latest"
+    name: Build wheel for Pull Request
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Build wheels
+        uses: pypa/cibuildwheel@faf86a6ed7efa889faf6996aa23820831055001a
+        env:
+          CIBW_BUILD: cp313-manylinux_x86_64
+          CIBW_MANYLINUX_X86_64_IMAGE: manylinux2014
+          CIBW_BUILD_VERBOSITY: 1
+          CIBW_ARCHS: x86_64
+
+      - uses: actions/upload-artifact@v4
+        with:
+          name: cibw-wheels
+          path: ./wheelhouse/*.whl
+
+  tests:
+    if: contains(github.event.pull_request.labels.*.name, 'CUDA CI')
+    needs: [build_wheel]
+    runs-on:
+      group: cuda-gpu-runner-group
+    # Set this high enough so that the tests can comfortably run. We set a
+    # timeout to make abusing this workflow less attractive.
+ timeout-minutes: 20 + name: Run Array API unit tests + steps: + - uses: actions/download-artifact@v4 + with: + pattern: cibw-wheels + path: ~/dist + + - uses: actions/setup-python@v5 + with: + # XXX: The 3.12.4 release of Python on GitHub Actions is corrupted: + # https://github.com/actions/setup-python/issues/886 + python-version: '3.12.3' + - name: Checkout main repository + uses: actions/checkout@v4 + - name: Cache conda environment + id: cache-conda + uses: actions/cache@v4 + with: + path: ~/conda + key: ${{ runner.os }}-build-${{ hashFiles('build_tools/github/create_gpu_environment.sh') }}-${{ hashFiles('build_tools/github/pylatest_conda_forge_cuda_array-api_linux-64_conda.lock') }} + - name: Install miniforge + if: ${{ steps.cache-conda.outputs.cache-hit != 'true' }} + run: bash build_tools/github/create_gpu_environment.sh + - name: Install scikit-learn + run: | + source "${HOME}/conda/etc/profile.d/conda.sh" + conda activate sklearn + pip install ~/dist/cibw-wheels/$(ls ~/dist/cibw-wheels) + + - name: Run array API tests + run: | + source "${HOME}/conda/etc/profile.d/conda.sh" + conda activate sklearn + python -c "import sklearn; sklearn.show_versions()" + + SCIPY_ARRAY_API=1 pytest --pyargs sklearn -k 'array_api' -v + # Run in /home/runner to not load sklearn from the checkout repo + working-directory: /home/runner diff --git a/.github/workflows/cuda-label-remover.yml b/.github/workflows/cuda-label-remover.yml new file mode 100644 index 0000000000000..bb87f5419b662 --- /dev/null +++ b/.github/workflows/cuda-label-remover.yml @@ -0,0 +1,23 @@ +name: Remove "CUDA CI" Label + +# This workflow removes the "CUDA CI" label that triggers the actual +# CUDA CI. It is separate so that we can use the `pull_request_target` +# trigger which has a API token with write access. +on: + pull_request_target: + types: + - labeled + +# In order to remove the "CUDA CI" label we need to have write permissions for PRs +permissions: + pull-requests: write + +jobs: + label-remover: + if: contains(github.event.pull_request.labels.*.name, 'CUDA CI') + name: Remove "CUDA CI" Label + runs-on: ubuntu-24.04 + steps: + - uses: actions-ecosystem/action-remove-labels@v1 + with: + labels: CUDA CI diff --git a/.github/workflows/emscripten.yml b/.github/workflows/emscripten.yml new file mode 100644 index 0000000000000..47e54f6125638 --- /dev/null +++ b/.github/workflows/emscripten.yml @@ -0,0 +1,106 @@ +name: Test Emscripten/Pyodide build + +on: + schedule: + # Nightly build at 3:42 A.M. 
+ - cron: "42 3 */1 * *" + push: + branches: + - main + # Release branches + - "[0-9]+.[0-9]+.X" + pull_request: + branches: + - main + - "[0-9]+.[0-9]+.X" + # Manual run + workflow_dispatch: + +env: + FORCE_COLOR: 3 + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + check_build_trigger: + name: Check build trigger + runs-on: ubuntu-latest + if: github.repository == 'scikit-learn/scikit-learn' + outputs: + build: ${{ steps.check_build_trigger.outputs.build }} + steps: + - name: Checkout scikit-learn + uses: actions/checkout@v4 + with: + ref: ${{ github.event.pull_request.head.sha }} + persist-credentials: false + + - id: check_build_trigger + name: Check build trigger + shell: bash + run: | + set -e + set -x + + COMMIT_MSG=$(git log --no-merges -1 --oneline) + + # The commit marker "[pyodide]" will trigger the build when required + if [[ "$GITHUB_EVENT_NAME" == schedule || + "$GITHUB_EVENT_NAME" == workflow_dispatch || + "$COMMIT_MSG" =~ \[pyodide\] ]]; then + echo "build=true" >> $GITHUB_OUTPUT + fi + + build_wasm_wheel: + name: Build WASM wheel + runs-on: ubuntu-latest + needs: check_build_trigger + if: needs.check_build_trigger.outputs.build + steps: + - name: Checkout scikit-learn + uses: actions/checkout@v4 + with: + persist-credentials: false + + - uses: pypa/cibuildwheel@faf86a6ed7efa889faf6996aa23820831055001a + env: + CIBW_PLATFORM: pyodide + SKLEARN_SKIP_OPENMP_TEST: "true" + SKLEARN_SKIP_NETWORK_TESTS: 1 + CIBW_TEST_REQUIRES: "pytest pandas" + # -s pytest argument is needed to avoid an issue in pytest output capturing with Pyodide + CIBW_TEST_COMMAND: "python -m pytest -svra --pyargs sklearn --durations 20 --showlocals" + + - name: Upload wheel artifact + uses: actions/upload-artifact@v4 + with: + name: pyodide_wheel + path: ./wheelhouse/*.whl + if-no-files-found: error + + # Push to https://anaconda.org/scientific-python-nightly-wheels/scikit-learn + # WARNING: this job will overwrite any existing WASM wheels. 
+ upload-wheels: + name: Upload scikit-learn WASM wheels to Anaconda.org + runs-on: ubuntu-latest + permissions: {} + environment: upload_anaconda + needs: [build_wasm_wheel] + if: github.repository == 'scikit-learn/scikit-learn' && github.event_name != 'pull_request' + steps: + - name: Download wheel artifact + uses: actions/download-artifact@v4 + with: + path: wheelhouse/ + merge-multiple: true + + - name: Push to Anaconda PyPI index + uses: scientific-python/upload-nightly-action@b36e8c0c10dbcfd2e05bf95f17ef8c14fd708dbf # 0.6.2 + with: + artifacts_path: wheelhouse/ + anaconda_nightly_upload_token: ${{ secrets.SCIKIT_LEARN_NIGHTLY_UPLOAD_TOKEN }} diff --git a/.github/workflows/label-blank-issue.yml b/.github/workflows/label-blank-issue.yml index fce4fe6f9c74e..7c00984d1169f 100644 --- a/.github/workflows/label-blank-issue.yml +++ b/.github/workflows/label-blank-issue.yml @@ -1,4 +1,6 @@ name: Labels Blank issues +permissions: + issues: write on: issues: diff --git a/.github/workflows/labeler-title-regex.yml b/.github/workflows/labeler-title-regex.yml index 10195eca13a73..8b127925cbdae 100644 --- a/.github/workflows/labeler-title-regex.yml +++ b/.github/workflows/labeler-title-regex.yml @@ -13,9 +13,9 @@ permissions: jobs: labeler: - runs-on: ubuntu-20.04 + runs-on: ubuntu-24.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: python-version: '3.9' diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index fdc993c1b3fdd..f8075e779c56b 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -20,7 +20,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: ref: ${{ github.event.pull_request.head.sha }} @@ -31,13 +31,13 @@ jobs: - name: Install dependencies run: | + curl https://raw.githubusercontent.com/${{ github.repository }}/main/build_tools/shared.sh --retry 5 -o ./build_tools/shared.sh source build_tools/shared.sh # Include pytest compatibility with mypy - pip install pytest ruff $(get_dep mypy min) $(get_dep black min) cython-lint + pip install pytest $(get_dep ruff min) $(get_dep mypy min) cython-lint # we save the versions of the linters to be used in the error message later. 
python -c "from importlib.metadata import version; print(f\"ruff={version('ruff')}\")" >> /tmp/versions.txt python -c "from importlib.metadata import version; print(f\"mypy={version('mypy')}\")" >> /tmp/versions.txt - python -c "from importlib.metadata import version; print(f\"black={version('black')}\")" >> /tmp/versions.txt python -c "from importlib.metadata import version; print(f\"cython-lint={version('cython-lint')}\")" >> /tmp/versions.txt - name: Run linting @@ -52,7 +52,7 @@ jobs: - name: Upload Artifact if: always() - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: lint-log path: | @@ -72,7 +72,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v5 @@ -84,7 +84,7 @@ jobs: - name: Download artifact id: download-artifact - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: lint-log diff --git a/.github/workflows/publish_pypi.yml b/.github/workflows/publish_pypi.yml index b8940ae133ad9..ad24ea805eb8a 100644 --- a/.github/workflows/publish_pypi.yml +++ b/.github/workflows/publish_pypi.yml @@ -18,7 +18,7 @@ jobs: # IMPORTANT: this permission is mandatory for trusted publishing id-token: write steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: python-version: '3.8' @@ -39,10 +39,13 @@ jobs: run: | python build_tools/github/check_wheels.py - name: Publish package to TestPyPI - uses: pypa/gh-action-pypi-publish@v1.8.5 + uses: pypa/gh-action-pypi-publish@76f52bc884231f62b9a034ebfe128415bbaabdfc # v1.12.4 with: - repository_url: https://test.pypi.org/legacy/ + repository-url: https://test.pypi.org/legacy/ + print-hash: true if: ${{ github.event.inputs.pypi_repo == 'testpypi' }} - name: Publish package to PyPI - uses: pypa/gh-action-pypi-publish@v1.8.5 + uses: pypa/gh-action-pypi-publish@76f52bc884231f62b9a034ebfe128415bbaabdfc # v1.12.4 if: ${{ github.event.inputs.pypi_repo == 'pypi' }} + with: + print-hash: true diff --git a/.github/workflows/update-lock-files.yml b/.github/workflows/update-lock-files.yml index 50d62c85d00a6..3d67bd9f70701 100644 --- a/.github/workflows/update-lock-files.yml +++ b/.github/workflows/update-lock-files.yml @@ -1,5 +1,7 @@ # Workflow to update lock files name: Update lock files +permissions: + contents: read on: workflow_dispatch: @@ -22,12 +24,11 @@ jobs: - name: scipy-dev update_script_args: "--select-tag scipy-dev" additional_commit_message: "[scipy-dev]" - - name: cirrus-arm - update_script_args: "--select-tag arm" - additional_commit_message: "[cirrus arm]" - - name: pypy - update_script_args: "--select-tag pypy" - additional_commit_message: "[pypy]" + - name: free-threaded + update_script_args: "--select-tag free-threaded" + additional_commit_message: "[free-threaded]" + - name: array-api + update_script_args: "--select-tag cuda" steps: - uses: actions/checkout@v4 @@ -35,6 +36,7 @@ jobs: run: | source build_tools/shared.sh source $CONDA/bin/activate + conda update -n base --all conda install -n base conda conda-libmamba-solver -y conda config --set solver libmamba conda install -c conda-forge "$(get_dep conda-lock min)" -y @@ -43,7 +45,7 @@ jobs: - name: Create Pull Request id: cpr - uses: peter-evans/create-pull-request@v5 + uses: peter-evans/create-pull-request@v7 with: token: ${{ secrets.BOT_GITHUB_TOKEN }} push-to-fork: scikit-learn-bot/scikit-learn @@ -59,6 +61,21 @@ jobs: ### Note If the CI tasks fail, create a new branch based on this PR 
and add the required fixes to that branch.
+
+      # The CUDA workflow needs to be triggered explicitly as it uses an expensive runner
+      - name: Trigger additional tests
+        if: steps.cpr.outputs.pull-request-number != '' && matrix.name == 'array-api'
+        env:
+          GH_TOKEN: ${{ secrets.BOT_GITHUB_TOKEN }}
+          PR_NUMBER: ${{steps.cpr.outputs.pull-request-number}}
+        run: |
+          curl -L \
+            -X POST \
+            -H "Accept: application/vnd.github+json" \
+            -H "Authorization: Bearer $GH_TOKEN" \
+            -H "X-GitHub-Api-Version: 2022-11-28" \
+            https://api.github.com/repos/scikit-learn/scikit-learn/issues/$PR_NUMBER/labels \
+            -d '{"labels":["CUDA CI"]}'
+
       - name: Check Pull Request
         if: steps.cpr.outputs.pull-request-number != ''
         run: |
diff --git a/.github/workflows/update_tracking_issue.yml b/.github/workflows/update_tracking_issue.yml
index d4538fe6848d8..54db3f50bc43b 100644
--- a/.github/workflows/update_tracking_issue.yml
+++ b/.github/workflows/update_tracking_issue.yml
@@ -11,6 +11,9 @@
 # Where JOB_NAME contains the status of the job you are interested in

 name: "Update tracking issue"
+permissions:
+  contents: read
+
 on:
   workflow_call:
     inputs:
@@ -26,7 +29,7 @@ jobs:
     runs-on: ubuntu-latest
     if: github.repository == 'scikit-learn/scikit-learn' && github.event_name == 'schedule'
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - uses: actions/setup-python@v5
         with:
           python-version: '3.9'
diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
index d30f85ff3d1e6..33e8897c147f7 100644
--- a/.github/workflows/wheels.yml
+++ b/.github/workflows/wheels.yml
@@ -1,5 +1,7 @@
 # Workflow to build and test wheels
 name: Wheel builder
+permissions:
+  contents: read

 on:
   schedule:
@@ -32,7 +34,7 @@ jobs:
     steps:
       - name: Checkout scikit-learn
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           ref: ${{ github.event.pull_request.head.sha }}

@@ -44,6 +46,11 @@ jobs:
   build_wheels:
     name: Build wheel for cp${{ matrix.python }}-${{ matrix.platform_id }}-${{ matrix.manylinux_image }}
     runs-on: ${{ matrix.os }}
+
+    # For conda-incubator/setup-miniconda to work
+    defaults:
+      run:
+        shell: bash -el {0}
     needs: check_build_trigger
     if: needs.check_build_trigger.outputs.build

@@ -53,11 +60,6 @@ jobs:
       matrix:
         include:
           # Windows 64 bit
-          # Note: windows-2019 is needed for older Python versions:
-          # https://github.com/scikit-learn/scikit-learn/issues/22530
-          - os: windows-latest
-            python: 39
-            platform_id: win_amd64
           - os: windows-latest
             python: 310
             platform_id: win_amd64
@@ -67,19 +69,19 @@ jobs:
           - os: windows-latest
             python: 312
             platform_id: win_amd64
+          - os: windows-latest
+            python: 313
+            platform_id: win_amd64
+          - os: windows-latest
+            python: 313t
+            platform_id: win_amd64
+            free_threaded_support: True

           # Linux 64 bit manylinux2014
-          - os: ubuntu-latest
-            python: 39
-            platform_id: manylinux_x86_64
-            manylinux_image: manylinux2014
-
-          # NumPy on Python 3.10 only supports 64bit and is only available with manylinux2014
           - os: ubuntu-latest
             python: 310
             platform_id: manylinux_x86_64
             manylinux_image: manylinux2014
-
           - os: ubuntu-latest
             python: 311
             platform_id: manylinux_x86_64
@@ -88,25 +90,53 @@ jobs:
             python: 312
             platform_id: manylinux_x86_64
             manylinux_image: manylinux2014
+          - os: ubuntu-latest
+            python: 313
+            platform_id: manylinux_x86_64
+            manylinux_image: manylinux2014
+          - os: ubuntu-latest
+            python: 313t
+            platform_id: manylinux_x86_64
+            manylinux_image: manylinux2014
+            free_threaded_support: True
+
+          # Linux ARM 64 bit manylinux2014
+          - os: ubuntu-24.04-arm
+            python: 310
+            platform_id: manylinux_aarch64
+
manylinux_image: manylinux2014 + - os: ubuntu-24.04-arm + python: 311 + platform_id: manylinux_aarch64 + manylinux_image: manylinux2014 + - os: ubuntu-24.04-arm + python: 312 + platform_id: manylinux_aarch64 + manylinux_image: manylinux2014 + - os: ubuntu-24.04-arm + python: 313 + platform_id: manylinux_aarch64 + manylinux_image: manylinux2014 # MacOS x86_64 - - os: macos-12 - python: 39 - platform_id: macosx_x86_64 - - os: macos-12 + - os: macos-13 python: 310 platform_id: macosx_x86_64 - - os: macos-12 + - os: macos-13 python: 311 platform_id: macosx_x86_64 - - os: macos-12 + - os: macos-13 python: 312 platform_id: macosx_x86_64 + - os: macos-13 + python: 313 + platform_id: macosx_x86_64 + - os: macos-13 + python: 313t + platform_id: macosx_x86_64 + free_threaded_support: True # MacOS arm64 - - os: macos-14 - python: 39 - platform_id: macosx_arm64 - os: macos-14 python: 310 platform_id: macosx_arm64 @@ -116,49 +146,31 @@ jobs: - os: macos-14 python: 312 platform_id: macosx_arm64 + - os: macos-14 + python: 313 + platform_id: macosx_arm64 + - os: macos-14 + python: 313t + platform_id: macosx_arm64 + free_threaded_support: True steps: - name: Checkout scikit-learn - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup Python uses: actions/setup-python@v5 with: python-version: "3.11" # update once build dependencies are available - - name: Install conda for macos arm64 - if: ${{ matrix.platform_id == 'macosx_arm64' }} - run: | - set -ex - # macos arm64 runners do not have conda installed. Thus we much install conda manually - EXPECTED_SHA="dd832d8a65a861b5592b2cf1d55f26031f7c1491b30321754443931e7b1e6832" - MINIFORGE_URL="https://github.com/conda-forge/miniforge/releases/download/23.11.0-0/Mambaforge-23.11.0-0-MacOSX-arm64.sh" - curl -L --retry 10 $MINIFORGE_URL -o miniforge.sh - - # Check SHA - file_sha=$(shasum -a 256 miniforge.sh | awk '{print $1}') - if [ "$EXPECTED_SHA" != "$file_sha" ]; then - echo "SHA values did not match!" 
- exit 1 - fi - - # Install miniforge - MINIFORGE_PATH=$HOME/miniforge - bash ./miniforge.sh -b -p $MINIFORGE_PATH - echo "$MINIFORGE_PATH/bin" >> $GITHUB_PATH - echo "CONDA_HOME=$MINIFORGE_PATH" >> $GITHUB_ENV - - - name: Set conda environment for non-macos arm64 environments - if: ${{ matrix.platform_id != 'macosx_arm64' }} - run: | - # Non-macos arm64 envrionments already have conda installed - echo "CONDA_HOME=/usr/local/miniconda" >> $GITHUB_ENV + - uses: conda-incubator/setup-miniconda@v3 + if: ${{ startsWith(matrix.platform_id, 'macosx') }} - name: Build and test wheels env: - CIBW_PRERELEASE_PYTHONS: ${{ matrix.prerelease }} + CIBW_PRERELEASE_PYTHONS: ${{ matrix.prerelease_pythons }} + CIBW_FREE_THREADED_SUPPORT: ${{ matrix.free_threaded_support }} CIBW_ENVIRONMENT: SKLEARN_SKIP_NETWORK_TESTS=1 - SKLEARN_BUILD_PARALLEL=3 CIBW_BUILD: cp${{ matrix.python }}-${{ matrix.platform_id }} CIBW_ARCHS: all CIBW_MANYLINUX_X86_64_IMAGE: ${{ matrix.manylinux_image }} @@ -168,17 +180,24 @@ jobs: # toolchain CIBW_CONFIG_SETTINGS_WINDOWS: "setup-args=--vsenv" CIBW_REPAIR_WHEEL_COMMAND_WINDOWS: bash build_tools/github/repair_windows_wheels.sh {wheel} {dest_dir} + CIBW_BEFORE_BUILD: bash {project}/build_tools/wheels/cibw_before_build.sh {project} CIBW_BEFORE_TEST_WINDOWS: bash build_tools/github/build_minimal_windows_image.sh ${{ matrix.python }} + CIBW_ENVIRONMENT_PASS_LINUX: RUNNER_OS CIBW_TEST_REQUIRES: pytest pandas - CIBW_TEST_COMMAND: bash {project}/build_tools/wheels/test_wheels.sh - CIBW_TEST_COMMAND_WINDOWS: bash {project}/build_tools/github/test_windows_wheels.sh ${{ matrix.python }} + # On Windows, we use a custom Docker image and CIBW_TEST_REQUIRES_WINDOWS + # does not make sense because it would install dependencies in the host + # rather than inside the Docker image + CIBW_TEST_REQUIRES_WINDOWS: "" + CIBW_TEST_COMMAND: bash {project}/build_tools/wheels/test_wheels.sh {project} + CIBW_TEST_COMMAND_WINDOWS: bash {project}/build_tools/github/test_windows_wheels.sh ${{ matrix.python }} {project} CIBW_BUILD_VERBOSITY: 1 run: bash build_tools/wheels/build_wheels.sh - name: Store artifacts - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: + name: cibw-wheels-cp${{ matrix.python }}-${{ matrix.platform_id }} path: wheelhouse/*.whl update-tracker: @@ -199,17 +218,15 @@ jobs: steps: - name: Checkout scikit-learn - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup Python uses: actions/setup-python@v5 with: - python-version: "3.9" # update once build dependencies are available + python-version: "3.12" - name: Build source distribution run: bash build_tools/github/build_source.sh - env: - SKLEARN_BUILD_PARALLEL: 3 - name: Test source distribution run: bash build_tools/github/test_source.sh @@ -217,8 +234,9 @@ jobs: SKLEARN_SKIP_NETWORK_TESTS: 1 - name: Store artifacts - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: + name: cibw-sdist path: dist/*.tar.gz # Upload the wheels and the source distribution @@ -232,12 +250,14 @@ jobs: steps: - name: Checkout scikit-learn - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Download artifacts - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: + pattern: cibw-* path: dist + merge-multiple: true - name: Setup Python uses: actions/setup-python@v5 @@ -247,6 +267,6 @@ jobs: # Secret variables need to be mapped to environment variables explicitly SCIKIT_LEARN_NIGHTLY_UPLOAD_TOKEN: ${{ secrets.SCIKIT_LEARN_NIGHTLY_UPLOAD_TOKEN }} 
SCIKIT_LEARN_STAGING_UPLOAD_TOKEN: ${{ secrets.SCIKIT_LEARN_STAGING_UPLOAD_TOKEN }}
-          ARTIFACTS_PATH: dist/artifact
+          ARTIFACTS_PATH: dist
         # Force a replacement if the remote file already exists
         run: bash build_tools/github/upload_anaconda.sh
diff --git a/.gitignore b/.gitignore
index 9f3b453bbfd74..7e00b8802bd01 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,11 +15,19 @@ dist/
 MANIFEST
 doc/sg_execution_times.rst
 doc/_build/
+doc/api/*.rst
 doc/auto_examples/
+doc/css/*
+!doc/css/.gitkeep
 doc/modules/generated/
 doc/datasets/generated/
+doc/developers/maintainer.rst
+doc/index.rst
 doc/min_dependency_table.rst
 doc/min_dependency_substitutions.rst
+# release notes generated by towncrier
+doc/whats_new/notes-towncrier.rst
+
 *.pdf
 pip-log.txt
 scikit_learn.egg-info/
@@ -83,31 +91,8 @@ _configtest.o.d
 # virtualenv from advanced installation guide
 sklearn-env/

-# files generated from a template
-sklearn/_loss/_loss.pyx
-sklearn/utils/_seq_dataset.pyx
-sklearn/utils/_seq_dataset.pxd
-sklearn/utils/_weight_vector.pyx
-sklearn/utils/_weight_vector.pxd
-sklearn/linear_model/_sag_fast.pyx
-sklearn/linear_model/_sgd_fast.pyx
-sklearn/metrics/_dist_metrics.pyx
-sklearn/metrics/_dist_metrics.pxd
-sklearn/metrics/_pairwise_distances_reduction/_argkmin.pxd
-sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx
-sklearn/metrics/_pairwise_distances_reduction/_argkmin_classmode.pyx
-sklearn/metrics/_pairwise_distances_reduction/_base.pxd
-sklearn/metrics/_pairwise_distances_reduction/_base.pyx
-sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pxd
-sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pyx
-sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pxd
-sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx
-sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pxd
-sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx
-sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors_classmode.pyx
-sklearn/neighbors/_ball_tree.pyx
-sklearn/neighbors/_binary_tree.pxi
-sklearn/neighbors/_kd_tree.pyx
-
 # Default JupyterLite content
 jupyterlite_contents
+
+# file recognised by vscode IDEs containing env variables
+.env
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 31af43b6bbab0..48871d2a4abed 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,29 +1,33 @@
+exclude: '^(.git/|sklearn/externals/|asv_benchmarks/env/)'
 repos:
 - repo: https://github.com/pre-commit/pre-commit-hooks
-  rev: v4.3.0
+  rev: v5.0.0
   hooks:
   - id: check-yaml
   - id: end-of-file-fixer
   - id: trailing-whitespace
 - repo: https://github.com/astral-sh/ruff-pre-commit
-  # Ruff version.
-  rev: v0.2.1
+  rev: v0.11.7
   hooks:
   - id: ruff
     args: ["--fix", "--output-format=full"]
-- repo: https://github.com/psf/black
-  rev: 24.3.0
-  hooks:
-  - id: black
+  - id: ruff-format
 - repo: https://github.com/pre-commit/mirrors-mypy
-  rev: v1.9.0
+  rev: v1.15.0
   hooks:
   - id: mypy
     files: sklearn/
     additional_dependencies: [pytest==6.2.4]
 - repo: https://github.com/MarcoGorelli/cython-lint
-  rev: v0.15.0
+  rev: v0.16.6
   hooks:
     # TODO: add the double-quote-cython-strings hook when its usability has improved:
    # possibility to pass a directory and use it as a check instead of auto-formatter.
- id: cython-lint +- repo: https://github.com/pre-commit/mirrors-prettier + rev: v2.7.1 + hooks: + - id: prettier + files: ^doc/scss/|^doc/js/scripts/ + exclude: ^doc/js/scripts/vendor/ + types_or: ["scss", "javascript"] diff --git a/.spin/cmds.py b/.spin/cmds.py new file mode 100644 index 0000000000000..954749b8005c2 --- /dev/null +++ b/.spin/cmds.py @@ -0,0 +1,29 @@ +import shutil +import sys + +import click +from spin.cmds import util + + +@click.command() +def clean(): + """🪥 Clean build folder. + + Very rarely needed since meson-python recompiles as needed when sklearn is + imported. + + One known use case where "spin clean" is useful: avoid compilation errors + when switching from numpy<2 to numpy>=2 in the same conda environment or + virtualenv. + """ + util.run([sys.executable, "-m", "pip", "uninstall", "scikit-learn", "-y"]) + default_meson_build_dir = ( + f"build/cp{sys.version_info.major}{sys.version_info.minor}" + ) + click.secho( + f"removing default Meson build dir: {default_meson_build_dir}", + bold=True, + fg="bright_blue", + ) + + shutil.rmtree(default_meson_build_dir, ignore_errors=True) diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 0000000000000..c3e367c124f81 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,48 @@ +cff-version: 1.2.0 +title: scikit-learn +type: software +authors: + - name: "The scikit-learn developers" +message: "If you use scikit-learn in a scientific publication, we would appreciate citations to the following paper:" +preferred-citation: + type: article + title: "Scikit-learn: Machine Learning in Python" + authors: + - family-names: "Pedregosa" + given-names: "Fabian" + - family-names: "Varoquaux" + given-names: "Gaël" + - family-names: "Gramfort" + given-names: "Alexandre" + - family-names: "Michel" + given-names: "Vincent" + - family-names: "Thirion" + given-names: "Bertrand" + - family-names: "Grisel" + given-names: "Olivier" + - family-names: "Blondel" + given-names: "Mathieu" + - family-names: "Prettenhofer" + given-names: "Peter" + - family-names: "Weiss" + given-names: "Ron" + - family-names: "Dubourg" + given-names: "Vincent" + - family-names: "Vanderplas" + given-names: "Jake" + - family-names: "Passos" + given-names: "Alexandre" + - family-names: "Cournapeau" + given-names: "David" + - family-names: "Brucher" + given-names: "Matthieu" + - family-names: "Perrot" + given-names: "Matthieu" + - family-names: "Duchesnay" + given-names: "Édouard" + journal: "Journal of Machine Learning Research" + volume: 12 + start: 2825 + end: 2830 + year: 2011 + url: "https://jmlr.csail.mit.edu/papers/v12/pedregosa11a.html" diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 23016563a5f6e..b4e1709e67c3f 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -13,4 +13,3 @@ all priceless contributions. 
We abide by the principles of openness, respect, and consideration of others
 of the Python Software Foundation: https://www.python.org/psf/codeofconduct/
-
diff --git a/MANIFEST.in b/MANIFEST.in
deleted file mode 100644
index 1596d4cd011df..0000000000000
--- a/MANIFEST.in
+++ /dev/null
@@ -1,36 +0,0 @@
-include *.rst
-include *.build
-recursive-include sklearn *.build
-recursive-include doc *
-recursive-include examples *
-recursive-include sklearn *.c *.cpp *.h *.pyx *.pxd *.pxi *.tp
-recursive-include sklearn/datasets *.csv *.csv.gz *.rst *.jpg *.txt *.arff.gz *.json.gz
-include COPYING
-include README.rst
-include pyproject.toml
-include sklearn/externals/README
-include sklearn/svm/src/liblinear/COPYRIGHT
-include sklearn/svm/src/libsvm/LIBSVM_CHANGES
-include conftest.py
-include Makefile
-include MANIFEST.in
-include .coveragerc
-
-# exclude from sdist
-recursive-exclude asv_benchmarks *
-recursive-exclude benchmarks *
-recursive-exclude build_tools *
-recursive-exclude maint_tools *
-recursive-exclude benchmarks *
-recursive-exclude .binder *
-recursive-exclude .circleci *
-exclude .cirrus.star
-exclude .codecov.yml
-exclude .git-blame-ignore-revs
-exclude .mailmap
-exclude .pre-commit-config.yaml
-exclude azure-pipelines.yml
-exclude CODE_OF_CONDUCT.md
-exclude CONTRIBUTING.md
-exclude SECURITY.md
-exclude PULL_REQUEST_TEMPLATE.md
diff --git a/Makefile b/Makefile
index 52374ba44ff79..eb6ec39edcbdc 100644
--- a/Makefile
+++ b/Makefile
@@ -1,70 +1,27 @@
 # simple makefile to simplify repetitive build env management tasks under posix

-# caution: testing won't work on windows, see README
-
 PYTHON ?= python
-CYTHON ?= cython
-PYTEST ?= pytest
-CTAGS ?= ctags
-
-# skip doctests on 32bit python
-BITS := $(shell python -c 'import struct; print(8 * struct.calcsize("P"))')
+DEFAULT_MESON_BUILD_DIR = build/cp$(shell python -c 'import sys; print(f"{sys.version_info.major}{sys.version_info.minor}")' )

-all: clean inplace test
+all:
+	@echo "Please use 'make <target>' where <target> is one of"
+	@echo "  dev      build scikit-learn with Meson"
+	@echo "  clean    clean scikit-learn Meson build. Very rarely needed,"
+	@echo "           since meson-python recompiles on import."

-clean-ctags:
-	rm -f tags
+.PHONY: all

-clean: clean-ctags
-	$(PYTHON) setup.py clean
-	rm -rf dist
-
-in: inplace # just a shortcut
-inplace:
-	$(PYTHON) setup.py build_ext -i
+dev: dev-meson
+
+dev-meson:
+	pip install --verbose --no-build-isolation --editable . \
--config-settings editable-verbose=true +clean: clean-meson + clean-meson: pip uninstall -y scikit-learn - -test-code: in - $(PYTEST) --showlocals -v sklearn --durations=20 -test-sphinxext: - $(PYTEST) --showlocals -v doc/sphinxext/ -test-doc: -ifeq ($(BITS),64) - $(PYTEST) $(shell find doc -name '*.rst' | sort) -endif -test-code-parallel: in - $(PYTEST) -n auto --showlocals -v sklearn --durations=20 - -test-coverage: - rm -rf coverage .coverage - $(PYTEST) sklearn --showlocals -v --cov=sklearn --cov-report=html:coverage -test-coverage-parallel: - rm -rf coverage .coverage .coverage.* - $(PYTEST) sklearn -n auto --showlocals -v --cov=sklearn --cov-report=html:coverage - -test: test-code test-sphinxext test-doc - -trailing-spaces: - find sklearn -name "*.py" -exec perl -pi -e 's/[ \t]*$$//' {} \; - -cython: - python setup.py build_src - -ctags: - # make tags for symbol based navigation in emacs and vim - # Install with: sudo apt-get install exuberant-ctags - $(CTAGS) --python-kinds=-i -R sklearn - -doc: inplace - $(MAKE) -C doc html - -doc-noplot: inplace - $(MAKE) -C doc html-noplot - -code-analysis: - build_tools/linting.sh + # It seems in some cases removing the folder avoids weird compilation + # errors (e.g. when switching from numpy>=2 to numpy<2). For some + # reason ninja clean -C $(DEFAULT_MESON_BUILD_DIR) is not + # enough. + rm -rf $(DEFAULT_MESON_BUILD_DIR) diff --git a/README.rst b/README.rst index 4ac297063c26e..4f4741a090dee 100644 --- a/README.rst +++ b/README.rst @@ -1,6 +1,6 @@ .. -*- mode: rst -*- -|Azure| |CirrusCI| |Codecov| |CircleCI| |Nightly wheels| |Black| |PythonVersion| |PyPi| |DOI| |Benchmark| +|Azure| |Codecov| |CircleCI| |Nightly wheels| |Ruff| |PythonVersion| |PyPi| |DOI| |Benchmark| .. |Azure| image:: https://dev.azure.com/scikit-learn/scikit-learn/_apis/build/status/scikit-learn.scikit-learn?branchName=main :target: https://dev.azure.com/scikit-learn/scikit-learn/_build/latest?definitionId=1&branchName=main @@ -8,38 +8,35 @@ .. |CircleCI| image:: https://circleci.com/gh/scikit-learn/scikit-learn/tree/main.svg?style=shield :target: https://circleci.com/gh/scikit-learn/scikit-learn -.. |CirrusCI| image:: https://img.shields.io/cirrus/github/scikit-learn/scikit-learn/main?label=Cirrus%20CI - :target: https://cirrus-ci.com/github/scikit-learn/scikit-learn/main - .. |Codecov| image:: https://codecov.io/gh/scikit-learn/scikit-learn/branch/main/graph/badge.svg?token=Pk8G9gg3y9 :target: https://codecov.io/gh/scikit-learn/scikit-learn .. |Nightly wheels| image:: https://github.com/scikit-learn/scikit-learn/workflows/Wheel%20builder/badge.svg?event=schedule :target: https://github.com/scikit-learn/scikit-learn/actions?query=workflow%3A%22Wheel+builder%22+event%3Aschedule +.. |Ruff| image:: https://img.shields.io/badge/code%20style-ruff-000000.svg + :target: https://github.com/astral-sh/ruff + .. |PythonVersion| image:: https://img.shields.io/pypi/pyversions/scikit-learn.svg :target: https://pypi.org/project/scikit-learn/ .. |PyPi| image:: https://img.shields.io/pypi/v/scikit-learn :target: https://pypi.org/project/scikit-learn -.. |Black| image:: https://img.shields.io/badge/code%20style-black-000000.svg - :target: https://github.com/psf/black - .. |DOI| image:: https://zenodo.org/badge/21369/scikit-learn/scikit-learn.svg :target: https://zenodo.org/badge/latestdoi/21369/scikit-learn/scikit-learn .. |Benchmark| image:: https://img.shields.io/badge/Benchmarked%20by-asv-blue :target: https://scikit-learn.org/scikit-learn-benchmarks -.. 
|PythonMinVersion| replace:: 3.9 -.. |NumPyMinVersion| replace:: 1.19.5 -.. |SciPyMinVersion| replace:: 1.6.0 +.. |PythonMinVersion| replace:: 3.10 +.. |NumPyMinVersion| replace:: 1.22.0 +.. |SciPyMinVersion| replace:: 1.8.0 .. |JoblibMinVersion| replace:: 1.2.0 .. |ThreadpoolctlMinVersion| replace:: 3.1.0 -.. |MatplotlibMinVersion| replace:: 3.3.4 -.. |Scikit-ImageMinVersion| replace:: 0.17.2 -.. |PandasMinVersion| replace:: 1.1.5 +.. |MatplotlibMinVersion| replace:: 3.5.0 +.. |Scikit-ImageMinVersion| replace:: 0.19.0 +.. |PandasMinVersion| replace:: 1.4.0 .. |SeabornMinVersion| replace:: 0.9.0 .. |PytestMinVersion| replace:: 7.1.2 .. |PlotlyMinVersion| replace:: 5.14.0 @@ -75,10 +72,6 @@ scikit-learn requires: ======= -**Scikit-learn 0.20 was the last version to support Python 2.7 and Python 3.4.** -scikit-learn 1.0 and later require Python 3.7 or newer. -scikit-learn 1.1 and later require Python 3.8 or newer. - Scikit-learn plotting capabilities (i.e., functions start with ``plot_`` and classes end with ``Display``) require Matplotlib (>= |MatplotlibMinVersion|). For running the examples Matplotlib >= |MatplotlibMinVersion| is required. @@ -187,16 +180,16 @@ Communication - Logos & Branding: https://github.com/scikit-learn/scikit-learn/tree/main/doc/logos - Blog: https://blog.scikit-learn.org - Calendar: https://blog.scikit-learn.org/calendar/ -- Twitter: https://twitter.com/scikit_learn - Stack Overflow: https://stackoverflow.com/questions/tagged/scikit-learn - GitHub Discussions: https://github.com/scikit-learn/scikit-learn/discussions - Website: https://scikit-learn.org - LinkedIn: https://www.linkedin.com/company/scikit-learn +- Bluesky: https://bsky.app/profile/scikit-learn.org +- Mastodon: https://mastodon.social/@sklearn@fosstodon.org - YouTube: https://www.youtube.com/channel/UCJosFjYm0ZYVUARxuOZqnnw/playlists - Facebook: https://www.facebook.com/scikitlearnofficial/ - Instagram: https://www.instagram.com/scikitlearnofficial/ - TikTok: https://www.tiktok.com/@scikit.learn -- Mastodon: https://mastodon.social/@sklearn@fosstodon.org - Discord: https://discord.gg/h9qyrK8Jc8 diff --git a/SECURITY.md b/SECURITY.md index 18bb99ea3c15c..cfc0bc34c738d 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -4,17 +4,20 @@ | Version | Supported | | ------------- | ------------------ | -| 1.4.2 | :white_check_mark: | -| < 1.4.2 | :x: | +| 1.6.1 | :white_check_mark: | +| < 1.6.1 | :x: | ## Reporting a Vulnerability -Please report security vulnerabilities by email to `security@scikit-learn.org`. -This email is an alias to a subset of the scikit-learn maintainers' team. +Please report security vulnerabilities by opening a new [GitHub security +advisory](https://github.com/scikit-learn/scikit-learn/security/advisories/new). + +You can also send an email to `security@scikit-learn.org`, which is an alias to +a subset of the scikit-learn maintainers' team. If the security vulnerability is accepted, a patch will be crafted privately in order to prepare a dedicated bugfix release as timely as possible (depending on the complexity of the fix). -In addition to sending the report by email, you can also report security -vulnerabilities to [tidelift](https://tidelift.com/security). +In addition to the options above, you can also report security vulnerabilities +to [tidelift](https://tidelift.com/security). 
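The README hunk above raises the documented minimums to Python 3.10, NumPy 1.22.0, SciPy 1.8.0, joblib 1.2.0 and threadpoolctl 3.1.0. Below is a minimal sketch of checking an environment against that table; it is illustrative only, not a script that ships with scikit-learn. The version table is copied from the |*MinVersion| substitutions above, and the naive parser assumes plain X.Y.Z version strings (no rc/dev tags).

```python
from importlib.metadata import PackageNotFoundError, version

# Minimum versions as documented in the README diff above.
MIN_VERSIONS = {
    "numpy": "1.22.0",
    "scipy": "1.8.0",
    "joblib": "1.2.0",
    "threadpoolctl": "3.1.0",
}

def as_tuple(v: str) -> tuple[int, ...]:
    # Naive parser: good enough for plain X.Y.Z strings.
    return tuple(int(part) for part in v.split(".")[:3])

for package, minimum in MIN_VERSIONS.items():
    try:
        installed = version(package)
    except PackageNotFoundError:
        print(f"{package}: not installed (>= {minimum} required)")
        continue
    status = "OK" if as_tuple(installed) >= as_tuple(minimum) else f"needs >= {minimum}"
    print(f"{package}: {installed} ({status})")
```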
diff --git a/asv_benchmarks/asv.conf.json b/asv_benchmarks/asv.conf.json index 3392925d7a488..3b16389139c0c 100644 --- a/asv_benchmarks/asv.conf.json +++ b/asv_benchmarks/asv.conf.json @@ -7,31 +7,21 @@ "project": "scikit-learn", // The project's homepage - "project_url": "scikit-learn.org/", + "project_url": "https://scikit-learn.org/", // The URL or local path of the source code repository for the // project being benchmarked "repo": "..", - // The Python project's subdirectory in your repo. If missing or - // the empty string, the project is assumed to be located at the root - // of the repository. - // "repo_subdir": "", - // Customizable commands for building, installing, and // uninstalling the project. See asv.conf.json documentation. - // - // "install_command": ["python -mpip install {wheel_file}"], - // "uninstall_command": ["return-code=any python -mpip uninstall -y {project}"], - // "build_command": [ - // "python setup.py build", - // "PIP_NO_BUILD_ISOLATION=false python -mpip wheel --no-deps --no-index -w {build_cache_dir} {build_dir}" - // ], + "install_command": ["python -mpip install {wheel_file}"], + "uninstall_command": ["return-code=any python -mpip uninstall -y {project}"], + "build_command": ["python -m build --wheel -o {build_cache_dir} {build_dir}"], - // List of branches to benchmark. If not provided, defaults to "master + // List of branches to benchmark. If not provided, defaults to "main" // (for git) or "default" (for mercurial). "branches": ["main"], - // "branches": ["default"], // for mercurial // The DVCS being used. If not set, it will be automatically // determined from "repo" by looking at the protocol in the URL @@ -50,19 +40,19 @@ // defaults to 10 min //"install_timeout": 600, + // timeout in seconds all benchmarks, can be overridden per benchmark + // defaults to 1 min + //"default_benchmark_timeout": 60, + // the base URL to show a commit for the project. "show_commit_url": "https://github.com/scikit-learn/scikit-learn/commit/", - // The Pythons you'd like to test against. If not provided, defaults + // The Pythons you'd like to test against. If not provided, defaults // to the current version of Python used to run `asv`. - // "pythons": ["3.6"], + // "pythons": ["3.12"], - // The list of conda channel names to be searched for benchmark - // dependency packages in the specified order - // "conda_channels": ["conda-forge", "defaults"] - - // The matrix of dependencies to test. Each key is the name of a - // package (in PyPI) and the values are version numbers. An empty + // The matrix of dependencies to test. Each key is the name of a + // package (in PyPI) and the values are version numbers. An empty // list or empty string indicates to just test against the default // (latest) version. null indicates that the package is to not be // installed. If the package to be tested is only available from @@ -76,12 +66,12 @@ // those due to dependency changes. 
// "matrix": { - "numpy": ["1.25.2"], - "scipy": ["1.11.2"], + "numpy": ["2.0.0"], + "scipy": ["1.14.0"], "cython": ["3.0.10"], "joblib": ["1.3.2"], "threadpoolctl": ["3.2.0"], - "pandas": ["2.1.0"] + "pandas": ["2.2.2"] }, // Combinations of libraries/python versions can be excluded/included @@ -111,10 +101,10 @@ // ], // // "include": [ - // // additional env for python2.7 - // {"python": "2.7", "numpy": "1.8"}, + // // additional env for python3.12 + // {"python": "3.12", "numpy": "1.26"}, // // additional env if run on windows+conda - // {"platform": "win32", "environment_type": "conda", "python": "2.7", "libpython": ""}, + // {"sys_platform": "win32", "environment_type": "conda", "python": "3.12", "libpython": ""}, // ], // The directory (relative to the current directory) that benchmarks are @@ -136,10 +126,10 @@ // The number of characters to retain in the commit hashes. // "hash_length": 8, - // `asv` will cache results of the recent builds in each + // `asv` will cache wheels of the recent builds in each // environment, making them faster to install next time. This is - // the number of builds to keep, per environment. - // "build_cache_size": 2, + // number of builds to keep, per environment. + // "build_cache_size": 0 // The commits after which the regression search in `asv publish` // should start looking for regressions. Dictionary whose keys are @@ -152,16 +142,5 @@ // "regressions_first_commits": { // "some_benchmark": "352cdf", // Consider regressions only after this commit // "another_benchmark": null, // Skip regression detection altogether - // }, - - // The thresholds for relative change in results, after which `asv - // publish` starts reporting regressions. Dictionary of the same - // form as in ``regressions_first_commits``, with values - // indicating the thresholds. If multiple entries match, the - // maximum is taken. If no entry matches, the default is 5%. - // - // "regressions_thresholds": { - // "some_benchmark": 0.01, // Threshold of 1% - // "another_benchmark": 0.5, // Threshold of 50% - // }, + // } } diff --git a/asv_benchmarks/benchmarks/config.json b/asv_benchmarks/benchmarks/config.json index f50827cdbd7b7..b5a10b930e60b 100644 --- a/asv_benchmarks/benchmarks/config.json +++ b/asv_benchmarks/benchmarks/config.json @@ -9,7 +9,7 @@ // Can be overridden by environment variable SKLBENCH_PROFILE. "profile": "regular", - // List of values of n_jobs to use for estimators which accept this + // List of values of n_jobs to use for estimators which accept this // parameter (-1 means all cores). An empty list means all values from 1 to // the maximum number of available cores. // Can be overridden by environment variable SKLBENCH_NJOBS. 
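The comments in `asv_benchmarks/benchmarks/config.json` above note that each setting can be overridden by an environment variable (SKLBENCH_PROFILE for "profile", SKLBENCH_NJOBS for the n_jobs list). A minimal sketch of that precedence rule follows; the helper name is hypothetical and the `//`-comment stripping merely mirrors the commented-JSON style shown above, not the benchmark suite's actual loader.

```python
import json
import os

def load_benchmark_setting(config_path, key, env_var, default=None):
    # Drop the //-comment lines that plain json.loads would reject.
    with open(config_path) as f:
        text = "".join(line for line in f if not line.lstrip().startswith("//"))
    config = json.loads(text)
    # An environment variable, when set, takes precedence over the file.
    return os.environ.get(env_var, config.get(key, default))

# Example: SKLBENCH_PROFILE=fast supersedes "profile": "regular" from the file.
profile = load_benchmark_setting(
    "asv_benchmarks/benchmarks/config.json", "profile", "SKLBENCH_PROFILE"
)
```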
diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 9b0e8c2259f19..a36daf39b50db 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -11,7 +11,7 @@ jobs: - job: git_commit displayName: Get Git Commit pool: - vmImage: ubuntu-20.04 + vmImage: ubuntu-24.04 steps: - bash: python build_tools/azure/get_commit_message.py name: commit @@ -27,24 +27,29 @@ jobs: ) displayName: Linting pool: - vmImage: ubuntu-20.04 + vmImage: ubuntu-24.04 steps: - task: UsePythonVersion@0 inputs: - versionSpec: '3.9' + versionSpec: '3.12' - bash: | source build_tools/shared.sh # Include pytest compatibility with mypy - pip install pytest ruff $(get_dep mypy min) $(get_dep black min) cython-lint + pip install pytest $(get_dep ruff min) $(get_dep mypy min) cython-lint displayName: Install linters - bash: | ./build_tools/linting.sh displayName: Run linters + - bash: | + pip install ninja meson scipy + python build_tools/check-meson-openmp-dependencies.py + displayName: Run Meson OpenMP checks + - template: build_tools/azure/posix.yml parameters: name: Linux_Nightly - vmImage: ubuntu-20.04 + vmImage: ubuntu-22.04 dependsOn: [git_commit, linting] condition: | and( @@ -62,101 +67,33 @@ jobs: SKLEARN_WARNINGS_AS_ERRORS: '1' CHECK_PYTEST_SOFT_DEPENDENCY: 'true' -- template: build_tools/azure/posix-docker.yml - # Experimental CPython branch without the Global Interpreter Lock: - # https://github.com/colesbury/nogil/ - # - # The nogil build relies on a dedicated PyPI-style index to install patched - # versions of NumPy, SciPy and Cython maintained by @colesbury and that - # include specific fixes to make them run correctly without relying on the GIL. - # - # The goal of this CI entry is to make sure that we do not introduce any - # dependency on the GIL in scikit-learn itself. An auxiliary goal is to early - # detect any regression in the patched build dependencies to report them - # upstream. The long-term goal is to be able to stop having to maintain - # multiprocessing based workaround / hacks in joblib / loky to make multi-CPU - # computing in scikit-learn efficient by default using regular threads. - # - # If this experimental entry becomes too unstable, feel free to disable it. 
+- template: build_tools/azure/posix.yml + # CPython 3.13 free-threaded build parameters: - name: Linux_nogil - vmImage: ubuntu-20.04 + name: Linux_free_threaded + vmImage: ubuntu-22.04 dependsOn: [git_commit, linting] condition: | and( succeeded(), not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]')), or(eq(variables['Build.Reason'], 'Schedule'), - contains(dependencies['git_commit']['outputs']['commit.message'], '[nogil]' + contains(dependencies['git_commit']['outputs']['commit.message'], '[free-threaded]' ) ) ) matrix: - pylatest_pip_nogil: - DOCKER_CONTAINER: 'nogil/python' - DISTRIB: 'pip-nogil' - LOCK_FILE: './build_tools/azure/python_nogil_lock.txt' + pylatest_free_threaded: + DISTRIB: 'conda-free-threaded' + LOCK_FILE: './build_tools/azure/pylatest_free_threaded_linux-64_conda.lock' COVERAGE: 'false' - -- template: build_tools/azure/posix-docker.yml - parameters: - name: Linux_Nightly_PyPy - vmImage: ubuntu-20.04 - dependsOn: [linting, git_commit] - condition: | - and( - succeeded(), - not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]')), - or( - eq(variables['Build.Reason'], 'Schedule'), - contains(dependencies['git_commit']['outputs']['commit.message'], '[pypy]') - ) - ) - matrix: - pypy3: - DOCKER_CONTAINER: 'condaforge/miniforge3:4.10.3-5' - DISTRIB: 'conda-pypy3' - LOCK_FILE: './build_tools/azure/pypy3_linux-64_conda.lock' - - -- job: Linux_Nightly_Pyodide - pool: - vmImage: ubuntu-22.04 - variables: - # Need to match Python version and Emscripten version for the correct - # Pyodide version. For example, for Pyodide version 0.25.1, see - # https://github.com/pyodide/pyodide/blob/0.25.1/Makefile.envs - PYODIDE_VERSION: '0.25.1' - EMSCRIPTEN_VERSION: '3.1.46' - PYTHON_VERSION: '3.11.3' - - dependsOn: [git_commit, linting] - condition: | - and( - succeeded(), - not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]')), - or(eq(variables['Build.Reason'], 'Schedule'), - contains(dependencies['git_commit']['outputs']['commit.message'], '[pyodide]' - ) - ) - ) - steps: - - task: UsePythonVersion@0 - inputs: - versionSpec: $(PYTHON_VERSION) - addToPath: true - - - bash: bash build_tools/azure/install_pyodide.sh - displayName: Build Pyodide wheel - - - bash: bash build_tools/azure/test_script_pyodide.sh - displayName: Test Pyodide wheel + SKLEARN_FAULTHANDLER_TIMEOUT: '1800' # 30 * 60 seconds # Will run all the time regardless of linting outcome. - template: build_tools/azure/posix.yml parameters: name: Linux_Runs - vmImage: ubuntu-20.04 + vmImage: ubuntu-22.04 dependsOn: [git_commit] condition: | and( @@ -173,6 +110,7 @@ jobs: # Here we make sure, that they are still run on a regular basis. ${{ if eq(variables['Build.Reason'], 'Schedule') }}: SKLEARN_SKIP_NETWORK_TESTS: '0' + SCIPY_ARRAY_API: '1' # Check compilation with Ubuntu 22.04 LTS (Jammy Jellyfish) and scipy from conda-forge # By default the CI is sequential, where `Ubuntu_Jammy_Jellyfish` runs first and @@ -202,7 +140,7 @@ jobs: - template: build_tools/azure/posix.yml parameters: name: Ubuntu_Atlas - vmImage: ubuntu-22.04 + vmImage: ubuntu-24.04 dependsOn: [linting, git_commit, Ubuntu_Jammy_Jellyfish] # Runs when dependencies succeeded or skipped condition: | @@ -212,8 +150,8 @@ jobs: ) matrix: # Linux environment to test that scikit-learn can be built against - # versions of numpy, scipy with ATLAS that comes with Ubuntu Jammy Jellyfish 22.04 - # i.e. 
numpy 1.21.5 and scipy 1.8.0 + # versions of numpy, scipy with ATLAS that comes with Ubuntu 24.04 Noble Numbat + # i.e. numpy 1.26.4 and scipy 1.11.4 ubuntu_atlas: DISTRIB: 'ubuntu' LOCK_FILE: './build_tools/azure/ubuntu_atlas_lock.txt' @@ -223,7 +161,7 @@ jobs: - template: build_tools/azure/posix.yml parameters: name: Linux - vmImage: ubuntu-20.04 + vmImage: ubuntu-22.04 dependsOn: [linting, git_commit, Ubuntu_Jammy_Jellyfish] # Runs when dependencies succeeded or skipped condition: | @@ -232,10 +170,10 @@ jobs: not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]')) ) matrix: - # Linux + Python 3.9 build with OpenBLAS and without pandas - pymin_conda_defaults_openblas: + # Linux build with minimum supported version of dependencies + pymin_conda_forge_openblas_min_dependencies: DISTRIB: 'conda' - LOCK_FILE: './build_tools/azure/pymin_conda_defaults_openblas_linux-64_conda.lock' + LOCK_FILE: './build_tools/azure/pymin_conda_forge_openblas_min_dependencies_linux-64_conda.lock' # Enable debug Cython directives to capture IndexError exceptions in # combination with the -Werror::pytest.PytestUnraisableExceptionWarning # flag for pytest. @@ -243,7 +181,6 @@ jobs: SKLEARN_ENABLE_DEBUG_CYTHON_DIRECTIVES: '1' SKLEARN_RUN_FLOAT32_TESTS: '1' SKLEARN_TESTS_GLOBAL_RANDOM_SEED: '2' # non-default seed - BUILD_WITH_SETUPTOOLS: 'true' # Linux environment to test the latest available dependencies. # It runs tests requiring lightgbm, pandas and PyAMG. pylatest_pip_openblas_pandas: @@ -257,11 +194,12 @@ jobs: # makes sure that they are single threaded in each xdist subprocess. PYTEST_XDIST_VERSION: 'none' PIP_BUILD_ISOLATION: 'true' + SCIPY_ARRAY_API: '1' - template: build_tools/azure/posix-docker.yml parameters: name: Linux_Docker - vmImage: ubuntu-20.04 + vmImage: ubuntu-24.04 dependsOn: [linting, git_commit, Ubuntu_Jammy_Jellyfish] # Runs when dependencies succeeded or skipped condition: | @@ -270,11 +208,11 @@ jobs: not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]')) ) matrix: - debian_atlas_32bit: - DOCKER_CONTAINER: 'i386/debian:11.2' + debian_32bit: + DOCKER_CONTAINER: 'i386/debian:trixie' DISTRIB: 'debian-32' COVERAGE: "true" - LOCK_FILE: './build_tools/azure/debian_atlas_32bit_lock.txt' + LOCK_FILE: './build_tools/azure/debian_32bit_lock.txt' # disable pytest xdist due to unknown bug with 32-bit container PYTEST_XDIST_VERSION: 'none' SKLEARN_TESTS_GLOBAL_RANDOM_SEED: '4' # non-default seed @@ -282,7 +220,7 @@ jobs: - template: build_tools/azure/posix.yml parameters: name: macOS - vmImage: macOS-11 + vmImage: macOS-13 dependsOn: [linting, git_commit, Ubuntu_Jammy_Jellyfish] # Runs when dependencies succeeded or skipped condition: | @@ -295,6 +233,7 @@ jobs: DISTRIB: 'conda' LOCK_FILE: './build_tools/azure/pylatest_conda_forge_mkl_osx-64_conda.lock' SKLEARN_TESTS_GLOBAL_RANDOM_SEED: '5' # non-default seed + SCIPY_ARRAY_API: '1' pylatest_conda_mkl_no_openmp: DISTRIB: 'conda' LOCK_FILE: './build_tools/azure/pylatest_conda_mkl_no_openmp_osx-64_conda.lock' diff --git a/benchmarks/bench_20newsgroups.py b/benchmarks/bench_20newsgroups.py index 44a117f1ad42d..a559bc59b5f8a 100644 --- a/benchmarks/bench_20newsgroups.py +++ b/benchmarks/bench_20newsgroups.py @@ -21,7 +21,7 @@ "extra_trees": ExtraTreesClassifier(max_features="sqrt", min_samples_split=10), "logistic_regression": LogisticRegression(), "naive_bayes": MultinomialNB(), - "adaboost": AdaBoostClassifier(n_estimators=10, algorithm="SAMME"), + "adaboost": AdaBoostClassifier(n_estimators=10), } 
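Several of the CI entries above pin `SKLEARN_TESTS_GLOBAL_RANDOM_SEED` to a non-default value ('2', '4', '5'). As a hedged sketch of what that variable controls: scikit-learn's test suite exposes a `global_random_seed` pytest fixture, and tests written against it are re-run with whichever seed(s) the variable selects. The estimator and assertion below are illustrative only:

```python
# Sketch of a seed-aware test using scikit-learn's documented
# `global_random_seed` pytest fixture. Run e.g. with:
#   SKLEARN_TESTS_GLOBAL_RANDOM_SEED="4" pytest this_test_file.py
import numpy as np
from sklearn.linear_model import Ridge

def test_ridge_fit_is_seed_stable(global_random_seed):
    rng = np.random.RandomState(global_random_seed)
    X, y = rng.rand(100, 5), rng.rand(100)
    coef = Ridge(alpha=1.0).fit(X, y).coef_
    # Refitting on the same data must reproduce the same coefficients,
    # whatever seed the CI entry selected.
    np.testing.assert_allclose(Ridge(alpha=1.0).fit(X, y).coef_, coef)
```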
diff --git a/benchmarks/bench_covertype.py b/benchmarks/bench_covertype.py index 5b8cdd588c8ee..243cce03a632f 100644 --- a/benchmarks/bench_covertype.py +++ b/benchmarks/bench_covertype.py @@ -41,9 +41,8 @@ """ -# Author: Peter Prettenhofer -# Arnaud Joly -# License: BSD 3 clause +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause import argparse import os diff --git a/benchmarks/bench_hist_gradient_boosting_adult.py b/benchmarks/bench_hist_gradient_boosting_adult.py index 97c762e8e9230..4d5ce48cded81 100644 --- a/benchmarks/bench_hist_gradient_boosting_adult.py +++ b/benchmarks/bench_hist_gradient_boosting_adult.py @@ -46,7 +46,7 @@ def predict(est, data_test, target_test): toc = time() roc_auc = roc_auc_score(target_test, predicted_proba_test[:, 1]) acc = accuracy_score(target_test, predicted_test) - print(f"predicted in {toc - tic:.3f}s, ROC AUC: {roc_auc:.4f}, ACC: {acc :.4f}") + print(f"predicted in {toc - tic:.3f}s, ROC AUC: {roc_auc:.4f}, ACC: {acc:.4f}") data = fetch_openml(data_id=179, as_frame=True) # adult dataset diff --git a/benchmarks/bench_hist_gradient_boosting_higgsboson.py b/benchmarks/bench_hist_gradient_boosting_higgsboson.py index 20057c50dc810..ceab576bc0a52 100644 --- a/benchmarks/bench_hist_gradient_boosting_higgsboson.py +++ b/benchmarks/bench_hist_gradient_boosting_higgsboson.py @@ -74,7 +74,7 @@ def predict(est, data_test, target_test): toc = time() roc_auc = roc_auc_score(target_test, predicted_proba_test[:, 1]) acc = accuracy_score(target_test, predicted_test) - print(f"predicted in {toc - tic:.3f}s, ROC AUC: {roc_auc:.4f}, ACC: {acc :.4f}") + print(f"predicted in {toc - tic:.3f}s, ROC AUC: {roc_auc:.4f}, ACC: {acc:.4f}") df = load_data() diff --git a/benchmarks/bench_isolation_forest_predict.py b/benchmarks/bench_isolation_forest_predict.py new file mode 100644 index 0000000000000..f16e65cf19511 --- /dev/null +++ b/benchmarks/bench_isolation_forest_predict.py @@ -0,0 +1,213 @@ +""" +========================================== +IsolationForest prediction benchmark +========================================== +A test of IsolationForest on classical anomaly detection datasets. + +The benchmark is run as follows: +1. The dataset is randomly split into a training set and a test set, both +assumed to contain outliers. +2. Isolation Forest is trained on the training set fixed at 1000 samples. +3. The test samples are scored using the trained model at: + - 1000, 10000, 50000 samples + - 10, 100, 1000 features + - 0.01, 0.1, 0.5 contamination + - 1, 2, 3, 4 n_jobs + +We compare the prediction time at the very end. + +Here are instructions for running this benchmark to compare runtime against main branch: + +1. Build and run on a branch or main, e.g. for a branch named `pr`: + +```bash +python bench_isolation_forest_predict.py bench ~/bench_results pr +``` + +2. 
Plotting to compare two branches `pr` and `main`: + +```bash +python bench_isolation_forest_predict.py plot ~/bench_results pr main results_image.png +``` +""" + +import argparse +from collections import defaultdict +from pathlib import Path +from time import time + +import numpy as np +import pandas as pd +from joblib import parallel_config + +from sklearn.ensemble import IsolationForest + +print(__doc__) + + +def get_data( + n_samples_train, n_samples_test, n_features, contamination=0.1, random_state=0 +): + """Function based on code from: https://scikit-learn.org/stable/ + auto_examples/ensemble/plot_isolation_forest.html#sphx-glr-auto- + examples-ensemble-plot-isolation-forest-py + """ + rng = np.random.RandomState(random_state) + + X = 0.3 * rng.randn(n_samples_train, n_features) + X_train = np.r_[X + 2, X - 2] + + X = 0.3 * rng.randn(n_samples_test, n_features) + X_test = np.r_[X + 2, X - 2] + + n_outliers = int(np.floor(contamination * n_samples_test)) + X_outliers = rng.uniform(low=-4, high=4, size=(n_outliers, n_features)) + + outlier_idx = rng.choice(np.arange(0, n_samples_test), n_outliers, replace=False) + X_test[outlier_idx, :] = X_outliers + + return X_train, X_test + + +def plot(args): + import matplotlib.pyplot as plt + import seaborn as sns + + bench_results = Path(args.bench_results) + pr_name = args.pr_name + main_name = args.main_name + image_path = args.image_path + + results_path = Path(bench_results) + pr_path = results_path / f"{pr_name}.csv" + main_path = results_path / f"{main_name}.csv" + image_path = results_path / image_path + + df_pr = pd.read_csv(pr_path).assign(branch=pr_name) + df_main = pd.read_csv(main_path).assign(branch=main_name) + + # Merge the two datasets on the common columns + merged_data = pd.merge( + df_pr, + df_main, + on=["n_samples_test", "n_jobs"], + suffixes=("_pr", "_main"), + ) + + # Set up the plotting grid + sns.set(style="whitegrid", context="notebook", font_scale=1.5) + + # Create a figure with subplots + fig, axes = plt.subplots(1, 2, figsize=(18, 6), sharex=True, sharey=True) + + # Plot predict time as a function of n_samples_test with different n_jobs + print(merged_data["n_jobs"].unique()) + ax = axes[0] + sns.lineplot( + data=merged_data, + x="n_samples_test", + y="predict_time_pr", + hue="n_jobs", + style="n_jobs", + markers="o", + ax=ax, + legend="full", + ) + ax.set_title(f"Predict Time vs. n_samples_test - {pr_name} branch") + ax.set_ylabel("Predict Time (Seconds)") + ax.set_xlabel("n_samples_test") + + ax = axes[1] + sns.lineplot( + data=merged_data, + x="n_samples_test", + y="predict_time_main", + hue="n_jobs", + style="n_jobs", + markers="X", + dashes=True, + ax=ax, + legend=None, + ) + ax.set_title(f"Predict Time vs. 
n_samples_test - {main_name} branch") + ax.set_ylabel("Predict Time") + ax.set_xlabel("n_samples_test") + + # Adjust layout and display the plots + plt.tight_layout() + fig.savefig(image_path, bbox_inches="tight") + print(f"Saved image to {image_path}") + + +def bench(args): + results_dir = Path(args.bench_results) + branch = args.branch + random_state = 1 + + results = defaultdict(list) + + # Loop over all datasets for fitting and scoring the estimator: + n_samples_train = 1000 + for n_samples_test in [ + 1000, + 10000, + 50000, + ]: + for n_features in [10, 100, 1000]: + for contamination in [0.01, 0.1, 0.5]: + for n_jobs in [1, 2, 3, 4]: + X_train, X_test = get_data( + n_samples_train, + n_samples_test, + n_features, + contamination, + random_state, + ) + + print("--- Fitting the IsolationForest estimator...") + model = IsolationForest(n_jobs=-1, random_state=random_state) + tstart = time() + model.fit(X_train) + fit_time = time() - tstart + + # clearcache + for _ in range(1000): + 1 + 1 + with parallel_config("threading", n_jobs=n_jobs): + tstart = time() + model.decision_function(X_test) # the lower, the more abnormal + predict_time = time() - tstart + + results["predict_time"].append(predict_time) + results["fit_time"].append(fit_time) + results["n_samples_train"].append(n_samples_train) + results["n_samples_test"].append(n_samples_test) + results["n_features"].append(n_features) + results["contamination"].append(contamination) + results["n_jobs"].append(n_jobs) + + df = pd.DataFrame(results) + df.to_csv(results_dir / f"{branch}.csv", index=False) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + + # parse arguments for benchmarking + subparsers = parser.add_subparsers() + bench_parser = subparsers.add_parser("bench") + bench_parser.add_argument("bench_results") + bench_parser.add_argument("branch") + bench_parser.set_defaults(func=bench) + + # parse arguments for plotting + plot_parser = subparsers.add_parser("plot") + plot_parser.add_argument("bench_results") + plot_parser.add_argument("pr_name") + plot_parser.add_argument("main_name") + plot_parser.add_argument("image_path") + plot_parser.set_defaults(func=plot) + + # enable the parser and run the relevant function + args = parser.parse_args() + args.func(args) diff --git a/benchmarks/bench_isotonic.py b/benchmarks/bench_isotonic.py index 556c452fa3323..be2ff6548cb92 100644 --- a/benchmarks/bench_isotonic.py +++ b/benchmarks/bench_isotonic.py @@ -13,7 +13,7 @@ import argparse import gc -from datetime import datetime +from timeit import default_timer import matplotlib.pyplot as plt import numpy as np @@ -52,9 +52,9 @@ def bench_isotonic_regression(Y): """ gc.collect() - tstart = datetime.now() + tstart = default_timer() isotonic_regression(Y) - return (datetime.now() - tstart).total_seconds() + return default_timer() - tstart if __name__ == "__main__": diff --git a/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py b/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py index 26789c173688f..a468f7b3e1abf 100644 --- a/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py +++ b/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py @@ -36,8 +36,6 @@ of components (this takes more time). 
""" -# Authors: Sylvain MARIE, Schneider Electric - import time import matplotlib.pyplot as plt diff --git a/benchmarks/bench_mnist.py b/benchmarks/bench_mnist.py index 334e69ed5a30a..5745a6d1e3882 100644 --- a/benchmarks/bench_mnist.py +++ b/benchmarks/bench_mnist.py @@ -26,9 +26,8 @@ dummy 0.00s 0.01s 0.8973 """ -# Author: Issam H. Laradji -# Arnaud Joly -# License: BSD 3 clause +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause import argparse import os diff --git a/benchmarks/bench_plot_fastkmeans.py b/benchmarks/bench_plot_fastkmeans.py index 1d420d1dabe5d..d5a2d10fbf22d 100644 --- a/benchmarks/bench_plot_fastkmeans.py +++ b/benchmarks/bench_plot_fastkmeans.py @@ -97,8 +97,8 @@ def compute_bench_2(chunks): if __name__ == "__main__": - from mpl_toolkits.mplot3d import axes3d # noqa register the 3d projection import matplotlib.pyplot as plt + from mpl_toolkits.mplot3d import axes3d # register the 3d projection # noqa: F401 samples_range = np.linspace(50, 150, 5).astype(int) features_range = np.linspace(150, 50000, 5).astype(int) diff --git a/benchmarks/bench_plot_lasso_path.py b/benchmarks/bench_plot_lasso_path.py index 3b46e447401cb..9acc1b4b35952 100644 --- a/benchmarks/bench_plot_lasso_path.py +++ b/benchmarks/bench_plot_lasso_path.py @@ -80,8 +80,8 @@ def compute_bench(samples_range, features_range): if __name__ == "__main__": - from mpl_toolkits.mplot3d import axes3d # noqa register the 3d projection import matplotlib.pyplot as plt + from mpl_toolkits.mplot3d import axes3d # register the 3d projection # noqa: F401 samples_range = np.linspace(10, 2000, 5).astype(int) features_range = np.linspace(10, 2000, 5).astype(int) diff --git a/benchmarks/bench_plot_nmf.py b/benchmarks/bench_plot_nmf.py index f05ede117191b..76d1a6de8286c 100644 --- a/benchmarks/bench_plot_nmf.py +++ b/benchmarks/bench_plot_nmf.py @@ -2,10 +2,8 @@ Benchmarks of Non-Negative Matrix Factorization """ -# Authors: Tom Dupre la Tour (benchmark) -# Chih-Jen Linn (original projected gradient NMF implementation) -# Anthony Di Franco (projected gradient, Python and NumPy port) -# License: BSD 3 clause +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause import numbers import sys diff --git a/benchmarks/bench_plot_parallel_pairwise.py b/benchmarks/bench_plot_parallel_pairwise.py index ca12972f9be6c..5b7cf81f8fce4 100644 --- a/benchmarks/bench_plot_parallel_pairwise.py +++ b/benchmarks/bench_plot_parallel_pairwise.py @@ -1,5 +1,6 @@ -# Author: Mathieu Blondel -# License: BSD 3 clause +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + import time import matplotlib.pyplot as plt diff --git a/benchmarks/bench_plot_polynomial_kernel_approximation.py b/benchmarks/bench_plot_polynomial_kernel_approximation.py index a80455e21c255..1e23e0a3c79ad 100644 --- a/benchmarks/bench_plot_polynomial_kernel_approximation.py +++ b/benchmarks/bench_plot_polynomial_kernel_approximation.py @@ -39,8 +39,8 @@ """ -# Author: Daniel Lopez-Sanchez -# License: BSD 3 clause +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause # Load data manipulation functions # Will use this for timing results diff --git a/benchmarks/bench_plot_randomized_svd.py b/benchmarks/bench_plot_randomized_svd.py index 6bb5618b3633f..e955be64cdee3 100644 --- a/benchmarks/bench_plot_randomized_svd.py +++ b/benchmarks/bench_plot_randomized_svd.py @@ -63,7 +63,8 @@ A. Szlam et al. 
2014 """ -# Author: Giorgio Patrini +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause import gc import os.path diff --git a/benchmarks/bench_plot_svd.py b/benchmarks/bench_plot_svd.py index ed99d1c44e2fd..f93920cae5305 100644 --- a/benchmarks/bench_plot_svd.py +++ b/benchmarks/bench_plot_svd.py @@ -54,8 +54,8 @@ def compute_bench(samples_range, features_range, n_iter=3, rank=50): if __name__ == "__main__": - from mpl_toolkits.mplot3d import axes3d # noqa register the 3d projection import matplotlib.pyplot as plt + from mpl_toolkits.mplot3d import axes3d # register the 3d projection # noqa: F401 samples_range = np.linspace(2, 1000, 4).astype(int) features_range = np.linspace(2, 1000, 4).astype(int) diff --git a/benchmarks/bench_rcv1_logreg_convergence.py b/benchmarks/bench_rcv1_logreg_convergence.py index 166c6c2f5f9d1..27e730736a3de 100644 --- a/benchmarks/bench_rcv1_logreg_convergence.py +++ b/benchmarks/bench_rcv1_logreg_convergence.py @@ -1,7 +1,5 @@ -# Authors: Tom Dupre la Tour -# Olivier Grisel -# -# License: BSD 3 clause +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause import gc import time diff --git a/benchmarks/bench_sgd_regression.py b/benchmarks/bench_sgd_regression.py index 4b1b902795feb..bd00615e3d5f9 100644 --- a/benchmarks/bench_sgd_regression.py +++ b/benchmarks/bench_sgd_regression.py @@ -1,5 +1,5 @@ -# Author: Peter Prettenhofer -# License: BSD 3 clause +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause import gc from time import time diff --git a/benchmarks/bench_tsne_mnist.py b/benchmarks/bench_tsne_mnist.py index 813fffcf29141..8649c7a46b629 100644 --- a/benchmarks/bench_tsne_mnist.py +++ b/benchmarks/bench_tsne_mnist.py @@ -5,7 +5,7 @@ """ -# License: BSD 3 clause +# SPDX-License-Identifier: BSD-3-Clause import argparse import json diff --git a/build_tools/azure/debian_32bit_lock.txt b/build_tools/azure/debian_32bit_lock.txt new file mode 100644 index 0000000000000..8a6f9762399ca --- /dev/null +++ b/build_tools/azure/debian_32bit_lock.txt @@ -0,0 +1,37 @@ +# +# This file is autogenerated by pip-compile with Python 3.12 +# by the following command: +# +# pip-compile --output-file=build_tools/azure/debian_32bit_lock.txt build_tools/azure/debian_32bit_requirements.txt +# +coverage[toml]==7.8.0 + # via pytest-cov +cython==3.0.12 + # via -r build_tools/azure/debian_32bit_requirements.txt +iniconfig==2.1.0 + # via pytest +joblib==1.5.0 + # via -r build_tools/azure/debian_32bit_requirements.txt +meson==1.8.0 + # via meson-python +meson-python==0.18.0 + # via -r build_tools/azure/debian_32bit_requirements.txt +ninja==1.11.1.4 + # via -r build_tools/azure/debian_32bit_requirements.txt +packaging==25.0 + # via + # meson-python + # pyproject-metadata + # pytest +pluggy==1.5.0 + # via pytest +pyproject-metadata==0.9.1 + # via meson-python +pytest==8.3.5 + # via + # -r build_tools/azure/debian_32bit_requirements.txt + # pytest-cov +pytest-cov==6.1.1 + # via -r build_tools/azure/debian_32bit_requirements.txt +threadpoolctl==3.6.0 + # via -r build_tools/azure/debian_32bit_requirements.txt diff --git a/build_tools/azure/debian_atlas_32bit_requirements.txt b/build_tools/azure/debian_32bit_requirements.txt similarity index 65% rename from build_tools/azure/debian_atlas_32bit_requirements.txt rename to build_tools/azure/debian_32bit_requirements.txt index 615193a71fc6b..6dcf67d11c58d 100644 --- a/build_tools/azure/debian_atlas_32bit_requirements.txt +++ 
b/build_tools/azure/debian_32bit_requirements.txt @@ -1,10 +1,10 @@ # DO NOT EDIT: this file is generated from the specification found in the # following script to centralize the configuration for CI builds: # build_tools/update_environments_and_lock_files.py -cython==3.0.10 # min -joblib==1.2.0 # min -threadpoolctl==3.1.0 -pytest==7.1.2 # min -pytest-cov==2.9.0 # min +cython +joblib +threadpoolctl +pytest +pytest-cov ninja meson-python diff --git a/build_tools/azure/debian_atlas_32bit_lock.txt b/build_tools/azure/debian_atlas_32bit_lock.txt deleted file mode 100644 index 61ad07e857cb8..0000000000000 --- a/build_tools/azure/debian_atlas_32bit_lock.txt +++ /dev/null @@ -1,45 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile --output-file=build_tools/azure/debian_atlas_32bit_lock.txt build_tools/azure/debian_atlas_32bit_requirements.txt -# -attrs==23.2.0 - # via pytest -coverage==7.5.0 - # via pytest-cov -cython==3.0.10 - # via -r build_tools/azure/debian_atlas_32bit_requirements.txt -iniconfig==2.0.0 - # via pytest -joblib==1.2.0 - # via -r build_tools/azure/debian_atlas_32bit_requirements.txt -meson==1.4.0 - # via meson-python -meson-python==0.16.0 - # via -r build_tools/azure/debian_atlas_32bit_requirements.txt -ninja==1.11.1.1 - # via -r build_tools/azure/debian_atlas_32bit_requirements.txt -packaging==24.0 - # via - # meson-python - # pyproject-metadata - # pytest -pluggy==1.5.0 - # via pytest -py==1.11.0 - # via pytest -pyproject-metadata==0.8.0 - # via meson-python -pytest==7.1.2 - # via - # -r build_tools/azure/debian_atlas_32bit_requirements.txt - # pytest-cov -pytest-cov==2.9.0 - # via -r build_tools/azure/debian_atlas_32bit_requirements.txt -threadpoolctl==3.1.0 - # via -r build_tools/azure/debian_atlas_32bit_requirements.txt -tomli==2.0.1 - # via - # meson-python - # pytest diff --git a/build_tools/azure/install.sh b/build_tools/azure/install.sh index 3016361a6bfdc..c009e2972036e 100755 --- a/build_tools/azure/install.sh +++ b/build_tools/azure/install.sh @@ -24,6 +24,9 @@ setup_ccache() { done export PATH="${CCACHE_LINKS_DIR}:${PATH}" ccache -M 256M + + # Zeroing statistics so that ccache statistics are shown only for this build + ccache -z fi } @@ -36,21 +39,15 @@ pre_python_environment_install() { elif [[ "$DISTRIB" == "debian-32" ]]; then apt-get update apt-get install -y python3-dev python3-numpy python3-scipy \ - python3-matplotlib libatlas3-base libatlas-base-dev \ + python3-matplotlib libopenblas-dev \ python3-virtualenv python3-pandas ccache git - - elif [[ "$DISTRIB" == "conda-pypy3" ]]; then - # need compilers - apt-get -yq update - apt-get -yq install build-essential fi - } check_packages_dev_version() { for package in $@; do package_version=$(python -c "import $package; print($package.__version__)") - if ! [[ $package_version =~ "dev" ]]; then + if [[ $package_version =~ "^[.0-9]+$" ]]; then echo "$package is not a development version: $package_version" exit 1 fi @@ -59,44 +56,39 @@ check_packages_dev_version() { python_environment_install_and_activate() { if [[ "$DISTRIB" == "conda"* ]]; then - # Install/update conda with the libmamba solver because the legacy - # solver can be slow at installing a specific version of conda-lock. 
- conda install -n base conda conda-libmamba-solver -y - conda config --set solver libmamba - conda install -c conda-forge "$(get_dep conda-lock min)" -y - conda-lock install --name $VIRTUALENV $LOCK_FILE - source activate $VIRTUALENV + create_conda_environment_from_lock_file $VIRTUALENV $LOCK_FILE + activate_environment elif [[ "$DISTRIB" == "ubuntu" || "$DISTRIB" == "debian-32" ]]; then python3 -m virtualenv --system-site-packages --python=python3 $VIRTUALENV - source $VIRTUALENV/bin/activate + activate_environment pip install -r "${LOCK_FILE}" - elif [[ "$DISTRIB" == "pip-nogil" ]]; then - python -m venv $VIRTUALENV - source $VIRTUALENV/bin/activate - pip install -r "${LOCK_FILE}" fi - if [[ "$DISTRIB" == "conda-pip-scipy-dev" ]]; then + # Install additional packages on top of the lock-file in specific cases + if [[ "$DISTRIB" == "conda-free-threaded" ]]; then + # TODO: we install scipy with pip. When there is a conda-forge package, + # we can update build_tools/update_environments_and_lock_files.py and + # remove the line below + pip install scipy --only-binary :all: + # TODO: we install cython 3.1 alpha from pip. When there is a conda-forge package, + # we can update build_tools/update_environments_and_lock_files.py and + # remove the line below + pip install --pre cython --only-binary :all: + + elif [[ "$DISTRIB" == "conda-pip-scipy-dev" ]]; then echo "Installing development dependency wheels" dev_anaconda_url=https://pypi.anaconda.org/scientific-python-nightly-wheels/simple - dev_packages="numpy scipy pandas" - pip install --pre --upgrade --timeout=60 --extra-index $dev_anaconda_url $dev_packages + dev_packages="numpy scipy pandas Cython" + pip install --pre --upgrade --timeout=60 --extra-index $dev_anaconda_url $dev_packages --only-binary :all: check_packages_dev_version $dev_packages - echo "Installing Cython from latest sources" - pip install https://github.com/cython/cython/archive/master.zip echo "Installing joblib from latest sources" pip install https://github.com/joblib/joblib/archive/master.zip echo "Installing pillow from latest sources" pip install https://github.com/python-pillow/Pillow/archive/main.zip - - elif [[ "$DISTRIB" == "pip-nogil" ]]; then - apt-get -yq update - apt-get install -yq ccache - fi } @@ -104,10 +96,6 @@ scikit_learn_install() { setup_ccache show_installed_libraries - # Set parallelism to 3 to overlap IO bound tasks with CPU bound tasks on CI - # workers with 2 cores when building the compiled extensions of scikit-learn. - export SKLEARN_BUILD_PARALLEL=3 - if [[ "$UNAMESTR" == "Darwin" && "$SKLEARN_TEST_NO_OPENMP" == "true" ]]; then # Without openmp, we use the system clang. Here we use /usr/bin/ar # instead because llvm-ar errors @@ -118,6 +106,11 @@ scikit_learn_install() { # brings in openmp so that you end up having the omp.h include inside # the conda environment. find $CONDA_PREFIX -name omp.h -delete -print + # meson >= 1.5 detects OpenMP installed with brew and OpenMP may be installed + # with brew in CI runner. OpenMP was installed with brew in macOS-12 CI + # runners which doesn't seem to be the case in macOS-13 runners anymore, + # but we keep the next line just to be safe ... 
+ brew uninstall --ignore-dependencies --force libomp fi if [[ "$UNAMESTR" == "Linux" ]]; then @@ -126,9 +119,7 @@ scikit_learn_install() { export LDFLAGS="$LDFLAGS -Wl,--sysroot=/" fi - if [[ "$BUILD_WITH_SETUPTOOLS" == "true" ]]; then - python setup.py develop - elif [[ "$PIP_BUILD_ISOLATION" == "true" ]]; then + if [[ "$PIP_BUILD_ISOLATION" == "true" ]]; then # Check that pip can automatically build scikit-learn with the build # dependencies specified in pyproject.toml using an isolated build # environment: diff --git a/build_tools/azure/install_pyodide.sh b/build_tools/azure/install_pyodide.sh deleted file mode 100644 index 58d0348a53202..0000000000000 --- a/build_tools/azure/install_pyodide.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash - -set -e - -git clone https://github.com/emscripten-core/emsdk.git -cd emsdk -./emsdk install $EMSCRIPTEN_VERSION -./emsdk activate $EMSCRIPTEN_VERSION -source emsdk_env.sh -cd - - -pip install pyodide-build==$PYODIDE_VERSION pyodide-cli - -pyodide build - -ls -ltrh dist - -# The Pyodide js library is needed by build_tools/azure/test_script_pyodide.sh -# to run tests inside Pyodide -npm install pyodide@$PYODIDE_VERSION diff --git a/build_tools/azure/install_setup_conda.sh b/build_tools/azure/install_setup_conda.sh new file mode 100755 index 0000000000000..d09a02cda5a9f --- /dev/null +++ b/build_tools/azure/install_setup_conda.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +set -e +set -x + +if [[ -z "${CONDA}" ]]; then + # In some runners (macOS-13 and macOS-14 in October 2024) conda is not + # installed so we install it ourselves + MINIFORGE_URL="https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh" + wget ${MINIFORGE_URL} -O miniforge.sh + bash miniforge.sh -b -u -p $HOME/miniforge3 + CONDA="$HOME/miniforge3" +else + # In most runners (in October 2024) conda is installed, + # but in a system folder and we want it user writable + sudo chown -R $USER $CONDA +fi + +# Add conda to the PATH so that it can be used in further Azure CI steps. +# Need set +x for ##vso Azure magic otherwise it may add a quote in the PATH. 
+# For more details, see https://github.com/microsoft/azure-pipelines-tasks/issues/10331 +set +x +echo "##vso[task.prependpath]$CONDA/bin" +set -x diff --git a/build_tools/azure/posix-docker.yml b/build_tools/azure/posix-docker.yml index b00ca66c378ca..49b0eb5f0f356 100644 --- a/build_tools/azure/posix-docker.yml +++ b/build_tools/azure/posix-docker.yml @@ -131,3 +131,4 @@ jobs: retryCountOnTaskFailure: 5 env: CODECOV_TOKEN: $(CODECOV_TOKEN) + JUNIT_FILE: $(TEST_DIR)/$(JUNITXML) diff --git a/build_tools/azure/posix.yml b/build_tools/azure/posix.yml index 35e5165d22c83..e0f504ba540db 100644 --- a/build_tools/azure/posix.yml +++ b/build_tools/azure/posix.yml @@ -36,11 +36,8 @@ jobs: - bash: $(pyTools.pythonLocation)/bin/python build_tools/azure/get_selected_tests.py displayName: Check selected tests for all random seeds condition: eq(variables['Build.Reason'], 'PullRequest') - - bash: echo "##vso[task.prependpath]$CONDA/bin" - displayName: Add conda to PATH - condition: startsWith(variables['DISTRIB'], 'conda') - - bash: sudo chown -R $USER $CONDA - displayName: Take ownership of conda installation + - bash: build_tools/azure/install_setup_conda.sh + displayName: Install conda if necessary and set it up condition: startsWith(variables['DISTRIB'], 'conda') - task: Cache@2 inputs: @@ -109,3 +106,4 @@ jobs: retryCountOnTaskFailure: 5 env: CODECOV_TOKEN: $(CODECOV_TOKEN) + JUNIT_FILE: $(TEST_DIR)/$(JUNITXML) diff --git a/build_tools/azure/pylatest_conda_forge_mkl_linux-64_conda.lock b/build_tools/azure/pylatest_conda_forge_mkl_linux-64_conda.lock index 3194bf106d6c2..78f45bec169ac 100644 --- a/build_tools/azure/pylatest_conda_forge_mkl_linux-64_conda.lock +++ b/build_tools/azure/pylatest_conda_forge_mkl_linux-64_conda.lock @@ -1,221 +1,248 @@ # Generated by conda-lock. 
# platform: linux-64 -# input_hash: 2622dc7361d0af53cfb31534b939a13e48192a3260137ba4ec20083659c2e5fa +# input_hash: f524d159a11a0a80ead3448f16255169f24edde269f6b81e8e28453bc4f7fc53 @EXPLICIT -https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 -https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2024.2.2-hbcca054_0.conda#2f4327a1cbe7f022401b236e915a5fef https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2#0c96522c6bdaed4b1566d11387caaf45 https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2#34893075a5c9e55cdafac56607368fc6 https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2#4d59c254e01d9cde7957100457e2d5fb -https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_1.conda#6185f640c43843e5ad6fd1c5372c3f80 -https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.40-h55db66e_0.conda#10569984e7db886e4f1abc2b47ad79a1 -https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.2.0-h95c4c6d_6.conda#3cfab3e709f77e9f1b3d380eb622494a -https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.11-4_cp311.conda#d786502c97404c94d7d58d258a445a65 -https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda#161081fc7cec0bfda0d86d7cb595f8d8 +https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_3.conda#49023d73832ef61042f6a237cb2687e7 +https://conda.anaconda.org/conda-forge/linux-64/libopentelemetry-cpp-headers-1.20.0-ha770c72_0.conda#96806e6c31dc89253daff2134aeb58f3 +https://conda.anaconda.org/conda-forge/linux-64/mkl-include-2024.2.2-ha957f24_16.conda#42b0d14354b5910a9f41e29289914f6b +https://conda.anaconda.org/conda-forge/linux-64/nlohmann_json-3.12.0-h3f2d84a_0.conda#d76872d096d063e226482c99337209dc +https://conda.anaconda.org/conda-forge/noarch/python_abi-3.13-7_cp313.conda#e84b44e6300f1703cb25d29120c5b1d8 +https://conda.anaconda.org/conda-forge/noarch/tzdata-2025b-h78e105d_0.conda#4222072737ccff51314b5ece9c7d6f5a +https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2025.4.26-hbd8a1cb_0.conda#95db94f75ba080a22eb623590993167b https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-0.tar.bz2#f766549260d6815b0c52253f1fb1bb29 +https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.43-h712a8e2_4.conda#01f8d123c96816249efd255a31ad7712 +https://conda.anaconda.org/conda-forge/linux-64/libglvnd-1.7.0-ha4b6fd6_2.conda#434ca7e50e40f4918ab701e3facd59a0 +https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-20.1.4-h024ca30_0.conda#4fc395cda27912a7d904b86b5dbf3a4d +https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-3_kmp_llvm.conda#ee5c2118262e30b972bc0b4db8ef0ba5 https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2#fee5683a3f04bd15cbd8318b096a27ab -https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_kmp_llvm.tar.bz2#562b26ba2e19059551a811e72ab7f793 -https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.2.0-hc881cc4_6.conda#df88796bd09a0d2ed292e59101478ad8 -https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.11-hd590300_1.conda#0bb492cca54017ea314b809b1ee3a176 -https://conda.anaconda.org/conda-forge/linux-64/attr-2.5.1-h166bdaf_1.tar.bz2#d9c69a24ad678ffce24c6543a0176b00 -https://conda.anaconda.org/conda-forge/linux-64/aws-c-common-0.9.0-hd590300_0.conda#71b89db63b5b504e7afc8ad901172e1e 
-https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hd590300_5.conda#69b8b6202a07720f448be700e300ccf4 -https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.28.1-hd590300_0.conda#dcde58ff9a1f30b0037a2315d1846d1f -https://conda.anaconda.org/conda-forge/linux-64/gettext-tools-0.22.5-h59595ed_2.conda#985f2f453fb72408d6b6f1be0f324033 -https://conda.anaconda.org/conda-forge/linux-64/gflags-2.2.2-he1b5a44_1004.tar.bz2#cddaf2c63ea4a5901cf09524c490ecdc -https://conda.anaconda.org/conda-forge/linux-64/graphite2-1.3.13-h59595ed_1003.conda#f87c7b7c2cb45f323ffbce941c78ab7c -https://conda.anaconda.org/conda-forge/linux-64/icu-73.2-h59595ed_0.conda#cc47e1facc155f91abd89b11e48e72ff +https://conda.anaconda.org/conda-forge/linux-64/libegl-1.7.0-ha4b6fd6_2.conda#c151d5eb730e9b7480e6d48c0fc44048 +https://conda.anaconda.org/conda-forge/linux-64/libopengl-1.7.0-ha4b6fd6_2.conda#7df50d44d4a14d6c31a2c54f2cd92157 +https://conda.anaconda.org/conda-forge/linux-64/libgcc-14.2.0-h767d61c_2.conda#ef504d1acbd74b7cc6849ef8af47dd03 +https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.14-hb9d3cd8_0.conda#76df83c2a9035c54df5d04ff81bcc02d +https://conda.anaconda.org/conda-forge/linux-64/aws-c-common-0.12.2-hb9d3cd8_0.conda#bd52f376d1d34d7823a7bf0773be86e8 +https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.34.5-hb9d3cd8_0.conda#f7f0d6cc2dc986d42ac2689ec88192be +https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.1.0-hb9d3cd8_2.conda#41b599ed2b02abcfdd84302bff174b23 +https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.23-h86f0d12_0.conda#27fe770decaf469a53f3e3a6d593067f +https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.0-h5888daf_0.conda#db0bfbe7dd197b68ad5f30333bae6ce0 +https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.6-h2dba641_1.conda#ede4673863426c0883c0063d853bbd85 +https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-14.2.0-h69a702a_2.conda#a2222a6ada71fb478682efe483ce0f92 +https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-14.2.0-hf1ad2bd_2.conda#556a4fdfac7287d349b8f09aba899693 +https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.18-h4ce23a2_1.conda#e796ff8ddc598affdf7c173d6145f087 +https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.1.0-hb9d3cd8_0.conda#9fa334557db9f63da6c9285fd2a48638 +https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.8.1-hb9d3cd8_1.conda#a76fd702c93cd2dfd89eff30a5fd45a8 +https://conda.anaconda.org/conda-forge/linux-64/libntlm-1.8-hb9d3cd8_0.conda#7c7927b404672409d9917d49bff5f2d6 +https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-14.2.0-h8f9b012_2.conda#a78c856b6dc6bf4ea8daeb9beaaa3fb0 +https://conda.anaconda.org/conda-forge/linux-64/libutf8proc-2.10.0-h4c51ac1_0.conda#aeccfff2806ae38430638ffbb4be9610 +https://conda.anaconda.org/conda-forge/linux-64/libuv-1.50.0-hb9d3cd8_0.conda#771ee65e13bc599b0b62af5359d80169 +https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.5.0-h851e524_0.conda#63f790534398730f59e1b899c3644d4a +https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-hb9d3cd8_2.conda#edb0dca6bc32e4f4789199455a1dbeb8 +https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h2d0b736_3.conda#47e340acb35de30501a76c7c799c41d7 +https://conda.anaconda.org/conda-forge/linux-64/openssl-3.5.0-h7b32b05_1.conda#de356753cfdbffcde5bb1e86e3aa6cd0 +https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-hb9d3cd8_1002.conda#b3c17d95b5a10c6e64a21fa17573e70e 
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libice-1.1.2-hb9d3cd8_0.conda#fb901ff28063514abb6046c9ec2c4a45 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.12-hb9d3cd8_0.conda#f6ebe2cb3f82ba6c057dde5d9debe4f7 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdmcp-1.1.5-hb9d3cd8_0.conda#8035c64cb77ed555e3f150b7b3972480 +https://conda.anaconda.org/conda-forge/linux-64/aws-c-cal-0.9.0-hada3f3f_0.conda#05a965f6def53dbcb5217945eb0b3689 +https://conda.anaconda.org/conda-forge/linux-64/aws-c-compression-0.3.1-hc2d532b_4.conda#4cc4dcd582b2f087d62c70b2d6daa59f +https://conda.anaconda.org/conda-forge/linux-64/aws-c-sdkutils-0.2.3-hc2d532b_4.conda#15a1f6fb713b4cd3fee74588b996a846 +https://conda.anaconda.org/conda-forge/linux-64/aws-checksums-0.2.7-hc2d532b_0.conda#398521f53e58db246658e7cff56d669f +https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h4bc722e_7.conda#62ee74e96c5ebb0af99386de58cf9553 +https://conda.anaconda.org/conda-forge/linux-64/double-conversion-3.3.1-h5888daf_0.conda#bfd56492d8346d669010eccafe0ba058 +https://conda.anaconda.org/conda-forge/linux-64/expat-2.7.0-h5888daf_0.conda#d6845ae4dea52a2f90178bf1829a21f8 +https://conda.anaconda.org/conda-forge/linux-64/gflags-2.2.2-h5888daf_1005.conda#d411fc29e338efb48c5fd4576d71d881 https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2#30186d27e2c9fa62b45fb1476b7200e3 -https://conda.anaconda.org/conda-forge/linux-64/lame-3.100-h166bdaf_1003.tar.bz2#a8832b479f93521a9e7b5b743803be51 -https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h27087fc_0.tar.bz2#76bbff344f0134279f225174e9064c8f -https://conda.anaconda.org/conda-forge/linux-64/libabseil-20230125.3-cxx17_h59595ed_0.conda#d1db1b8be7c3a8983dcbbbfe4f0765de -https://conda.anaconda.org/conda-forge/linux-64/libasprintf-0.22.5-h661eb56_2.conda#dd197c968bf9760bba0031888d431ede -https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.0.9-h166bdaf_9.conda#61641e239f96eae2b8492dc7e755828c -https://conda.anaconda.org/conda-forge/linux-64/libcrc32c-1.1.2-h9c3ff4c_0.tar.bz2#c965a5aa0d5c1c37ffc62dff36e28400 -https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.20-hd590300_0.conda#8e88f9389f1165d7c0936fe40d9a9a79 +https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h0aef613_1.conda#9344155d33912347b37f0ae6c410a835 +https://conda.anaconda.org/conda-forge/linux-64/libabseil-20250127.1-cxx17_hbbce691_0.conda#00290e549c5c8a32cc271020acc9ec6b +https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.1.0-hb9d3cd8_2.conda#9566f0bd264fbd463002e759b8a82401 +https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.1.0-hb9d3cd8_2.conda#06f70867945ea6a84d35836af780f1de +https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20250104-pl5321h7949ede_0.conda#c277e0a4d549b03ac1e9d6cbbe3d017b https://conda.anaconda.org/conda-forge/linux-64/libev-4.33-hd590300_2.conda#172bf1cd1ff8629f2b1179945ed45055 -https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.6.2-h59595ed_0.conda#e7ba12deb7020dd080c6c70e7b6f6a3d -https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3 -https://conda.anaconda.org/conda-forge/linux-64/libgettextpo-0.22.5-h59595ed_2.conda#172bcc51059416e7ce99e7b528cede83 -https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-13.2.0-h43f5ff8_6.conda#e54a5ddc67e673f9105cf2a2e9c070b0 -https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.17-hd590300_2.conda#d66573916ffcf376178462f1b61c941e 
-https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.0.0-hd590300_1.conda#ea25936bb4080d843790b586850f82b8 -https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hd590300_0.conda#30fd6e37fe21f86f4bd26d6ee73eeec7 -https://conda.anaconda.org/conda-forge/linux-64/libnuma-2.0.18-hd590300_0.conda#8feeecae73aeef0a2985af46b5a2c1df -https://conda.anaconda.org/conda-forge/linux-64/libogg-1.3.4-h7f98852_1.tar.bz2#6e8cc2173440d77708196c5b93771680 -https://conda.anaconda.org/conda-forge/linux-64/libopus-1.3.1-h7f98852_1.tar.bz2#15345e56d527b330e1cacbdf58676e8f -https://conda.anaconda.org/conda-forge/linux-64/libutf8proc-2.8.0-h166bdaf_0.tar.bz2#ede4266dc02e875fe1ea77b25dd43747 -https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda#40b61aab5c7ba9ff276c41cfffe6b80b -https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.4.0-hd590300_0.conda#b26e8aa824079e1be0294e7152ca4559 -https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda#5aa797f8787fe7a17d1b0821485b5adc -https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-hd590300_5.conda#f36c115f1ee199da648e0597ec2047ad -https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.9.4-hcb278e6_0.conda#318b08df404f9c9be5712aaa5a6f0bb0 -https://conda.anaconda.org/conda-forge/linux-64/mpg123-1.32.6-h59595ed_0.conda#9160cdeb523a1b20cf8d2a0bf821f45d -https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.4.20240210-h59595ed_0.conda#97da8860a0da5413c7c98a3b3838a645 -https://conda.anaconda.org/conda-forge/linux-64/ninja-1.12.0-h00ab1b0_0.conda#b048701d52e7cbb5f59ddd4d3b17bbf5 -https://conda.anaconda.org/conda-forge/linux-64/nspr-4.35-h27087fc_0.conda#da0ec11a6454ae19bff5b02ed881a2b1 -https://conda.anaconda.org/conda-forge/linux-64/openssl-3.2.1-hd590300_1.conda#9d731343cff6ee2e5a25c4a091bf8e2a -https://conda.anaconda.org/conda-forge/linux-64/pixman-0.43.2-h59595ed_0.conda#71004cbf7924e19c02746ccde9fd7123 -https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-h36c2ea0_1001.tar.bz2#22dad4df6e8630e8dff2428f6f6a7036 -https://conda.anaconda.org/conda-forge/linux-64/rdma-core-28.9-h59595ed_1.conda#aeffb7c06b5f65e55e6c637408dc4100 -https://conda.anaconda.org/conda-forge/linux-64/re2-2023.03.02-h8c504da_0.conda#206f8fa808748f6e90599c3368a1114e -https://conda.anaconda.org/conda-forge/linux-64/sleef-3.5.1-h9b69904_2.tar.bz2#6e016cf4c525d04a7bd038cee53ad3fd -https://conda.anaconda.org/conda-forge/linux-64/snappy-1.1.10-hdb0a2a9_1.conda#78b8b85bdf1f42b8a2b3cb577d8742d1 -https://conda.anaconda.org/conda-forge/linux-64/xorg-kbproto-1.0.7-h7f98852_1002.tar.bz2#4b230e8381279d76131116660f5a241a -https://conda.anaconda.org/conda-forge/linux-64/xorg-libice-1.1.1-hd590300_0.conda#b462a33c0be1421532f28bfe8f4a7514 -https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.11-hd590300_0.conda#2c80dc38fface310c9bd81b17037fee5 -https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdmcp-1.1.3-h7f98852_0.tar.bz2#be93aabceefa2fac576e971aef407908 -https://conda.anaconda.org/conda-forge/linux-64/xorg-renderproto-0.11.1-h7f98852_1002.tar.bz2#06feff3d2634e3097ce2fe681474b534 -https://conda.anaconda.org/conda-forge/linux-64/xorg-xextproto-7.3.0-h0b41bf4_1003.conda#bce9f945da8ad2ae9b1d7165a64d0f87 -https://conda.anaconda.org/conda-forge/linux-64/xorg-xf86vidmodeproto-2.3.1-h7f98852_1002.tar.bz2#3ceea9668625c18f19530de98b15d5b0 -https://conda.anaconda.org/conda-forge/linux-64/xorg-xproto-7.0.31-h7f98852_1007.tar.bz2#b4a4381d54784606820704f7b5f05a15 
-https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0 -https://conda.anaconda.org/conda-forge/linux-64/aws-c-cal-0.6.1-hc309b26_1.conda#cc09293a2c2b7fd77aff284f370c12c0 -https://conda.anaconda.org/conda-forge/linux-64/aws-c-compression-0.2.17-h4d4d85c_2.conda#9ca99452635fe03eb5fa937f5ae604b0 -https://conda.anaconda.org/conda-forge/linux-64/aws-c-sdkutils-0.1.12-h4d4d85c_1.conda#eba092fc6de212a01de0065f38fe8bbb -https://conda.anaconda.org/conda-forge/linux-64/aws-checksums-0.1.17-h4d4d85c_1.conda#30f9df85ce23cd14faa9a4dfa50cca2b -https://conda.anaconda.org/conda-forge/linux-64/expat-2.6.2-h59595ed_0.conda#53fb86322bdb89496d7579fe3f02fd61 -https://conda.anaconda.org/conda-forge/linux-64/glog-0.6.0-h6f12383_0.tar.bz2#b31f3565cb84435407594e548a2fb7b2 -https://conda.anaconda.org/conda-forge/linux-64/libasprintf-devel-0.22.5-h661eb56_2.conda#02e41ab5834dcdcc8590cf29d9526f50 -https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.0.9-h166bdaf_9.conda#081aa22f4581c08e4372b0b6c2f8478e -https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.0.9-h166bdaf_9.conda#1f0a03af852a9659ed2bf08f2f1704fd -https://conda.anaconda.org/conda-forge/linux-64/libcap-2.69-h0f662aa_0.conda#25cb5999faa414e5ccb2c1388f62d3d5 -https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2.tar.bz2#4d331e44109e3f0e19b4cb8f9b82f3e1 https://conda.anaconda.org/conda-forge/linux-64/libevent-2.1.12-hf998b51_1.conda#a1cfcc585f0c42bf8d5546bb1dfb668d -https://conda.anaconda.org/conda-forge/linux-64/libgettextpo-devel-0.22.5-h59595ed_2.conda#b63d9b6da3653179a278077f0de20014 -https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-13.2.0-h69a702a_6.conda#3666a850342f8f3be88f9a93d948d027 -https://conda.anaconda.org/conda-forge/linux-64/libnghttp2-1.58.0-h47da74e_1.conda#700ac6ea6d53d5510591c4344d5c989a -https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.43-h2797004_0.conda#009981dd9cfcaa4dbfa25ffaed86bcae -https://conda.anaconda.org/conda-forge/linux-64/libprotobuf-3.21.12-hfc55251_2.conda#e3a7d4ba09b8dc939b98fef55f539220 -https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.45.3-h2797004_0.conda#b3316cbe90249da4f8e84cd66e1cc55b -https://conda.anaconda.org/conda-forge/linux-64/libssh2-1.11.0-h0841786_0.conda#1f5a58e686b13bcfde88b93f547d23fe -https://conda.anaconda.org/conda-forge/linux-64/libvorbis-1.3.7-h9c3ff4c_0.tar.bz2#309dec04b70a3cc0f1e84a4013683bc0 -https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.15-h0b41bf4_0.conda#33277193f5b92bad9fdd230eb700929c -https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.12.6-h232c23b_2.conda#9a3a42df8a95f65334dfc7b80da1195d -https://conda.anaconda.org/conda-forge/linux-64/mysql-common-8.3.0-hf1915f5_4.conda#784a4df6676c581ca624fbe460703a6d -https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.43-hcad00b1_0.conda#8292dea9e022d9610a11fce5e0896ed8 -https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4 -https://conda.anaconda.org/conda-forge/linux-64/s2n-1.3.49-h06160fa_0.conda#1d78349eb26366ecc034a4afe70a8534 +https://conda.anaconda.org/conda-forge/linux-64/libgfortran-14.2.0-h69a702a_2.conda#fb54c4ea68b460c278d26eea89cfbcc3 +https://conda.anaconda.org/conda-forge/linux-64/libmpdec-4.0.0-h4bc722e_0.conda#aeb98fdeb2e8f25d43ef71fbacbeec80 +https://conda.anaconda.org/conda-forge/linux-64/libpciaccess-0.18-hd590300_0.conda#48f4330bfcd959c3cfb704d424903c82 
+https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.47-h943b412_0.conda#55199e2ae2c3651f6f9b2a447b47bdc9 +https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.49.1-hee588c1_2.conda#962d6ac93c30b1dfc54c9cccafd1003e +https://conda.anaconda.org/conda-forge/linux-64/libssh2-1.11.1-hcf80075_0.conda#eecce068c7e4eddeb169591baac20ac4 +https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-14.2.0-h4852527_2.conda#c75da67f045c2627f59e6fcb5f4e3a9b +https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda#40b61aab5c7ba9ff276c41cfffe6b80b +https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.17.0-h8a09558_0.conda#92ed62436b625154323d40d5f2f11dd7 +https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.10.0-h5888daf_1.conda#9de5350a85c4a20c685259b889aa6393 +https://conda.anaconda.org/conda-forge/linux-64/mysql-common-9.2.0-h266115a_0.conda#db22a0962c953e81a2a679ecb1fc6027 +https://conda.anaconda.org/conda-forge/linux-64/ninja-1.12.1-hff21bea_1.conda#2322531904f27501ee19847b87ba7c64 +https://conda.anaconda.org/conda-forge/linux-64/pixman-0.46.0-h29eaf8c_0.conda#d2f1c87d4416d1e7344cf92b1aaee1c4 +https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8c095d6_2.conda#283b96675859b20a825f8fa30f311446 +https://conda.anaconda.org/conda-forge/linux-64/s2n-1.5.17-hba75a32_0.conda#dbb899164b5451c34969e67a35ca17a9 +https://conda.anaconda.org/conda-forge/linux-64/sleef-3.8-h1b44611_0.conda#aec4dba5d4c2924730088753f6fa164b +https://conda.anaconda.org/conda-forge/linux-64/snappy-1.2.1-h8bd8927_1.conda#3b3e64af585eadfb52bb90b553db5edf https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda#d453b98d9c83e71da0741bb0ff4d76bc -https://conda.anaconda.org/conda-forge/linux-64/ucx-1.14.1-h64cca9d_5.conda#39aa3b356d10d7e5add0c540945a0944 -https://conda.anaconda.org/conda-forge/linux-64/xorg-libsm-1.2.4-h7391055_0.conda#93ee23f12bc2e684548181256edd2cf6 -https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.13-hd590300_5.conda#68c34ec6149623be41a1933ab996a209 -https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.5-hfc55251_0.conda#04b88013080254850d6c01ed54810589 -https://conda.anaconda.org/conda-forge/linux-64/aws-c-io-0.13.32-he9a53bd_1.conda#8a24e5820f4a0ffd2ed9c4722cd5d7ca -https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.0.9-h166bdaf_9.conda#d47dee1856d9cb955b8076eeff304a5b -https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-h267a509_2.conda#9ae35c3d96db2c94ce0cef86efdfa2cb -https://conda.anaconda.org/conda-forge/linux-64/gettext-0.22.5-h59595ed_2.conda#219ba82e95d7614cf7140d2a4afc0926 -https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.2-h659d440_0.conda#cd95826dbd331ed1be26bdf401432844 -https://conda.anaconda.org/conda-forge/linux-64/libglib-2.80.0-hf2295e7_6.conda#9342e7c44c38bea649490f72d92c382d -https://conda.anaconda.org/conda-forge/linux-64/libgrpc-1.54.3-hb20ce57_0.conda#7af7c59ab24db007dfd82e0a3a343f66 -https://conda.anaconda.org/conda-forge/linux-64/libhiredis-1.0.2-h2cc385e_0.tar.bz2#b34907d3a81a3cd8095ee83d174c074a -https://conda.anaconda.org/conda-forge/linux-64/libhwloc-2.10.0-default_h2fb2949_1000.conda#7e3726e647a619c6ce5939014dfde86d -https://conda.anaconda.org/conda-forge/linux-64/libllvm15-15.0.7-hb3ce162_4.conda#8a35df3cbc0c8b12cc8af9473ae75eef -https://conda.anaconda.org/conda-forge/linux-64/libllvm18-18.1.3-h2448989_0.conda#927b6d6e80b2c0d4405a58b61ca248a3 -https://conda.anaconda.org/conda-forge/linux-64/libthrift-0.18.1-h8fd135c_2.conda#bbf65f7688512872f063810623b755dc 
-https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.6.0-h1dd3fc0_3.conda#66f03896ffbe1a110ffda05c7a856504 -https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-18.1.3-h4dfa4b3_0.conda#d39965123dffcad4d750989be65bcb7c -https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-8.3.0-hca2cd23_4.conda#1b50eebe2a738a3146c154d2eceaa8b6 -https://conda.anaconda.org/conda-forge/linux-64/nss-3.98-h1d7d5a4_0.conda#54b56c2fdf973656b748e0378900ec13 -https://conda.anaconda.org/conda-forge/linux-64/orc-1.9.0-h2f23424_1.conda#9571eb3eb0f7fe8b59956a7786babbcd -https://conda.anaconda.org/conda-forge/linux-64/python-3.11.9-hb806964_0_cpython.conda#ac68acfa8b558ed406c75e98d3428d7b -https://conda.anaconda.org/conda-forge/linux-64/xcb-util-0.4.0-hd590300_1.conda#9bfac7ccd94d54fd21a0501296d60424 -https://conda.anaconda.org/conda-forge/linux-64/xcb-util-keysyms-0.4.0-h8ee46fc_1.conda#632413adcd8bc16b515cab87a2932913 -https://conda.anaconda.org/conda-forge/linux-64/xcb-util-renderutil-0.3.9-hd590300_1.conda#e995b155d938b6779da6ace6c6b13816 -https://conda.anaconda.org/conda-forge/linux-64/xcb-util-wm-0.4.1-h8ee46fc_1.conda#90108a432fb5c6150ccfee3f03388656 -https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.9-h8ee46fc_0.conda#077b6e8ad6a3ddb741fce2496dd01bec -https://conda.anaconda.org/conda-forge/noarch/array-api-compat-1.6-pyhd8ed1ab_0.conda#f04c36d7284243a7d982b4ef4982eb23 -https://conda.anaconda.org/conda-forge/linux-64/aws-c-event-stream-0.3.1-h2e3709c_4.conda#2cf21b1cbc1c096a28ffa2892257a2c1 -https://conda.anaconda.org/conda-forge/linux-64/aws-c-http-0.7.11-h00aa349_4.conda#cb932dff7328ff620ce8059c9968b095 -https://conda.anaconda.org/conda-forge/linux-64/brotli-1.0.9-h166bdaf_9.conda#4601544b4982ba1861fa9b9c607b2c06 -https://conda.anaconda.org/conda-forge/linux-64/ccache-4.9.1-h1fcd64f_0.conda#3620f564bcf28c3524951b6f64f5c5ac -https://conda.anaconda.org/conda-forge/noarch/certifi-2024.2.2-pyhd8ed1ab_0.conda#0876280e409658fc6f9e75d035960333 -https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 -https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhd8ed1ab_0.conda#5cd86562580f274031ede6aa6aa24441 -https://conda.anaconda.org/conda-forge/linux-64/cython-3.0.10-py311hb755f60_0.conda#f3a8a500a2e743ff92f418f0eaf9bf71 -https://conda.anaconda.org/conda-forge/linux-64/dbus-1.13.6-h5008d03_3.tar.bz2#ecfff944ba3960ecb334b9a2663d708d -https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.0-pyhd8ed1ab_2.conda#8d652ea2ee8eaee02ed8dc820bc794aa -https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_0.conda#15dda3cdbf330abfe9f555d22f66db46 -https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.14.2-h14ed4e7_0.conda#0f69b688f52ff6da70bccb7ff7001d1d -https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.80.0-hde27a5a_6.conda#a9d23c02485c5cf055f9ac90eb9c9c63 -https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5 -https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.5-py311h9547e67_1.conda#2c65bdf442b0d37aad080c8a4e0d452f -https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.16-hb7c19ff_0.conda#51bb7010fc86f70eee639b4bb7a894f5 -https://conda.anaconda.org/conda-forge/linux-64/libclang-cpp15-15.0.7-default_h127d8a8_5.conda#d0a9633b53cdc319b8a1a532ae7822b8 -https://conda.anaconda.org/conda-forge/linux-64/libclang13-18.1.3-default_h5d6823c_0.conda#5fff487759736b275dc3e4a263cac666 
+https://conda.anaconda.org/conda-forge/linux-64/wayland-1.23.1-h3e06ad9_1.conda#a37843723437ba75f42c9270ffe800b1 +https://conda.anaconda.org/conda-forge/linux-64/zlib-1.3.1-hb9d3cd8_2.conda#c9f075ab2f33b3bbee9e62d4ad0a6cd8 +https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.7-hb8e6e7a_2.conda#6432cb5d4ac0046c3ac0a8a0f95842f9 +https://conda.anaconda.org/conda-forge/linux-64/aws-c-io-0.18.1-h1a9f769_2.conda#19221489bff45371c13b983848f79a24 +https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.1.0-hb9d3cd8_2.conda#c63b5e52939e795ba8d26e35d767a843 +https://conda.anaconda.org/conda-forge/linux-64/glog-0.7.1-hbabe93e_0.conda#ff862eebdfeb2fd048ae9dc92510baca +https://conda.anaconda.org/conda-forge/linux-64/gmp-6.3.0-hac33072_2.conda#c94a5994ef49749880a8139cf9afcbe1 +https://conda.anaconda.org/conda-forge/linux-64/graphite2-1.3.13-h59595ed_1003.conda#f87c7b7c2cb45f323ffbce941c78ab7c +https://conda.anaconda.org/conda-forge/linux-64/icu-75.1-he02047a_0.conda#8b189310083baabfb622af68fd9d3ae3 +https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.3-h659f571_0.conda#3f43953b7d3fb3aaa1d0d0723d91e368 +https://conda.anaconda.org/conda-forge/linux-64/libcrc32c-1.1.2-h9c3ff4c_0.tar.bz2#c965a5aa0d5c1c37ffc62dff36e28400 +https://conda.anaconda.org/conda-forge/linux-64/libdrm-2.4.124-hb9d3cd8_0.conda#8bc89311041d7fcb510238cf0848ccae +https://conda.anaconda.org/conda-forge/linux-64/libfreetype6-2.13.3-h48d6fc4_1.conda#3c255be50a506c50765a93a6644f32fe +https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-14.2.0-h69a702a_2.conda#4056c857af1a99ee50589a941059ec55 +https://conda.anaconda.org/conda-forge/linux-64/libnghttp2-1.64.0-h161d5f1_0.conda#19e57602824042dfd0446292ef90488b +https://conda.anaconda.org/conda-forge/linux-64/libprotobuf-5.29.3-h501fc15_1.conda#edb86556cf4a0c133e7932a1597ff236 +https://conda.anaconda.org/conda-forge/linux-64/libre2-11-2024.07.02-hba17884_3.conda#545e93a513c10603327c76c15485e946 +https://conda.anaconda.org/conda-forge/linux-64/libthrift-0.21.0-h0e7cc3e_0.conda#dcb95c0a98ba9ff737f7ae482aef7833 +https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.7.0-hd9ff511_4.conda#6c1028898cf3a2032d9af46689e1b81a +https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-9.2.0-he0572af_0.conda#93340b072c393d23c4700a1d40565dca +https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.44-hc749103_2.conda#31614c73d7b103ef76faa4d83d261d34 +https://conda.anaconda.org/conda-forge/linux-64/python-3.13.3-hf636f53_101_cp313.conda#10622e12d649154af0bd76bcf33a7c5c +https://conda.anaconda.org/conda-forge/linux-64/qhull-2020.2-h434a139_5.conda#353823361b1d27eb3960efb076dfcaf6 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-0.4.1-hb711507_2.conda#8637c3e5821654d0edf97e2b0404b443 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-keysyms-0.4.1-hb711507_0.conda#ad748ccca349aec3e91743e08b5e2b50 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-renderutil-0.3.10-hb711507_0.conda#0e0cbe0564d03a99afd5fd7b362feecd +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-wm-0.4.2-hb711507_0.conda#608e0ef8256b81d04456e8d211eee3e8 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libsm-1.2.6-he73a12e_0.conda#1c74ff8c35dcadf952a16f752ca5aa49 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.12-h4f16b4b_0.conda#db038ce880f100acc74dba10302b5630 +https://conda.anaconda.org/conda-forge/linux-64/aws-c-event-stream-0.5.4-hc5e5e9e_7.conda#eb339cb6cd7c881b3f0e7910e99c261b 
+https://conda.anaconda.org/conda-forge/linux-64/aws-c-http-0.10.0-h6884c39_0.conda#76a0f88aeb377e0eee84d48ac65ca747 +https://conda.anaconda.org/conda-forge/linux-64/brotli-1.1.0-hb9d3cd8_2.conda#98514fe74548d768907ce7a13f680e8f +https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_1.conda#962b9857ee8e7018c22f2776ffa0b2d7 +https://conda.anaconda.org/conda-forge/noarch/cpython-3.13.3-py313hd8ed1ab_101.conda#904a822cbd380adafb9070debf8579a8 +https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhd8ed1ab_1.conda#44600c4667a319d67dbe0681fc0bc833 +https://conda.anaconda.org/conda-forge/linux-64/cyrus-sasl-2.1.27-h54b06d7_7.conda#dce22f70b4e5a407ce88f2be046f4ceb +https://conda.anaconda.org/conda-forge/linux-64/cython-3.0.12-py313h5dec8f5_0.conda#24a42a0c1cc33743e33572d63d489b54 +https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.2-pyhd8ed1ab_1.conda#a16662747cdeb9abbac74d0057cc976e +https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_1.conda#a71efeae2c160f6789900ba2631a2c90 +https://conda.anaconda.org/conda-forge/noarch/filelock-3.18.0-pyhd8ed1ab_0.conda#4547b39256e296bb758166893e909a7c +https://conda.anaconda.org/conda-forge/noarch/fsspec-2025.3.2-pyhd8ed1ab_0.conda#9c40692c3d24c7aaf335f673ac09d308 +https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_1.conda#6837f3eff7dcea42ecd714ce1ac2b108 +https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.7-py313h33d0bda_0.conda#9862d13a5e466273d5a4738cffcb8d6c +https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.17-h717163a_0.conda#000e85703f0fd9594c81710dd5066471 https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h4637d8d_4.conda#d4529f4dff3057982a7617c7ac58fde3 -https://conda.anaconda.org/conda-forge/linux-64/libcurl-8.7.1-hca28451_0.conda#755c7f876815003337d2c61ff5d047e5 -https://conda.anaconda.org/conda-forge/linux-64/libflac-1.4.3-h59595ed_0.conda#ee48bf17cc83a00f59ca1494d5646869 -https://conda.anaconda.org/conda-forge/linux-64/libgpg-error-1.49-h4f305b6_0.conda#dfcfd72c7a430d3616763ecfbefe4ca9 -https://conda.anaconda.org/conda-forge/linux-64/libpq-16.2-h33b98f1_1.conda#9e49ec2a61d02623b379dc332eb6889d +https://conda.anaconda.org/conda-forge/linux-64/libcurl-8.13.0-h332b0f4_0.conda#cbdc92ac0d93fe3c796e36ad65c7905c +https://conda.anaconda.org/conda-forge/linux-64/libfreetype-2.13.3-ha770c72_1.conda#51f5be229d83ecd401fb369ab96ae669 +https://conda.anaconda.org/conda-forge/linux-64/libglib-2.84.1-h2ff4ddf_0.conda#0305434da649d4fb48a425e588b79ea6 +https://conda.anaconda.org/conda-forge/linux-64/libglx-1.7.0-ha4b6fd6_2.conda#c8013e438185f33b13814c5c488acd5c +https://conda.anaconda.org/conda-forge/linux-64/libhiredis-1.0.2-h2cc385e_0.tar.bz2#b34907d3a81a3cd8095ee83d174c074a +https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.13.7-h4bc477f_1.conda#ad1f1f8238834cd3c88ceeaee8da444a +https://conda.anaconda.org/conda-forge/linux-64/markupsafe-3.0.2-py313h8060acc_1.conda#21b62c55924f01b6eef6827167b46acb +https://conda.anaconda.org/conda-forge/noarch/meson-1.8.0-pyh29332c3_0.conda#8e25221b702272394b86b0f4d7217f77 +https://conda.anaconda.org/conda-forge/linux-64/mpfr-4.2.1-h90cbb55_3.conda#2eeb50cab6652538eee8fc0bc3340c81 +https://conda.anaconda.org/conda-forge/noarch/mpmath-1.3.0-pyhd8ed1ab_1.conda#3585aa87c43ab15b167b574cd73b057b https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19 
-https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.2-h488ebb8_0.conda#7f2e286780f072ed750df46dc2631138 -https://conda.anaconda.org/conda-forge/noarch/packaging-24.0-pyhd8ed1ab_0.conda#248f521b64ce055e7feae3105e7abeb8 -https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_0.conda#d3483c8fc2dc2cc3f5cf43e26d60cabf -https://conda.anaconda.org/conda-forge/noarch/ply-3.11-pyhd8ed1ab_2.conda#18c6deb6f9602e32446398203c8f0e91 -https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.2-pyhd8ed1ab_0.conda#b9a4dacf97241704529131a0dfc0494f -https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2024.1-pyhd8ed1ab_0.conda#98206ea9954216ee7540f0c773f2104d -https://conda.anaconda.org/conda-forge/noarch/pytz-2024.1-pyhd8ed1ab_0.conda#3eeeeb9e4827ace8c0c1419c85d590ad -https://conda.anaconda.org/conda-forge/noarch/setuptools-69.5.1-pyhd8ed1ab_0.conda#7462280d81f639363e6e63c81276bd9e -https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 -https://conda.anaconda.org/conda-forge/linux-64/tbb-2021.12.0-h00ab1b0_0.conda#f1b776cff1b426e7e7461a8502a3b731 -https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.4.0-pyhc1e730c_0.conda#b296278eef667c673bf51de6535bad88 -https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095 -https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 -https://conda.anaconda.org/conda-forge/linux-64/tornado-6.4-py311h459d7ec_0.conda#cc7727006191b8f3630936b339a76cd0 -https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.11.0-pyha770c72_0.conda#6ef2fc37559256cf682d8b3375e89b80 -https://conda.anaconda.org/conda-forge/noarch/wheel-0.43.0-pyhd8ed1ab_1.conda#0b5293a157c2b5cd513dd1b03d8d3aae -https://conda.anaconda.org/conda-forge/linux-64/xcb-util-image-0.4.0-h8ee46fc_1.conda#9d7bcddf49cbf727730af10e71022c73 -https://conda.anaconda.org/conda-forge/linux-64/xkeyboard-config-2.41-hd590300_0.conda#81f740407b45e3f9047b3174fa94eb9e -https://conda.anaconda.org/conda-forge/linux-64/xorg-libxext-1.3.4-h0b41bf4_2.conda#82b6df12252e6f32402b96dacc656fec -https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.11-hd590300_0.conda#ed67c36f215b310412b2af935bf3e530 -https://conda.anaconda.org/conda-forge/linux-64/aws-c-auth-0.7.3-h28f7589_1.conda#97503d3e565004697f1651753aa95b9e -https://conda.anaconda.org/conda-forge/linux-64/aws-c-mqtt-0.9.3-hb447be9_1.conda#c520669eb0be9269a5f0d8ef62531882 -https://conda.anaconda.org/conda-forge/linux-64/cairo-1.18.0-h3faef2a_0.conda#f907bb958910dc404647326ca80c263e -https://conda.anaconda.org/conda-forge/linux-64/coverage-7.5.0-py311h331c9d8_0.conda#5420e3594638adf670fca1a601d7efb9 -https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.51.0-py311h459d7ec_0.conda#17e1997cc17c571d5ad27bd0159f616c -https://conda.anaconda.org/conda-forge/linux-64/glib-2.80.0-hf2295e7_6.conda#a1e026a82a562b443845db5614ca568a -https://conda.anaconda.org/conda-forge/noarch/joblib-1.4.0-pyhd8ed1ab_0.conda#e0ed1bf13ce3a440e022157bf4764465 -https://conda.anaconda.org/conda-forge/linux-64/libgcrypt-1.10.3-hd590300_0.conda#32d16ad533c59bb0a3c5ffaf16110829 -https://conda.anaconda.org/conda-forge/linux-64/libgoogle-cloud-2.12.0-hac9eb74_1.conda#0dee716254497604762957076ac76540 -https://conda.anaconda.org/conda-forge/linux-64/libsndfile-1.2.2-hc60ed4a_1.conda#ef1910918dd895516a769ed36b5b3a4e 
-https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.7.0-h662e7e4_0.conda#b32c0da42b1f24a98577bb3d7fc0b995 -https://conda.anaconda.org/conda-forge/noarch/meson-1.4.0-pyhd8ed1ab_0.conda#52a0660cfa40b45bf254ecc3374cb2e0 -https://conda.anaconda.org/conda-forge/linux-64/mkl-2022.2.1-h84fe81f_16997.conda#a7ce56d5757f5b57e7daabe703ade5bb -https://conda.anaconda.org/conda-forge/linux-64/pillow-10.3.0-py311h18e6fac_0.conda#6c520a9d36c9d7270988c7a6c360d6d4 -https://conda.anaconda.org/conda-forge/noarch/pip-24.0-pyhd8ed1ab_0.conda#f586ac1e56c8638b64f9c8122a7b8a67 -https://conda.anaconda.org/conda-forge/noarch/pyproject-metadata-0.8.0-pyhd8ed1ab_0.conda#573fe09d7bd0cd4bcc210d8369b5ca47 -https://conda.anaconda.org/conda-forge/noarch/pytest-7.4.4-pyhd8ed1ab_0.conda#a9d145de8c5f064b5fa68fb34725d9f4 -https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0-pyhd8ed1ab_0.conda#2cf4264fffb9e6eff6031c5b6884d61c -https://conda.anaconda.org/conda-forge/linux-64/sip-6.7.12-py311hb755f60_0.conda#02336abab4cb5dd794010ef53c54bd09 -https://conda.anaconda.org/conda-forge/linux-64/aws-c-s3-0.3.14-hf3aad02_1.conda#a968ffa7e9fe0c257628033d393e512f -https://conda.anaconda.org/conda-forge/linux-64/blas-1.0-mkl.tar.bz2#349aef876b1d8c9dccae01de20d5b385 -https://conda.anaconda.org/conda-forge/linux-64/gstreamer-1.24.1-h98fc4e7_1.conda#b04b5cdf3ba01430db27979250bc5a1d -https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-8.4.0-h3d44ed6_0.conda#27f46291a6aaa3c2a4f798ebd35a7ddb -https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-16_linux64_mkl.tar.bz2#85f61af03fd291dae33150ffe89dc09a -https://conda.anaconda.org/conda-forge/linux-64/libsystemd0-255-h3516f8a_1.conda#3366af27f0b593544a6cd453c7932ac5 -https://conda.anaconda.org/conda-forge/noarch/meson-python-0.16.0-pyh0c530f3_0.conda#e16f0dbf502da873be9f9adb0dc52547 -https://conda.anaconda.org/conda-forge/linux-64/pyqt5-sip-12.12.2-py311hb755f60_5.conda#e4d262cc3600e70b505a6761d29f6207 -https://conda.anaconda.org/conda-forge/noarch/pytest-cov-5.0.0-pyhd8ed1ab_0.conda#c54c0107057d67ddf077751339ec2c63 -https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.5.0-pyhd8ed1ab_0.conda#d5f595da2daead898ca958ac62f0307b -https://conda.anaconda.org/conda-forge/linux-64/aws-crt-cpp-0.21.0-hb942446_5.conda#07d92ed5403ad7b5c66ffd7d5b8f7e57 -https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.24.1-hfa15dee_1.conda#a6dd2bbc684913e2bef0a54ce56fcbfb -https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-16_linux64_mkl.tar.bz2#361bf757b95488de76c4f123805742d3 -https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-16_linux64_mkl.tar.bz2#a2f166748917d6d6e4707841ca1f519e -https://conda.anaconda.org/conda-forge/linux-64/pulseaudio-client-17.0-hb77b528_0.conda#07f45f1be1c25345faddb8db0de8039b -https://conda.anaconda.org/conda-forge/linux-64/aws-sdk-cpp-1.10.57-h85b1a90_19.conda#0605d3d60857fc07bd6a11e878fe0f08 -https://conda.anaconda.org/conda-forge/linux-64/numpy-1.26.4-py311h64a7726_0.conda#a502d7aad449a1206efb366d6a12c52d -https://conda.anaconda.org/conda-forge/linux-64/qt-main-5.15.8-hc9dc06e_21.conda#b325046180590c868ce0dbf267b82eb8 -https://conda.anaconda.org/conda-forge/noarch/array-api-strict-1.1.1-pyhd8ed1ab_0.conda#941bbcd64d1a7b44aeb497f468fc85b4 -https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.2.1-py311h9547e67_0.conda#74ad0ae64f1ef565e27eda87fa749e84 -https://conda.anaconda.org/conda-forge/linux-64/libarrow-12.0.1-hb87d912_8_cpu.conda#3f3b11398fe79b578e3c44dd00a44e4a 
-https://conda.anaconda.org/conda-forge/linux-64/pandas-2.2.2-py311h320fe9a_0.conda#c79e96ece4110fdaf2657c9f8e16f749 -https://conda.anaconda.org/conda-forge/linux-64/polars-0.20.23-py311h00856b1_0.conda#c000e1629d890ad00bb8c20963028d9f -https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.15.9-py311hf0fb5b6_5.conda#ec7e45bc76d9d0b69a74a2075932b8e8 -https://conda.anaconda.org/conda-forge/linux-64/pytorch-1.13.1-cpu_py311h410fd25_1.conda#ddd2fadddf89e3dc3d541a2537fce010 -https://conda.anaconda.org/conda-forge/linux-64/scipy-1.13.0-py311h64a7726_0.conda#d443c70b4a05f50236c70b9c79beff64 -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.8.4-py311h54ef318_0.conda#150186110f111b458f86c04361351337 -https://conda.anaconda.org/conda-forge/linux-64/pyamg-5.1.0-py311h92ebd52_0.conda#2d415a805458e93fcf5551760fd2d287 -https://conda.anaconda.org/conda-forge/linux-64/pyarrow-12.0.1-py311h39c9aba_8_cpu.conda#587370a25bb2c50cce90909ce20d38b8 -https://conda.anaconda.org/conda-forge/linux-64/pytorch-cpu-1.13.1-cpu_py311hdb170b5_1.conda#a805d5f103e493f207613283d8acbbe1 -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.8.4-py311h38be061_0.conda#fd6fc4385d0eb6b00c46c4c0d28f5c48 +https://conda.anaconda.org/conda-forge/noarch/networkx-3.4.2-pyh267e887_2.conda#fd40bf7f7f4bc4b647dc8512053d9873 +https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.3-h5fbd93e_0.conda#9e5816bc95d285c115a3ebc2f8563564 +https://conda.anaconda.org/conda-forge/linux-64/orc-2.1.1-h17f744e_1.conda#cfe9bc267c22b6d53438eff187649d43 +https://conda.anaconda.org/conda-forge/noarch/packaging-25.0-pyh29332c3_1.conda#58335b26c38bf4a20f399384c33cbcf9 +https://conda.anaconda.org/conda-forge/noarch/pip-25.1.1-pyh145f28c_0.conda#01384ff1639c6330a0924791413b8714 +https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_1.conda#e9dcbce5f45f9ee500e728ae58b605b6 +https://conda.anaconda.org/conda-forge/noarch/pybind11-global-2.13.6-pyh415d2e4_2.conda#120541563e520d12d8e39abd7de9092c +https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.2.3-pyhd8ed1ab_1.conda#513d3c262ee49b54a8fec85c5bc99764 +https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2025.2-pyhd8ed1ab_0.conda#88476ae6ebd24f39261e0854ac244f33 +https://conda.anaconda.org/conda-forge/noarch/pytz-2025.2-pyhd8ed1ab_0.conda#bc8e3267d44011051f2eb14d22fb0960 +https://conda.anaconda.org/conda-forge/linux-64/re2-2024.07.02-h9925aae_3.conda#6f445fb139c356f903746b2b91bbe786 +https://conda.anaconda.org/conda-forge/noarch/setuptools-75.8.2-pyhff2d567_0.conda#9bddfdbf4e061821a1a443f93223be61 +https://conda.anaconda.org/conda-forge/noarch/six-1.17.0-pyhd8ed1ab_0.conda#a451d576819089b0d672f18768be0f65 +https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.6.0-pyhecae5ae_0.conda#9d64911b31d57ca443e9f1e36b04385f +https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_1.conda#b0dd904de08b7db706167240bf37b164 +https://conda.anaconda.org/conda-forge/noarch/tomli-2.2.1-pyhd8ed1ab_1.conda#ac944244f1fed2eb49bae07193ae8215 +https://conda.anaconda.org/conda-forge/linux-64/tornado-6.4.2-py313h536fd9c_0.conda#5f5cbdd527d2e74e270d8b6255ba714f +https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.13.2-pyh29332c3_0.conda#83fc6ae00127671e301c9f44254c31b8 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-image-0.4.0-hb711507_2.conda#a0901183f08b6c7107aab109733a3c91 +https://conda.anaconda.org/conda-forge/linux-64/xkeyboard-config-2.44-hb9d3cd8_0.conda#7c91bfc90672888259675ad2ad28af9c 
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxext-1.3.6-hb9d3cd8_0.conda#febbab7d15033c913d53c7a2c102309d +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxfixes-6.0.1-hb9d3cd8_0.conda#4bdb303603e9821baf5fe5fdff1dc8f8 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.12-hb9d3cd8_0.conda#96d57aba173e878a2089d5638016dc5e +https://conda.anaconda.org/conda-forge/linux-64/aws-c-auth-0.9.0-h9a6e2ae_4.conda#a948110dbbde6491c62815643a96d589 +https://conda.anaconda.org/conda-forge/linux-64/aws-c-mqtt-0.12.3-hef6a231_4.conda#fd1d89d79c8287e6bcb2a529292f537a +https://conda.anaconda.org/conda-forge/linux-64/azure-core-cpp-1.14.0-h5cfcd09_0.conda#0a8838771cc2e985cd295e01ae83baf1 +https://conda.anaconda.org/conda-forge/linux-64/ccache-4.11.3-h80c52d3_0.conda#eb517c6a2b960c3ccb6f1db1005f063a +https://conda.anaconda.org/conda-forge/linux-64/coverage-7.8.0-py313h8060acc_0.conda#375064d30e709bf7c1d4580e70aaea61 +https://conda.anaconda.org/conda-forge/linux-64/dbus-1.13.6-h5008d03_3.tar.bz2#ecfff944ba3960ecb334b9a2663d708d +https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.57.0-py313h8060acc_0.conda#76b3a3367ac578a7cc43f4b7814e7e87 +https://conda.anaconda.org/conda-forge/linux-64/freetype-2.13.3-ha770c72_1.conda#9ccd736d31e0c6e41f54e704e5312811 +https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.6-pyhd8ed1ab_0.conda#446bd6c8cb26050d528881df495ce646 +https://conda.anaconda.org/conda-forge/noarch/joblib-1.5.0-pyhd8ed1ab_0.conda#3d7257f0a61c9aa4ffa3e324a887416b +https://conda.anaconda.org/conda-forge/linux-64/libgl-1.7.0-ha4b6fd6_2.conda#928b8be80851f5d8ffb016f9c81dae7a +https://conda.anaconda.org/conda-forge/linux-64/libgrpc-1.71.0-h8e591d7_1.conda#c3cfd72cbb14113abee7bbd86f44ad69 +https://conda.anaconda.org/conda-forge/linux-64/libhwloc-2.11.2-default_h0d58e46_1001.conda#804ca9e91bcaea0824a341d55b1684f2 +https://conda.anaconda.org/conda-forge/linux-64/libllvm20-20.1.4-he9d0ab4_0.conda#96c33bbd084ef2b2463503fb7f1482ae +https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.9.2-h65c71a3_0.conda#d045b1d878031eb497cab44e6392b1df +https://conda.anaconda.org/conda-forge/linux-64/libxslt-1.1.39-h76b75d6_0.conda#e71f31f8cfb0a91439f2086fc8aa0461 +https://conda.anaconda.org/conda-forge/linux-64/mpc-1.3.1-h24ddda3_1.conda#aa14b9a5196a6d8dd364164b7ce56acf +https://conda.anaconda.org/conda-forge/linux-64/openldap-2.6.9-he970967_0.conda#ca2de8bbdc871bce41dbf59e51324165 +https://conda.anaconda.org/conda-forge/linux-64/prometheus-cpp-1.3.0-ha5d0236_0.conda#a83f6a2fdc079e643237887a37460668 +https://conda.anaconda.org/conda-forge/noarch/pybind11-2.13.6-pyh1ec8472_2.conda#8088a5e7b2888c780738c3130f2a969d +https://conda.anaconda.org/conda-forge/noarch/pyproject-metadata-0.9.1-pyhd8ed1ab_0.conda#22ae7c6ea81e0c8661ef32168dda929b +https://conda.anaconda.org/conda-forge/noarch/pytest-8.3.5-pyhd8ed1ab_0.conda#c3c9316209dec74a705a36797970c6be +https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0.post0-pyhff2d567_1.conda#5ba79d7c71f03c678c8ead841f347d6e +https://conda.anaconda.org/conda-forge/noarch/python-gil-3.13.3-h4df99d1_101.conda#82c2641f2f0f513f7d2d1b847a2588e3 +https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.13.2-h0e9735f_0.conda#568ed1300869dca0ba09fb750cda5dbb +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-cursor-0.1.5-hb9d3cd8_0.conda#eb44b3b6deb1cab08d72cb61686fe64c +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxcomposite-0.4.6-hb9d3cd8_2.conda#d3c295b50f092ab525ffe3c2aa4b7413 
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxcursor-1.2.3-hb9d3cd8_0.conda#2ccd714aa2242315acaf0a67faea780b +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdamage-1.1.6-hb9d3cd8_0.conda#b5fcc7172d22516e1f965490e65e33a4 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxi-1.8.2-hb9d3cd8_0.conda#17dcc85db3c7886650b8908b183d6876 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrandr-1.5.4-hb9d3cd8_0.conda#2de7f99d6581a4a7adbff607b5c278ca +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxxf86vm-1.1.6-hb9d3cd8_0.conda#5efa5fa6243a622445fdfd72aee15efa +https://conda.anaconda.org/conda-forge/noarch/_python_abi3_support-1.0-hd8ed1ab_2.conda#aaa2a381ccc56eac91d63b6c1240312f +https://conda.anaconda.org/conda-forge/linux-64/aws-c-s3-0.7.16-h7dfd680_1.conda#d8870015dbf8a8bb44832f4c330bf044 +https://conda.anaconda.org/conda-forge/linux-64/azure-identity-cpp-1.10.0-h113e628_0.conda#73f73f60854f325a55f1d31459f2ab73 +https://conda.anaconda.org/conda-forge/linux-64/azure-storage-common-cpp-12.8.0-h736e048_1.conda#13de36be8de3ae3f05ba127631599213 +https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.15.0-h7e30c49_1.conda#8f5b0b297b59e1ac160ad4beec99dbee +https://conda.anaconda.org/conda-forge/linux-64/gmpy2-2.2.1-py313h11186cd_0.conda#54d020e0eaacf1e99bfb2410b9aa2e5e +https://conda.anaconda.org/conda-forge/linux-64/libclang-cpp20.1-20.1.4-default_h1df26ce_0.conda#96f8d5b2e94c9ba4fef19f1adf068a15 +https://conda.anaconda.org/conda-forge/linux-64/libclang13-20.1.4-default_he06ed0a_0.conda#2d933632c8004be47deb2be61bf013be +https://conda.anaconda.org/conda-forge/linux-64/libgoogle-cloud-2.36.0-hc4361e1_1.conda#ae36e6296a8dd8e8a9a8375965bf6398 +https://conda.anaconda.org/conda-forge/linux-64/libopentelemetry-cpp-1.20.0-hd1b1c89_0.conda#e1185384cc23e3bbf85486987835df94 +https://conda.anaconda.org/conda-forge/linux-64/libpq-17.4-h27ae623_1.conda#37fba334855ef3b51549308e61ed7a3d +https://conda.anaconda.org/conda-forge/noarch/meson-python-0.18.0-pyh70fd9c4_0.conda#576c04b9d9f8e45285fb4d9452c26133 +https://conda.anaconda.org/conda-forge/linux-64/optree-0.15.0-py313h33d0bda_0.conda#151f92ff0806c7c700419c8b8cf7cb4b +https://conda.anaconda.org/conda-forge/linux-64/pillow-11.1.0-py313h8db990d_0.conda#1e86810c6c3fb6d6aebdba26564eb2e8 +https://conda.anaconda.org/conda-forge/noarch/pytest-cov-6.1.1-pyhd8ed1ab_0.conda#1e35d8f975bc0e984a19819aa91c440a +https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.6.1-pyhd8ed1ab_1.conda#59aad4fb37cabc0bacc73cf344612ddd +https://conda.anaconda.org/conda-forge/linux-64/tbb-2021.13.0-hceb3a55_1.conda#ba7726b8df7b9d34ea80e82b097a4893 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxtst-1.2.5-hb9d3cd8_3.conda#7bbe9a0cc0df0ac5f5a8ad6d6a11af2f +https://conda.anaconda.org/conda-forge/linux-64/aws-crt-cpp-0.32.4-h0cee55f_2.conda#bc519b9909ef60e85ef2d59cd9542a0f +https://conda.anaconda.org/conda-forge/linux-64/azure-storage-blobs-cpp-12.13.0-h3cf044e_1.conda#7eb66060455c7a47d9dcdbfa9f46579b +https://conda.anaconda.org/conda-forge/linux-64/cairo-1.18.4-h3394656_0.conda#09262e66b19567aff4f592fb53b28760 +https://conda.anaconda.org/conda-forge/linux-64/libgoogle-cloud-storage-2.36.0-h0121fbd_1.conda#a0f7588c1f0a26d550e7bae4fb49427a +https://conda.anaconda.org/conda-forge/linux-64/mkl-2024.2.2-ha957f24_16.conda#1459379c79dda834673426504d52b319 +https://conda.anaconda.org/conda-forge/noarch/sympy-1.14.0-pyh2585a3b_105.conda#8c09fac3785696e1c477156192d64b91 
+https://conda.anaconda.org/conda-forge/linux-64/aws-sdk-cpp-1.11.510-h5b777a2_6.conda#2fd0b0d4cc7fc86024b2965feedd628a +https://conda.anaconda.org/conda-forge/linux-64/azure-storage-files-datalake-cpp-12.12.0-ha633028_1.conda#7c1980f89dd41b097549782121a73490 +https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-11.1.0-h3beb420_0.conda#95e3bb97f9cdc251c0c68640e9c10ed3 +https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-31_hfdb39a5_mkl.conda#bdf4a57254e8248222cb631db4393ff1 +https://conda.anaconda.org/conda-forge/linux-64/mkl-devel-2024.2.2-ha770c72_16.conda#140891ea14285fc634353b31e9e40a95 +https://conda.anaconda.org/conda-forge/linux-64/libarrow-20.0.0-h27f8bab_0_cpu.conda#6dacb4d072204ce0fd13835759418872 +https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-31_h372d94f_mkl.conda#2a06a6c16b45bd3d10002927ca204b67 +https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-31_hc41d3b0_mkl.conda#10d012ddd7cc1c7ff9093d4974a34e53 +https://conda.anaconda.org/conda-forge/linux-64/qt6-main-6.9.0-h6441bc3_1.conda#4029a8dcb1d97ea241dbe5abfda1fad6 +https://conda.anaconda.org/conda-forge/linux-64/libarrow-acero-20.0.0-hcb10f89_0_cpu.conda#025bf09c4f59e6f5d9a6a4b82dd5894f +https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.9.0-31_hbc6e62b_mkl.conda#562026e418363dc346ad5a9e18cce73c +https://conda.anaconda.org/conda-forge/linux-64/libparquet-20.0.0-h081d1f1_0_cpu.conda#4ad62607dd9f9902e0bd3d91c5bbce58 +https://conda.anaconda.org/conda-forge/linux-64/libtorch-2.7.0-cpu_mkl_hf6ddc5a_100.conda#6bdda0b10852c6d03b030bab7ec251f0 +https://conda.anaconda.org/conda-forge/linux-64/numpy-2.2.5-py313h17eae1a_0.conda#6ceeff9ed72e54e4a2f9a1c88f47bdde +https://conda.anaconda.org/conda-forge/linux-64/pyarrow-core-20.0.0-py313he5f92c8_0_cpu.conda#2afdef63d9fbc2cd0e52f8e8f3472404 +https://conda.anaconda.org/conda-forge/linux-64/pyside6-6.9.0-py313h5f61773_0.conda#f51f25ec8fcbf777f8b186bb5deeed40 +https://conda.anaconda.org/conda-forge/noarch/array-api-strict-2.3.1-pyhd8ed1ab_0.conda#11107d0aeb8c590a34fee0894909816b +https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-31_hcf00494_mkl.conda#368c93bde87a67d24a74de15bf4c49fd +https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.3.2-py313h33d0bda_0.conda#5dc81fffe102f63045225007a33d6199 +https://conda.anaconda.org/conda-forge/linux-64/libarrow-dataset-20.0.0-hcb10f89_0_cpu.conda#ebdbd9d4522b4106246866054f7520bf +https://conda.anaconda.org/conda-forge/linux-64/pandas-2.2.3-py313ha87cce1_3.conda#6248b529e537b1d4cb5ab3ef7f537795 +https://conda.anaconda.org/conda-forge/linux-64/polars-1.27.1-py39h2a4a510_3.conda#fba08963eaa1f954480045d033d1221e +https://conda.anaconda.org/conda-forge/linux-64/pytorch-2.7.0-cpu_mkl_py313_hea9ba1b_100.conda#3c2ce6a304aa827f1e3cc21f7df9190d +https://conda.anaconda.org/conda-forge/linux-64/scipy-1.15.2-py313h86fcf2b_0.conda#ca68acd9febc86448eeed68d0c6c8643 +https://conda.anaconda.org/conda-forge/noarch/scipy-doctest-1.7.1-pyh29332c3_0.conda#d3b3b7b88385648eff6ae39694692f27 +https://conda.anaconda.org/conda-forge/linux-64/blas-2.131-mkl.conda#9bb865b7e01104255ca54e61a58ded15 +https://conda.anaconda.org/conda-forge/linux-64/libarrow-substrait-20.0.0-h1bed206_0_cpu.conda#1763dd016d6eee48e2bb29382f8d1562 +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.10.1-py313h129903b_0.conda#4e23b3fabf434b418e0d9c6975a6453f +https://conda.anaconda.org/conda-forge/linux-64/pyamg-5.2.1-py313hf0ab243_1.conda#4c769bf3858f424cb2ecf952175ec600 
+https://conda.anaconda.org/conda-forge/linux-64/pytorch-cpu-2.7.0-cpu_mkl_hc60beec_100.conda#20b3051f55ad823a27818dfa46a41c8f +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.10.1-py313h78bf25f_0.conda#d0c80dea550ca97fc0710b2ecef919ba +https://conda.anaconda.org/conda-forge/linux-64/pyarrow-20.0.0-py313h78bf25f_0.conda#6b8d388845ce750fe2ad8436669182f3 diff --git a/build_tools/azure/pylatest_conda_forge_mkl_linux-64_environment.yml b/build_tools/azure/pylatest_conda_forge_mkl_linux-64_environment.yml index 30686a983ab35..e804bf1ce8e31 100644 --- a/build_tools/azure/pylatest_conda_forge_mkl_linux-64_environment.yml +++ b/build_tools/azure/pylatest_conda_forge_mkl_linux-64_environment.yml @@ -14,7 +14,7 @@ dependencies: - matplotlib - pandas - pyamg - - pytest<8 + - pytest - pytest-xdist - pillow - pip @@ -23,9 +23,9 @@ dependencies: - pytest-cov - coverage - ccache - - pytorch=1.13 + - pytorch - pytorch-cpu - polars - pyarrow - - array-api-compat - array-api-strict + - scipy-doctest diff --git a/build_tools/azure/pylatest_conda_forge_mkl_osx-64_conda.lock b/build_tools/azure/pylatest_conda_forge_mkl_osx-64_conda.lock index 86443fd97ae20..cc98410d95f1a 100644 --- a/build_tools/azure/pylatest_conda_forge_mkl_osx-64_conda.lock +++ b/build_tools/azure/pylatest_conda_forge_mkl_osx-64_conda.lock @@ -1,129 +1,132 @@ # Generated by conda-lock. # platform: osx-64 -# input_hash: 05036df523e23d48cff7b6355ca081c5e5b41d8c5078cb9e1352f79e661d0549 +# input_hash: cee22335ff0a429180f2d8eeb31943f2646e3e653f1197f57ba6e39fc9659b05 @EXPLICIT -https://conda.anaconda.org/conda-forge/osx-64/bzip2-1.0.8-h10d778d_5.conda#6097a6ca9ada32699b5fc4312dd6ef18 -https://conda.anaconda.org/conda-forge/osx-64/ca-certificates-2024.2.2-h8857fd0_0.conda#f2eacee8c33c43692f1ccfd33d0f50b1 -https://conda.anaconda.org/conda-forge/osx-64/icu-73.2-hf5e326d_0.conda#5cc301d759ec03f28328428e28f65591 -https://conda.anaconda.org/conda-forge/osx-64/libbrotlicommon-1.1.0-h0dc2134_1.conda#9e6c31441c9aa24e41ace40d6151aab6 -https://conda.anaconda.org/conda-forge/osx-64/libcxx-16.0.6-hd57cbcb_0.conda#7d6972792161077908b62971802f289a -https://conda.anaconda.org/conda-forge/osx-64/libdeflate-1.20-h49d49c5_0.conda#d46104f6a896a0bc6a1d37b88b2edf5c -https://conda.anaconda.org/conda-forge/osx-64/libexpat-2.6.2-h73e2aa4_0.conda#3d1d51c8f716d97c864d12f7af329526 -https://conda.anaconda.org/conda-forge/osx-64/libffi-3.4.2-h0d85af4_5.tar.bz2#ccb34fb14960ad8b125962d3d79b31a9 -https://conda.anaconda.org/conda-forge/noarch/libgfortran-devel_osx-64-12.3.0-h0b6f5ec_3.conda#39eeea5454333825d72202fae2d5e0b8 -https://conda.anaconda.org/conda-forge/osx-64/libiconv-1.17-hd75f5a5_2.conda#6c3628d047e151efba7cf08c5e54d1ca -https://conda.anaconda.org/conda-forge/osx-64/libjpeg-turbo-3.0.0-h0dc2134_1.conda#72507f8e3961bc968af17435060b6dd6 -https://conda.anaconda.org/conda-forge/osx-64/libwebp-base-1.4.0-h10d778d_0.conda#b2c0047ea73819d992484faacbbe1c24 -https://conda.anaconda.org/conda-forge/osx-64/libzlib-1.2.13-h8a1eda9_5.conda#4a3ad23f6e16f99c04e166767193d700 -https://conda.anaconda.org/conda-forge/osx-64/llvm-openmp-18.1.3-hb6ac08f_0.conda#506f270f4f00980d27cc1fc127e0ed37 +https://conda.anaconda.org/conda-forge/noarch/libgfortran-devel_osx-64-13.3.0-h297be85_105.conda#c4967f8e797d0ffef3c5650fcdc2cdb5 https://conda.anaconda.org/conda-forge/osx-64/mkl-include-2023.2.0-h6bab518_50500.conda#835abb8ded5e26f23ea6996259c7972e -https://conda.anaconda.org/conda-forge/osx-64/ncurses-6.4.20240210-h73e2aa4_0.conda#50f28c512e9ad78589e3eab34833f762 
-https://conda.anaconda.org/conda-forge/osx-64/pthread-stubs-0.4-hc929b4f_1001.tar.bz2#addd19059de62181cd11ae8f4ef26084 -https://conda.anaconda.org/conda-forge/osx-64/python_abi-3.12-4_cp312.conda#87201ac4314b911b74197e588cca3639 -https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda#161081fc7cec0bfda0d86d7cb595f8d8 -https://conda.anaconda.org/conda-forge/osx-64/xorg-libxau-1.0.11-h0dc2134_0.conda#9566b4c29274125b0266d0177b5eb97b -https://conda.anaconda.org/conda-forge/osx-64/xorg-libxdmcp-1.1.3-h35c211d_0.tar.bz2#86ac76d6bf1cbb9621943eb3bd9ae36e -https://conda.anaconda.org/conda-forge/osx-64/xz-5.2.6-h775f41a_0.tar.bz2#a72f9d4ea13d55d745ff1ed594747f10 -https://conda.anaconda.org/conda-forge/osx-64/gmp-6.3.0-h73e2aa4_1.conda#92f8d748d95d97f92fc26cfac9bb5b6e +https://conda.anaconda.org/conda-forge/noarch/python_abi-3.13-7_cp313.conda#e84b44e6300f1703cb25d29120c5b1d8 +https://conda.anaconda.org/conda-forge/osx-64/tbb-2021.10.0-h1c7c39f_2.conda#73434bcf87082942e938352afae9b0fa +https://conda.anaconda.org/conda-forge/noarch/tzdata-2025b-h78e105d_0.conda#4222072737ccff51314b5ece9c7d6f5a +https://conda.anaconda.org/conda-forge/osx-64/bzip2-1.0.8-hfdf4475_7.conda#7ed4301d437b59045be7e051a0308211 +https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2025.4.26-hbd8a1cb_0.conda#95db94f75ba080a22eb623590993167b +https://conda.anaconda.org/conda-forge/osx-64/icu-75.1-h120a0e1_0.conda#d68d48a3060eb5abdc1cdc8e2a3a5966 +https://conda.anaconda.org/conda-forge/osx-64/libbrotlicommon-1.1.0-h00291cd_2.conda#58f2c4bdd56c46cc7451596e4ae68e0b +https://conda.anaconda.org/conda-forge/osx-64/libcxx-20.1.4-hf95d169_0.conda#9a38a63cfe950dd3e1b3adfcba731d3a +https://conda.anaconda.org/conda-forge/osx-64/libdeflate-1.23-hcc1b750_0.conda#5d3507f22dda24f7d9a79325ad313e44 +https://conda.anaconda.org/conda-forge/osx-64/libexpat-2.7.0-h240833e_0.conda#026d0a1056ba2a3dbbea6d4b08188676 +https://conda.anaconda.org/conda-forge/osx-64/libffi-3.4.6-h281671d_1.conda#4ca9ea59839a9ca8df84170fab4ceb41 +https://conda.anaconda.org/conda-forge/osx-64/libiconv-1.18-h4b5e92a_1.conda#6283140d7b2b55b6b095af939b71b13f +https://conda.anaconda.org/conda-forge/osx-64/libjpeg-turbo-3.1.0-h6e16a3a_0.conda#87537967e6de2f885a9fcebd42b7cb10 +https://conda.anaconda.org/conda-forge/osx-64/liblzma-5.8.1-hd471939_1.conda#f87e8821e0e38a4140a7ed4f52530053 +https://conda.anaconda.org/conda-forge/osx-64/libmpdec-4.0.0-hfdf4475_0.conda#ed625b2e59dff82859c23dd24774156b +https://conda.anaconda.org/conda-forge/osx-64/libwebp-base-1.5.0-h6cf52b4_0.conda#5e0cefc99a231ac46ba21e27ae44689f +https://conda.anaconda.org/conda-forge/osx-64/libzlib-1.3.1-hd23fc13_2.conda#003a54a4e32b02f7355b50a837e699da +https://conda.anaconda.org/conda-forge/osx-64/llvm-openmp-20.1.4-ha54dae1_0.conda#985619d7704847d30346abb6feeb8351 +https://conda.anaconda.org/conda-forge/osx-64/ncurses-6.5-h0622a9a_3.conda#ced34dd9929f491ca6dab6a2927aff25 +https://conda.anaconda.org/conda-forge/osx-64/pthread-stubs-0.4-h00291cd_1002.conda#8bcf980d2c6b17094961198284b8e862 +https://conda.anaconda.org/conda-forge/osx-64/xorg-libxau-1.0.12-h6e16a3a_0.conda#4cf40e60b444d56512a64f39d12c20bd +https://conda.anaconda.org/conda-forge/osx-64/xorg-libxdmcp-1.1.5-h00291cd_0.conda#9f438e1b6f4e73fd9e6d78bfe7c36743 +https://conda.anaconda.org/conda-forge/osx-64/gmp-6.3.0-hf036a51_2.conda#427101d13f19c4974552a4e5b072eef1 https://conda.anaconda.org/conda-forge/osx-64/isl-0.26-imath32_h2e86a7b_101.conda#d06222822a9144918333346f145b68c6 
-https://conda.anaconda.org/conda-forge/osx-64/lerc-4.0.0-hb486fe8_0.tar.bz2#f9d6a4c82889d5ecedec1d90eb673c55 -https://conda.anaconda.org/conda-forge/osx-64/libbrotlidec-1.1.0-h0dc2134_1.conda#9ee0bab91b2ca579e10353738be36063 -https://conda.anaconda.org/conda-forge/osx-64/libbrotlienc-1.1.0-h0dc2134_1.conda#8a421fe09c6187f0eb5e2338a8a8be6d -https://conda.anaconda.org/conda-forge/osx-64/libgfortran5-13.2.0-h2873a65_3.conda#e4fb4d23ec2870ff3c40d10afe305aec -https://conda.anaconda.org/conda-forge/osx-64/libpng-1.6.43-h92b6c6a_0.conda#65dcddb15965c9de2c0365cb14910532 -https://conda.anaconda.org/conda-forge/osx-64/libsqlite-3.45.3-h92b6c6a_0.conda#68e462226209f35182ef66eda0f794ff -https://conda.anaconda.org/conda-forge/osx-64/libxcb-1.15-hb7f2c08_0.conda#5513f57e0238c87c12dffedbcc9c1a4a -https://conda.anaconda.org/conda-forge/osx-64/libxml2-2.12.6-hc0ae0f7_2.conda#50b997370584f2c83ca0c38e9028eab9 -https://conda.anaconda.org/conda-forge/osx-64/ninja-1.12.0-h7728843_0.conda#1ac079f6ecddd2c336f3acb7b371851f -https://conda.anaconda.org/conda-forge/osx-64/openssl-3.2.1-hd75f5a5_1.conda#570a6f04802df580be529f3a72d2bbf7 -https://conda.anaconda.org/conda-forge/osx-64/readline-8.2-h9e318b2_1.conda#f17f77f2acf4d344734bda76829ce14e -https://conda.anaconda.org/conda-forge/osx-64/tapi-1100.0.11-h9ce4665_0.tar.bz2#f9ff42ccf809a21ba6f8607f8de36108 -https://conda.anaconda.org/conda-forge/osx-64/tk-8.6.13-h1abcd95_1.conda#bf830ba5afc507c6232d4ef0fb1a882d -https://conda.anaconda.org/conda-forge/osx-64/zlib-1.2.13-h8a1eda9_5.conda#75a8a98b1c4671c5d2897975731da42d -https://conda.anaconda.org/conda-forge/osx-64/zstd-1.5.5-h829000d_0.conda#80abc41d0c48b82fe0f04e7f42f5cb7e -https://conda.anaconda.org/conda-forge/osx-64/brotli-bin-1.1.0-h0dc2134_1.conda#ece565c215adcc47fc1db4e651ee094b -https://conda.anaconda.org/conda-forge/osx-64/freetype-2.12.1-h60636b9_2.conda#25152fce119320c980e5470e64834b50 -https://conda.anaconda.org/conda-forge/osx-64/libgfortran-5.0.0-13_2_0_h97931a8_3.conda#0b6e23a012ee7a9a5f6b244f5a92c1d5 -https://conda.anaconda.org/conda-forge/osx-64/libhwloc-2.10.0-default_h1321489_1000.conda#6f5fe4374d1003e116e2573022178da6 -https://conda.anaconda.org/conda-forge/osx-64/libllvm16-16.0.6-hbedff68_3.conda#8fd56c0adc07a37f93bd44aa61a97c90 -https://conda.anaconda.org/conda-forge/osx-64/libtiff-4.6.0-h129831d_3.conda#568593071d2e6cea7b5fc1f75bfa10ca -https://conda.anaconda.org/conda-forge/osx-64/mpfr-4.2.1-h4f6b447_1.conda#b90df08f0deb2f58631447c1462c92a7 -https://conda.anaconda.org/conda-forge/osx-64/python-3.12.3-h1411813_0_cpython.conda#df1448ec6cbf8eceb03d29003cf72ae6 -https://conda.anaconda.org/conda-forge/osx-64/sigtool-0.1.3-h88f4db0_0.tar.bz2#fbfb84b9de9a6939cb165c02c69b1865 -https://conda.anaconda.org/conda-forge/osx-64/brotli-1.1.0-h0dc2134_1.conda#9272dd3b19c4e8212f8542cefd5c3d67 -https://conda.anaconda.org/conda-forge/noarch/certifi-2024.2.2-pyhd8ed1ab_0.conda#0876280e409658fc6f9e75d035960333 -https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 -https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhd8ed1ab_0.conda#5cd86562580f274031ede6aa6aa24441 -https://conda.anaconda.org/conda-forge/osx-64/cython-3.0.10-py312hede676d_0.conda#3008aa88f0dc67e7144734b16e331ee4 -https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.0-pyhd8ed1ab_2.conda#8d652ea2ee8eaee02ed8dc820bc794aa -https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_0.conda#15dda3cdbf330abfe9f555d22f66db46 
-https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5 -https://conda.anaconda.org/conda-forge/osx-64/kiwisolver-1.4.5-py312h49ebfd2_1.conda#21f174a5cfb5964069c374171a979157 -https://conda.anaconda.org/conda-forge/osx-64/lcms2-2.16-ha2f27b4_0.conda#1442db8f03517834843666c422238c9b -https://conda.anaconda.org/conda-forge/osx-64/ld64_osx-64-711-ha20a434_0.conda#a8b41eb97c8a9d618243a79ba78fdc3c -https://conda.anaconda.org/conda-forge/osx-64/libclang-cpp16-16.0.6-default_h7151d67_6.conda#7eaad118ab797d1427f8745c861d1925 -https://conda.anaconda.org/conda-forge/osx-64/libhiredis-1.0.2-h2beb688_0.tar.bz2#524282b2c46c9dedf051b3bc2ae05494 -https://conda.anaconda.org/conda-forge/osx-64/llvm-tools-16.0.6-hbedff68_3.conda#e9356b0807462e8f84c1384a8da539a5 -https://conda.anaconda.org/conda-forge/osx-64/mpc-1.3.1-h81bd1dd_0.conda#c752c0eb6c250919559172c011e5f65b -https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19 -https://conda.anaconda.org/conda-forge/osx-64/openjpeg-2.5.2-h7310d3a_0.conda#05a14cc9d725dd74995927968d6547e3 -https://conda.anaconda.org/conda-forge/noarch/packaging-24.0-pyhd8ed1ab_0.conda#248f521b64ce055e7feae3105e7abeb8 -https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_0.conda#d3483c8fc2dc2cc3f5cf43e26d60cabf -https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.2-pyhd8ed1ab_0.conda#b9a4dacf97241704529131a0dfc0494f -https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2024.1-pyhd8ed1ab_0.conda#98206ea9954216ee7540f0c773f2104d -https://conda.anaconda.org/conda-forge/noarch/pytz-2024.1-pyhd8ed1ab_0.conda#3eeeeb9e4827ace8c0c1419c85d590ad -https://conda.anaconda.org/conda-forge/noarch/setuptools-69.5.1-pyhd8ed1ab_0.conda#7462280d81f639363e6e63c81276bd9e -https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 -https://conda.anaconda.org/conda-forge/osx-64/tbb-2021.12.0-h7728843_0.conda#e4fb6f4700d8890c36cbf317c2c6d0cb -https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.4.0-pyhc1e730c_0.conda#b296278eef667c673bf51de6535bad88 -https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095 -https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 -https://conda.anaconda.org/conda-forge/osx-64/tornado-6.4-py312h41838bb_0.conda#2d2d1fde5800d45cb56218583156d23d -https://conda.anaconda.org/conda-forge/noarch/wheel-0.43.0-pyhd8ed1ab_1.conda#0b5293a157c2b5cd513dd1b03d8d3aae -https://conda.anaconda.org/conda-forge/osx-64/ccache-4.9.1-h41adc32_0.conda#45aaf96b67840bd98a928de8679098fa -https://conda.anaconda.org/conda-forge/osx-64/cctools_osx-64-986-ha1c5b94_0.conda#a8951de2506df5649f5a3295fdfd9f2c -https://conda.anaconda.org/conda-forge/osx-64/clang-16-16.0.6-default_h7151d67_6.conda#1c298568c30efe7d9369c7c15b748461 -https://conda.anaconda.org/conda-forge/osx-64/coverage-7.5.0-py312h5fa3f64_0.conda#0ec479f31895645cfaabaa7ea318e6a5 -https://conda.anaconda.org/conda-forge/osx-64/fonttools-4.51.0-py312h41838bb_0.conda#ebe40134b860cf704ddaf81f684f95a5 -https://conda.anaconda.org/conda-forge/osx-64/gfortran_impl_osx-64-12.3.0-hc328e78_3.conda#b3d751dc7073bbfdfa9d863e39b9685d -https://conda.anaconda.org/conda-forge/noarch/joblib-1.4.0-pyhd8ed1ab_0.conda#e0ed1bf13ce3a440e022157bf4764465 
-https://conda.anaconda.org/conda-forge/osx-64/ld64-711-ha02d983_0.conda#3ae4930ec076735cce481e906f5192e0 -https://conda.anaconda.org/conda-forge/noarch/meson-1.4.0-pyhd8ed1ab_0.conda#52a0660cfa40b45bf254ecc3374cb2e0 +https://conda.anaconda.org/conda-forge/osx-64/lerc-4.0.0-hcca01a6_1.conda#21f765ced1a0ef4070df53cb425e1967 +https://conda.anaconda.org/conda-forge/osx-64/libbrotlidec-1.1.0-h00291cd_2.conda#34709a1f5df44e054c4a12ab536c5459 +https://conda.anaconda.org/conda-forge/osx-64/libbrotlienc-1.1.0-h00291cd_2.conda#691f0dcb36f1ae67f5c489f20ae987ea +https://conda.anaconda.org/conda-forge/osx-64/libcxx-devel-18.1.8-h7c275be_8.conda#a9513c41f070a9e2d5c370ba5d6c0c00 +https://conda.anaconda.org/conda-forge/osx-64/libgfortran5-14.2.0-h58528f3_105.conda#94560312ff3c78225bed62ab59854c31 +https://conda.anaconda.org/conda-forge/osx-64/libpng-1.6.47-h3c4a55f_0.conda#8461ab86d2cdb76d6e971aab225be73f +https://conda.anaconda.org/conda-forge/osx-64/libsqlite-3.49.1-hdb6dae5_2.conda#1819e770584a7e83a81541d8253cbabe +https://conda.anaconda.org/conda-forge/osx-64/libxcb-1.17.0-hf1f96e2_0.conda#bbeca862892e2898bdb45792a61c4afc +https://conda.anaconda.org/conda-forge/osx-64/libxml2-2.14.2-h8c082e5_0.conda#4adac80accf99fa253f0620444ad01fb https://conda.anaconda.org/conda-forge/osx-64/mkl-2023.2.0-h54c2260_50500.conda#0a342ccdc79e4fcd359245ac51941e7b -https://conda.anaconda.org/conda-forge/osx-64/pillow-10.3.0-py312h0c923fa_0.conda#6f0591ae972e9b815739da3392fbb3c3 -https://conda.anaconda.org/conda-forge/noarch/pip-24.0-pyhd8ed1ab_0.conda#f586ac1e56c8638b64f9c8122a7b8a67 -https://conda.anaconda.org/conda-forge/noarch/pyproject-metadata-0.8.0-pyhd8ed1ab_0.conda#573fe09d7bd0cd4bcc210d8369b5ca47 -https://conda.anaconda.org/conda-forge/noarch/pytest-7.4.4-pyhd8ed1ab_0.conda#a9d145de8c5f064b5fa68fb34725d9f4 -https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0-pyhd8ed1ab_0.conda#2cf4264fffb9e6eff6031c5b6884d61c -https://conda.anaconda.org/conda-forge/osx-64/cctools-986-h40f6528_0.conda#b7a2ca0062a6ee8bc4e83ec887bef942 -https://conda.anaconda.org/conda-forge/osx-64/clang-16.0.6-hdae98eb_6.conda#884e7b24306e4f21b7ee08dabadb2ecc +https://conda.anaconda.org/conda-forge/osx-64/ninja-1.12.1-hd6aca1a_1.conda#1cf196736676270fa876001901e4e1db +https://conda.anaconda.org/conda-forge/osx-64/openssl-3.5.0-hc426f3f_1.conda#919faa07b9647beb99a0e7404596a465 +https://conda.anaconda.org/conda-forge/osx-64/qhull-2020.2-h3c5361c_5.conda#dd1ea9ff27c93db7c01a7b7656bd4ad4 +https://conda.anaconda.org/conda-forge/osx-64/readline-8.2-h7cca4af_2.conda#342570f8e02f2f022147a7f841475784 +https://conda.anaconda.org/conda-forge/osx-64/tapi-1300.6.5-h390ca13_0.conda#c6ee25eb54accb3f1c8fc39203acfaf1 +https://conda.anaconda.org/conda-forge/osx-64/tk-8.6.13-h1abcd95_1.conda#bf830ba5afc507c6232d4ef0fb1a882d +https://conda.anaconda.org/conda-forge/osx-64/zlib-1.3.1-hd23fc13_2.conda#c989e0295dcbdc08106fe5d9e935f0b9 +https://conda.anaconda.org/conda-forge/osx-64/zstd-1.5.7-h8210216_2.conda#cd60a4a5a8d6a476b30d8aa4bb49251a +https://conda.anaconda.org/conda-forge/osx-64/brotli-bin-1.1.0-h00291cd_2.conda#049933ecbf552479a12c7917f0a4ce59 https://conda.anaconda.org/conda-forge/osx-64/libblas-3.9.0-20_osx64_mkl.conda#160fdc97a51d66d51dc782fb67d35205 -https://conda.anaconda.org/conda-forge/noarch/meson-python-0.16.0-pyh0c530f3_0.conda#e16f0dbf502da873be9f9adb0dc52547 +https://conda.anaconda.org/conda-forge/osx-64/libfreetype6-2.13.3-h40dfd5c_1.conda#c76e6f421a0e95c282142f820835e186 
+https://conda.anaconda.org/conda-forge/osx-64/libgfortran-14.2.0-hef36b68_105.conda#6b27baf030f5d6603713c7e72d3f6b9a +https://conda.anaconda.org/conda-forge/osx-64/libllvm18-18.1.8-default_h3571c67_5.conda#01dd8559b569ad39b64fef0a61ded1e9 +https://conda.anaconda.org/conda-forge/osx-64/libtiff-4.7.0-hb77a491_4.conda#b36d793dd65b28e3aeaa3a77abe71678 https://conda.anaconda.org/conda-forge/osx-64/mkl-devel-2023.2.0-h694c41f_50500.conda#1b4d0235ef253a1e19459351badf4f9f -https://conda.anaconda.org/conda-forge/noarch/pytest-cov-5.0.0-pyhd8ed1ab_0.conda#c54c0107057d67ddf077751339ec2c63 -https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.5.0-pyhd8ed1ab_0.conda#d5f595da2daead898ca958ac62f0307b -https://conda.anaconda.org/conda-forge/osx-64/clangxx-16.0.6-default_h7151d67_6.conda#cc8c007a529a7cfaa5d29d8599df3fe6 +https://conda.anaconda.org/conda-forge/osx-64/mpfr-4.2.1-haed47dc_3.conda#d511e58aaaabfc23136880d9956fa7a6 +https://conda.anaconda.org/conda-forge/osx-64/python-3.13.3-h534c281_101_cp313.conda#ebcc7c42561d8d8b01477020b63218c0 +https://conda.anaconda.org/conda-forge/osx-64/sigtool-0.1.3-h88f4db0_0.tar.bz2#fbfb84b9de9a6939cb165c02c69b1865 +https://conda.anaconda.org/conda-forge/osx-64/brotli-1.1.0-h00291cd_2.conda#2db0c38a7f2321c5bdaf32b181e832c7 +https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_1.conda#962b9857ee8e7018c22f2776ffa0b2d7 +https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhd8ed1ab_1.conda#44600c4667a319d67dbe0681fc0bc833 +https://conda.anaconda.org/conda-forge/osx-64/cython-3.0.12-py313h9efc8c2_0.conda#ddace7cae5c3073c031ad08ef01881da +https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.2-pyhd8ed1ab_1.conda#a16662747cdeb9abbac74d0057cc976e +https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_1.conda#a71efeae2c160f6789900ba2631a2c90 +https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_1.conda#6837f3eff7dcea42ecd714ce1ac2b108 +https://conda.anaconda.org/conda-forge/osx-64/kiwisolver-1.4.7-py313h0c4e38b_0.conda#c37fceab459e104e77bb5456e219fc37 +https://conda.anaconda.org/conda-forge/osx-64/lcms2-2.17-h72f5680_0.conda#bf210d0c63f2afb9e414a858b79f0eaa +https://conda.anaconda.org/conda-forge/osx-64/ld64_osx-64-951.9-h33512f0_6.conda#6cd120f5c9dae65b858e1fad2b7959a0 https://conda.anaconda.org/conda-forge/osx-64/libcblas-3.9.0-20_osx64_mkl.conda#51089a4865eb4aec2bc5c7468bd07f9f +https://conda.anaconda.org/conda-forge/osx-64/libclang-cpp18.1-18.1.8-default_h3571c67_9.conda#ef1a444913775b76f3391431967090a9 +https://conda.anaconda.org/conda-forge/osx-64/libfreetype-2.13.3-h694c41f_1.conda#07c8d3fbbe907f32014b121834b36dd5 +https://conda.anaconda.org/conda-forge/osx-64/libhiredis-1.0.2-h2beb688_0.tar.bz2#524282b2c46c9dedf051b3bc2ae05494 https://conda.anaconda.org/conda-forge/osx-64/liblapack-3.9.0-20_osx64_mkl.conda#58f08e12ad487fac4a08f90ff0b87aec -https://conda.anaconda.org/conda-forge/noarch/compiler-rt_osx-64-16.0.6-ha38d28d_2.conda#7a46507edc35c6c8818db0adaf8d787f +https://conda.anaconda.org/conda-forge/osx-64/llvm-tools-18-18.1.8-default_h3571c67_5.conda#4391981e855468ced32ca1940b3d7613 +https://conda.anaconda.org/conda-forge/noarch/meson-1.8.0-pyh29332c3_0.conda#8e25221b702272394b86b0f4d7217f77 +https://conda.anaconda.org/conda-forge/osx-64/mpc-1.3.1-h9d8efa1_1.conda#0520855aaae268ea413d6bc913f1384c +https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19 
+https://conda.anaconda.org/conda-forge/osx-64/openjpeg-2.5.3-h7fd6d84_0.conda#025c711177fc3309228ca1a32374458d +https://conda.anaconda.org/conda-forge/noarch/packaging-25.0-pyh29332c3_1.conda#58335b26c38bf4a20f399384c33cbcf9 +https://conda.anaconda.org/conda-forge/noarch/pip-25.1.1-pyh145f28c_0.conda#01384ff1639c6330a0924791413b8714 +https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_1.conda#e9dcbce5f45f9ee500e728ae58b605b6 +https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.2.3-pyhd8ed1ab_1.conda#513d3c262ee49b54a8fec85c5bc99764 +https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2025.2-pyhd8ed1ab_0.conda#88476ae6ebd24f39261e0854ac244f33 +https://conda.anaconda.org/conda-forge/noarch/pytz-2025.2-pyhd8ed1ab_0.conda#bc8e3267d44011051f2eb14d22fb0960 +https://conda.anaconda.org/conda-forge/noarch/setuptools-80.1.0-pyhff2d567_0.conda#f6f72d0837c79eaec77661be43e8a691 +https://conda.anaconda.org/conda-forge/noarch/six-1.17.0-pyhd8ed1ab_0.conda#a451d576819089b0d672f18768be0f65 +https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.6.0-pyhecae5ae_0.conda#9d64911b31d57ca443e9f1e36b04385f +https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_1.conda#b0dd904de08b7db706167240bf37b164 +https://conda.anaconda.org/conda-forge/noarch/tomli-2.2.1-pyhd8ed1ab_1.conda#ac944244f1fed2eb49bae07193ae8215 +https://conda.anaconda.org/conda-forge/osx-64/tornado-6.4.2-py313h63b0ddb_0.conda#74a3a14f82dc65fa19f4fd4e2eb8da93 +https://conda.anaconda.org/conda-forge/osx-64/ccache-4.11.3-h33566b8_0.conda#b65cad834bd6c1f660c101cca09430bf +https://conda.anaconda.org/conda-forge/osx-64/clang-18-18.1.8-default_h3571c67_9.conda#e29d8d2866f15f3b167938cc0e775b2f +https://conda.anaconda.org/conda-forge/osx-64/coverage-7.8.0-py313h717bdf5_0.conda#1215b56c8d9915318d1714cbd004035f +https://conda.anaconda.org/conda-forge/osx-64/fonttools-4.57.0-py313h717bdf5_0.conda#190b8625dd6c38afe4f10e3be50122e4 +https://conda.anaconda.org/conda-forge/osx-64/freetype-2.13.3-h694c41f_1.conda#126dba1baf5030cb6f34533718924577 +https://conda.anaconda.org/conda-forge/osx-64/gfortran_impl_osx-64-13.3.0-hbf5bf67_105.conda#f56a107c8d1253346d01785ecece7977 +https://conda.anaconda.org/conda-forge/noarch/joblib-1.5.0-pyhd8ed1ab_0.conda#3d7257f0a61c9aa4ffa3e324a887416b +https://conda.anaconda.org/conda-forge/osx-64/ld64-951.9-h4e51db5_6.conda#45bf526d53b1bc95bc0b932a91a41576 https://conda.anaconda.org/conda-forge/osx-64/liblapacke-3.9.0-20_osx64_mkl.conda#124ae8e384268a8da66f1d64114a1eda -https://conda.anaconda.org/conda-forge/osx-64/numpy-1.26.4-py312he3a82b2_0.conda#96c61a21c4276613748dba069554846b +https://conda.anaconda.org/conda-forge/osx-64/llvm-tools-18.1.8-default_h3571c67_5.conda#cc07ff74d2547da1f1452c42b67bafd6 +https://conda.anaconda.org/conda-forge/osx-64/numpy-2.2.5-py313hc518a0f_0.conda#eba644ccc203cfde2fa1f450f528c70d +https://conda.anaconda.org/conda-forge/noarch/pyproject-metadata-0.9.1-pyhd8ed1ab_0.conda#22ae7c6ea81e0c8661ef32168dda929b +https://conda.anaconda.org/conda-forge/noarch/pytest-8.3.5-pyhd8ed1ab_0.conda#c3c9316209dec74a705a36797970c6be +https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0.post0-pyhff2d567_1.conda#5ba79d7c71f03c678c8ead841f347d6e https://conda.anaconda.org/conda-forge/osx-64/blas-devel-3.9.0-20_osx64_mkl.conda#cc3260179093918b801e373c6e888e02 -https://conda.anaconda.org/conda-forge/osx-64/compiler-rt-16.0.6-ha38d28d_2.conda#3b9e8c5c63b8e86234f499490acd85c2 
-https://conda.anaconda.org/conda-forge/osx-64/contourpy-1.2.1-py312h9230928_0.conda#079df34ce7c71259cfdd394645370891 -https://conda.anaconda.org/conda-forge/osx-64/pandas-2.2.2-py312h83c8a23_0.conda#b422a5d39ff0cd72923aef807f280145 -https://conda.anaconda.org/conda-forge/osx-64/scipy-1.13.0-py312h8adb940_0.conda#818232a7807c76970172af9c7698ba4a +https://conda.anaconda.org/conda-forge/osx-64/cctools_osx-64-1010.6-hd19c6af_6.conda#4694e9e497454a8ce5b9fb61e50d9c5d +https://conda.anaconda.org/conda-forge/osx-64/clang-18.1.8-default_h576c50e_9.conda#266e7e8fa2190df09e6f236571c91511 +https://conda.anaconda.org/conda-forge/osx-64/contourpy-1.3.2-py313ha0b1807_0.conda#2c2d1f840df1c512b34e0537ef928169 +https://conda.anaconda.org/conda-forge/noarch/meson-python-0.18.0-pyh70fd9c4_0.conda#576c04b9d9f8e45285fb4d9452c26133 +https://conda.anaconda.org/conda-forge/osx-64/pandas-2.2.3-py313h2e7108f_3.conda#5c37fc7549913fc4895d7d2e097091ed +https://conda.anaconda.org/conda-forge/osx-64/pillow-11.1.0-py313h0c4f865_0.conda#11b4dd7a814202f2a0b655420f1c1c3a +https://conda.anaconda.org/conda-forge/noarch/pytest-cov-6.1.1-pyhd8ed1ab_0.conda#1e35d8f975bc0e984a19819aa91c440a +https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.6.1-pyhd8ed1ab_1.conda#59aad4fb37cabc0bacc73cf344612ddd +https://conda.anaconda.org/conda-forge/osx-64/scipy-1.15.2-py313h7e69c36_0.conda#53c23f87aedf2d139d54c88894c8a07f https://conda.anaconda.org/conda-forge/osx-64/blas-2.120-mkl.conda#b041a7677a412f3d925d8208936cb1e2 -https://conda.anaconda.org/conda-forge/osx-64/clang_impl_osx-64-16.0.6-h8787910_11.conda#ed9c90270c77481fc4cfccd0891d62a8 -https://conda.anaconda.org/conda-forge/osx-64/matplotlib-base-3.8.4-py312h1fe5000_0.conda#3e3097734a5042cb6d2675e69bf1fc5a -https://conda.anaconda.org/conda-forge/osx-64/pyamg-5.1.0-py312h3db3e91_0.conda#c6d6248b99fc11b15c9becea581a1462 -https://conda.anaconda.org/conda-forge/osx-64/clang_osx-64-16.0.6-hb91bd55_11.conda#24123b15e9c0dad9c0d5fd9da0b4c7a9 -https://conda.anaconda.org/conda-forge/osx-64/matplotlib-3.8.4-py312hb401068_0.conda#187ee42addd449b4899b55c304012436 -https://conda.anaconda.org/conda-forge/osx-64/c-compiler-1.7.0-h282daa2_0.conda#4652f33fe8d895f61177e2783b289377 -https://conda.anaconda.org/conda-forge/osx-64/clangxx_impl_osx-64-16.0.6-h6d92fbe_11.conda#a658c595675bde00373347b22a974810 -https://conda.anaconda.org/conda-forge/osx-64/gfortran_osx-64-12.3.0-h18f7dce_1.conda#436af2384c47aedb94af78a128e174f1 -https://conda.anaconda.org/conda-forge/osx-64/clangxx_osx-64-16.0.6-hb91bd55_11.conda#e49aad30263abdcb785e610981b7c2c7 -https://conda.anaconda.org/conda-forge/osx-64/gfortran-12.3.0-h2c809b3_1.conda#c48adbaa8944234b80ef287c37e329b0 -https://conda.anaconda.org/conda-forge/osx-64/cxx-compiler-1.7.0-h7728843_0.conda#8abaa2694c1fba2b6bd3753d00a60415 -https://conda.anaconda.org/conda-forge/osx-64/fortran-compiler-1.7.0-h6c2ab21_0.conda#2c11db8b46df0a547997116f0fd54b8e -https://conda.anaconda.org/conda-forge/osx-64/compilers-1.7.0-h694c41f_0.conda#3576aa54986a3e2a5370e4232b35c036 +https://conda.anaconda.org/conda-forge/osx-64/cctools-1010.6-ha66f10e_6.conda#a126dcde2752751ac781b67238f7fac4 +https://conda.anaconda.org/conda-forge/osx-64/clangxx-18.1.8-default_heb2e8d1_9.conda#4ba6bd39da787a7306eba77555e86dd3 +https://conda.anaconda.org/conda-forge/osx-64/matplotlib-base-3.10.1-py313he981572_0.conda#45a80d45944fbc43f081d719b23bf366 +https://conda.anaconda.org/conda-forge/osx-64/pyamg-5.2.1-py313h0322a6a_1.conda#4bda5182eeaef3d2017a2ec625802e1a 
+https://conda.anaconda.org/conda-forge/noarch/compiler-rt_osx-64-18.1.8-hf2b8a54_1.conda#76f906e6bdc58976c5593f650290ae20 +https://conda.anaconda.org/conda-forge/osx-64/matplotlib-3.10.1-py313habf4b1d_0.conda#81ea3344e4fc2066a38199a64738ca6b +https://conda.anaconda.org/conda-forge/osx-64/compiler-rt-18.1.8-h1020d70_1.conda#bc1714a1e73be18e411cff30dc1fe011 +https://conda.anaconda.org/conda-forge/osx-64/clang_impl_osx-64-18.1.8-h6a44ed1_24.conda#5224d53acc2604a86d790f664d7fcbc4 +https://conda.anaconda.org/conda-forge/osx-64/clang_osx-64-18.1.8-h7e5c614_24.conda#24e1a9c1296772ec45bfcd6a0d855fa5 +https://conda.anaconda.org/conda-forge/osx-64/c-compiler-1.9.0-h09a7c41_0.conda#ab45badcb5d035d3bddfdbdd96e00967 +https://conda.anaconda.org/conda-forge/osx-64/clangxx_impl_osx-64-18.1.8-h4b7810f_24.conda#9d27517a71e7268679f1c47e7f34e47b +https://conda.anaconda.org/conda-forge/osx-64/gfortran_osx-64-13.3.0-h3223c34_1.conda#a6eeb1519091ac3239b88ee3914d6cb6 +https://conda.anaconda.org/conda-forge/osx-64/clangxx_osx-64-18.1.8-h7e5c614_24.conda#c1e7c7d5c04d0ea456aa48ddb8a9dc2b +https://conda.anaconda.org/conda-forge/osx-64/gfortran-13.3.0-hcc3c99d_1.conda#e1177b9b139c6cf43250427819f2f07b +https://conda.anaconda.org/conda-forge/osx-64/cxx-compiler-1.9.0-h20888b2_0.conda#cd17d9bf9780b0db4ed31fb9958b167f +https://conda.anaconda.org/conda-forge/osx-64/fortran-compiler-1.9.0-h02557f8_0.conda#2cf645572d7ae534926093b6e9f3bdff +https://conda.anaconda.org/conda-forge/osx-64/compilers-1.9.0-h694c41f_0.conda#b84884262dcd1c2f56a9e1961fdd3326 diff --git a/build_tools/azure/pylatest_conda_forge_mkl_osx-64_environment.yml b/build_tools/azure/pylatest_conda_forge_mkl_osx-64_environment.yml index cfa1b7689a4ad..ad177e4ed391b 100644 --- a/build_tools/azure/pylatest_conda_forge_mkl_osx-64_environment.yml +++ b/build_tools/azure/pylatest_conda_forge_mkl_osx-64_environment.yml @@ -14,7 +14,7 @@ dependencies: - matplotlib - pandas - pyamg - - pytest<8 + - pytest - pytest-xdist - pillow - pip diff --git a/build_tools/azure/pylatest_conda_mkl_no_openmp_environment.yml b/build_tools/azure/pylatest_conda_mkl_no_openmp_environment.yml index 01bd378aa121a..0c2eec344c26b 100644 --- a/build_tools/azure/pylatest_conda_mkl_no_openmp_environment.yml +++ b/build_tools/azure/pylatest_conda_mkl_no_openmp_environment.yml @@ -12,12 +12,11 @@ dependencies: - matplotlib - pandas - pyamg - - pytest<8 + - pytest - pytest-xdist - pillow - pip - ninja - - meson-python - pytest-cov - coverage - ccache @@ -25,3 +24,5 @@ dependencies: - pip: - cython - threadpoolctl + - meson-python + - meson diff --git a/build_tools/azure/pylatest_conda_mkl_no_openmp_osx-64_conda.lock b/build_tools/azure/pylatest_conda_mkl_no_openmp_osx-64_conda.lock index dc2fea78e7b80..da996af94f867 100644 --- a/build_tools/azure/pylatest_conda_mkl_no_openmp_osx-64_conda.lock +++ b/build_tools/azure/pylatest_conda_mkl_no_openmp_osx-64_conda.lock @@ -1,86 +1,82 @@ # Generated by conda-lock. 
# platform: osx-64 -# input_hash: e0d2cf2593df1f2c6969d68cf849136bee785b51f6cfc50ea1bdca2143d4a051 +# input_hash: cc639ea0beeaceb46e2ad729ba559d5d5e746b8f6ff522bc718109af6265069c @EXPLICIT https://repo.anaconda.com/pkgs/main/osx-64/blas-1.0-mkl.conda#cb2c87e85ac8e0ceae776d26d4214c8a -https://repo.anaconda.com/pkgs/main/osx-64/bzip2-1.0.8-h6c40b1e_5.conda#0f51dde96c82dcf58a788787fed4c5b9 -https://repo.anaconda.com/pkgs/main/osx-64/ca-certificates-2024.3.11-hecd8cb5_0.conda#a2e29a11940c66baf9942912096fad5f -https://repo.anaconda.com/pkgs/main/osx-64/jpeg-9e-h6c40b1e_1.conda#fc3e61fa41309946c9283fe8737d7f41 -https://repo.anaconda.com/pkgs/main/osx-64/libbrotlicommon-1.0.9-hca72f7f_7.conda#6c865b9e76fa2fad0c8ac32aa0f01f75 +https://repo.anaconda.com/pkgs/main/osx-64/bzip2-1.0.8-h6c40b1e_6.conda#96224786021d0765ce05818fa3c59bdb +https://repo.anaconda.com/pkgs/main/osx-64/ca-certificates-2025.2.25-hecd8cb5_0.conda#12ab77db61795036e15a5b14929ad4a1 +https://repo.anaconda.com/pkgs/main/osx-64/jpeg-9e-h46256e1_3.conda#b1d9769eac428e11f5f922531a1da2e0 https://repo.anaconda.com/pkgs/main/osx-64/libcxx-14.0.6-h9765a3e_0.conda#387757bb354ae9042370452cd0fb5627 -https://repo.anaconda.com/pkgs/main/osx-64/libdeflate-1.17-hb664fd8_1.conda#b6116b8db33ea6a5b5287dae70d4a913 -https://repo.anaconda.com/pkgs/main/osx-64/libffi-3.4.4-hecd8cb5_0.conda#c20b2687118c471b1d70067ef2b2703f -https://repo.anaconda.com/pkgs/main/osx-64/libwebp-base-1.3.2-h6c40b1e_0.conda#d8fd9f599dd4e012694e69d119016442 +https://repo.anaconda.com/pkgs/main/osx-64/libdeflate-1.22-h46256e1_0.conda#7612fb79e5e76fcd16655c7d026f4a66 +https://repo.anaconda.com/pkgs/main/osx-64/libffi-3.4.4-hecd8cb5_1.conda#eb7f09ada4d95f1a26f483f1009d9286 +https://repo.anaconda.com/pkgs/main/osx-64/libwebp-base-1.3.2-h46256e1_1.conda#399c11b50e6e7a6969aca9a84ea416b7 https://repo.anaconda.com/pkgs/main/osx-64/llvm-openmp-14.0.6-h0dcd299_0.conda#b5804d32b87dc61ca94561ade33d5f2d https://repo.anaconda.com/pkgs/main/osx-64/ncurses-6.4-hcec6c5f_0.conda#0214d1ee980e217fabc695f1e40662aa -https://repo.anaconda.com/pkgs/main/noarch/tzdata-2024a-h04d1e81_0.conda#452af53adae0a5b06eb5d05c707b2f25 -https://repo.anaconda.com/pkgs/main/osx-64/xz-5.4.6-h6c40b1e_0.conda#412bf13f273c0e086da65f86567cfe80 -https://repo.anaconda.com/pkgs/main/osx-64/zlib-1.2.13-h4dc903c_0.conda#d0202dd912bfb45d3422786531717882 +https://repo.anaconda.com/pkgs/main/noarch/tzdata-2025b-h04d1e81_0.conda#1d027393db3427ab22a02aa44a56f143 +https://repo.anaconda.com/pkgs/main/osx-64/xz-5.6.4-h46256e1_1.conda#ce989a528575ad332a650bb7c7f7e5d5 +https://repo.anaconda.com/pkgs/main/osx-64/zlib-1.2.13-h4b97444_1.conda#38e35f7c817fac0973034bfce6706ec2 https://repo.anaconda.com/pkgs/main/osx-64/ccache-3.7.9-hf120daa_0.conda#a01515a32e721c51d631283f991bc8ea -https://repo.anaconda.com/pkgs/main/osx-64/expat-2.6.2-hcec6c5f_0.conda#c748234dd7e242784198ab038372cb0c +https://repo.anaconda.com/pkgs/main/osx-64/expat-2.7.1-h6d0c2b6_0.conda#6cdc93776b7551083854e7f106a62720 https://repo.anaconda.com/pkgs/main/osx-64/intel-openmp-2023.1.0-ha357a0b_43548.conda#ba8a89ffe593eb88e4c01334753c40c3 -https://repo.anaconda.com/pkgs/main/osx-64/lerc-3.0-he9d5cce_0.conda#aec2c3dbef836849c9260f05be04f3db -https://repo.anaconda.com/pkgs/main/osx-64/libbrotlidec-1.0.9-hca72f7f_7.conda#b85983951745cc666d9a1b42894210b2 -https://repo.anaconda.com/pkgs/main/osx-64/libbrotlienc-1.0.9-hca72f7f_7.conda#e306d7a1599202a7c95762443f110832 +https://repo.anaconda.com/pkgs/main/osx-64/lerc-4.0.0-h6d0c2b6_0.conda#824f87854c58df1525557c8639ce7f93 
https://repo.anaconda.com/pkgs/main/osx-64/libgfortran5-11.3.0-h9dfd629_28.conda#1fa1a27ee100b1918c3021dbfa3895a3 https://repo.anaconda.com/pkgs/main/osx-64/libpng-1.6.39-h6c40b1e_0.conda#a3c824835f53ad27aeb86d2b55e47804 -https://repo.anaconda.com/pkgs/main/osx-64/lz4-c-1.9.4-hcec6c5f_0.conda#44291e9e6920cfff30caf1299f48db38 -https://repo.anaconda.com/pkgs/main/osx-64/ninja-base-1.10.2-haf03e11_5.conda#c857c13129710a61395270656905c4a2 -https://repo.anaconda.com/pkgs/main/osx-64/openssl-3.0.13-hca72f7f_0.conda#08b109f010b97ce6cef211e235177175 +https://repo.anaconda.com/pkgs/main/osx-64/lz4-c-1.9.4-hcec6c5f_1.conda#aee0efbb45220e1985533dbff48551f8 +https://repo.anaconda.com/pkgs/main/osx-64/ninja-base-1.12.1-h1962661_0.conda#9c0a94a811e88f182519d9309cf5f634 +https://repo.anaconda.com/pkgs/main/osx-64/openssl-3.0.16-h184c1cd_0.conda#8e3c130ef85c3260d535153b4d0fd63a https://repo.anaconda.com/pkgs/main/osx-64/readline-8.2-hca72f7f_0.conda#971667436260e523f6f7355fdfa238bf https://repo.anaconda.com/pkgs/main/osx-64/tbb-2021.8.0-ha357a0b_0.conda#fb48530a3eea681c11dafb95b3387c0f -https://repo.anaconda.com/pkgs/main/osx-64/tk-8.6.12-h5d9f67b_0.conda#047f0af5486d19163e37fd7f8ae3d29f -https://repo.anaconda.com/pkgs/main/osx-64/brotli-bin-1.0.9-hca72f7f_7.conda#110bdca1a20710820e61f7fa3047f737 -https://repo.anaconda.com/pkgs/main/osx-64/freetype-2.12.1-hd8bbffd_0.conda#1f276af321375ee7fe8056843044fa76 +https://repo.anaconda.com/pkgs/main/osx-64/tk-8.6.14-h4d00af3_0.conda#a2c03940c2ae54614301ec82e6a98d75 +https://repo.anaconda.com/pkgs/main/osx-64/freetype-2.13.3-h02243ff_0.conda#acf5e48106235eb200eecb79119c7ffc https://repo.anaconda.com/pkgs/main/osx-64/libgfortran-5.0.0-11_3_0_hecd8cb5_28.conda#2eb13b680803f1064e53873ae0aaafb3 https://repo.anaconda.com/pkgs/main/osx-64/mkl-2023.1.0-h8e150cf_43560.conda#85d0f3431dd5c6ae44f8725fdd3d3e59 -https://repo.anaconda.com/pkgs/main/osx-64/sqlite-3.41.2-h6c40b1e_0.conda#6947a501943529c7536b7e4ba53802c1 -https://repo.anaconda.com/pkgs/main/osx-64/zstd-1.5.5-hc035e20_0.conda#5e0b7ddb1b7dc6b630e1f9a03499c19c -https://repo.anaconda.com/pkgs/main/osx-64/brotli-1.0.9-hca72f7f_7.conda#68e54d12ec67591deb2ffd70348fb00f -https://repo.anaconda.com/pkgs/main/osx-64/libtiff-4.5.1-hcec6c5f_0.conda#e127a800ffd9d300ed7d5e1b026944ec -https://repo.anaconda.com/pkgs/main/osx-64/python-3.12.3-hd58486a_0.conda#1a287cfa37c5a92972f5f527b6af7eed -https://repo.anaconda.com/pkgs/main/osx-64/coverage-7.2.2-py312h6c40b1e_0.conda#b6e4b9fba325047c07f3c9211ae91d1c +https://repo.anaconda.com/pkgs/main/osx-64/sqlite-3.45.3-h6c40b1e_0.conda#2edf909b937b3aad48322c9cb2e8f1a0 +https://repo.anaconda.com/pkgs/main/osx-64/zstd-1.5.6-h138b38a_0.conda#f4d15d7d0054d39e6a24fe8d7d1e37c5 +https://repo.anaconda.com/pkgs/main/osx-64/libtiff-4.7.0-h2dfa3ea_0.conda#82a118ce0139e2bf6f7a99c4cfbd4749 +https://repo.anaconda.com/pkgs/main/osx-64/python-3.12.9-hcd54a6c_0.conda#1bf9af06f3e476df1f72e8674a9224df +https://repo.anaconda.com/pkgs/main/osx-64/brotli-python-1.0.9-py312h6d0c2b6_9.conda#425936421fe402074163ac3ffe33a060 +https://repo.anaconda.com/pkgs/main/osx-64/coverage-7.6.9-py312h46256e1_0.conda#f8c1547bbf522a600ee795901240a7b0 https://repo.anaconda.com/pkgs/main/noarch/cycler-0.11.0-pyhd3eb1b0_0.conda#f5e365d2cdb66d547eb8c3ab93843aab -https://repo.anaconda.com/pkgs/main/noarch/execnet-1.9.0-pyhd3eb1b0_0.conda#f895937671af67cebb8af617494b3513 +https://repo.anaconda.com/pkgs/main/noarch/execnet-2.1.1-pyhd3eb1b0_0.conda#b3cb797432ee4657d5907b91a5dc65ad 
https://repo.anaconda.com/pkgs/main/noarch/iniconfig-1.1.1-pyhd3eb1b0_0.tar.bz2#e40edff2c5708f342cef43c7f280c507 -https://repo.anaconda.com/pkgs/main/osx-64/joblib-1.4.0-py312hecd8cb5_0.conda#0af12a3a87d9c8051ae6ba2ed2c3882a -https://repo.anaconda.com/pkgs/main/osx-64/kiwisolver-1.4.4-py312hcec6c5f_0.conda#2ba6561ddd1d05936fe74f5d118ce7dd -https://repo.anaconda.com/pkgs/main/osx-64/lcms2-2.12-hf1fd2bf_0.conda#697aba7a3308226df7a93ccfeae16ffa -https://repo.anaconda.com/pkgs/main/osx-64/mkl-service-2.4.0-py312h6c40b1e_1.conda#b1ef860be9043b35c5e8d9388b858514 -https://repo.anaconda.com/pkgs/main/osx-64/ninja-1.10.2-hecd8cb5_5.conda#a0043b325fb08db82477ae433668e684 -https://repo.anaconda.com/pkgs/main/osx-64/openjpeg-2.4.0-h66ea3da_0.conda#882833bd7befc5e60e6fba9c518c1b79 -https://repo.anaconda.com/pkgs/main/osx-64/packaging-23.2-py312hecd8cb5_0.conda#2b4e331c8f6df5d95a5dd3af37a34d89 -https://repo.anaconda.com/pkgs/main/osx-64/pluggy-1.0.0-py312hecd8cb5_1.conda#647fada22f1697691fdee90b52c99bcb -https://repo.anaconda.com/pkgs/main/osx-64/pyparsing-3.0.9-py312hecd8cb5_0.conda#d85cf2b81c6d9326a57a6418e14db258 -https://repo.anaconda.com/pkgs/main/noarch/python-tzdata-2023.3-pyhd3eb1b0_0.conda#479c037de0186d114b9911158427624e +https://repo.anaconda.com/pkgs/main/osx-64/joblib-1.4.2-py312hecd8cb5_0.conda#8ab03dfa447b4e0bfa0bd3d25930f3b6 +https://repo.anaconda.com/pkgs/main/osx-64/kiwisolver-1.4.8-py312h6d0c2b6_0.conda#060d4498fcc967a640829cb7e55c95f2 +https://repo.anaconda.com/pkgs/main/osx-64/lcms2-2.16-h31d93a5_1.conda#42450b66e91caf9ab0672a599e2a7bd0 +https://repo.anaconda.com/pkgs/main/osx-64/mkl-service-2.4.0-py312h46256e1_2.conda#04297cb766cabf38613ed6eb4eec85c3 +https://repo.anaconda.com/pkgs/main/osx-64/ninja-1.12.1-hecd8cb5_0.conda#ee3b660616ef0fbcbd0096a67c11c94b +https://repo.anaconda.com/pkgs/main/osx-64/openjpeg-2.5.2-h2d09ccc_1.conda#0f2e221843154b436b5982c695df627b +https://repo.anaconda.com/pkgs/main/osx-64/packaging-24.2-py312hecd8cb5_0.conda#76512e47c9c37443444ef0624769f620 +https://repo.anaconda.com/pkgs/main/osx-64/pluggy-1.5.0-py312hecd8cb5_0.conda#ca381e438f1dbd7986ac0fa0da70c9d8 +https://repo.anaconda.com/pkgs/main/osx-64/pyparsing-3.2.0-py312hecd8cb5_0.conda#e4086daaaed13f68cc8d5b9da7db73cc +https://repo.anaconda.com/pkgs/main/noarch/python-tzdata-2025.2-pyhd3eb1b0_0.conda#5ac858f05dbf9d3cdb04d53516901247 https://repo.anaconda.com/pkgs/main/osx-64/pytz-2024.1-py312hecd8cb5_0.conda#2b28ec0e0d07f5c0c701f75200b1e8b6 -https://repo.anaconda.com/pkgs/main/osx-64/setuptools-68.2.2-py312hecd8cb5_0.conda#64235f0c451427d86808c70c1c31cb8b -https://repo.anaconda.com/pkgs/main/noarch/six-1.16.0-pyhd3eb1b0_1.conda#34586824d411d36af2fa40e799c172d0 +https://repo.anaconda.com/pkgs/main/osx-64/setuptools-78.1.1-py312hecd8cb5_0.conda#76b66b96a1564cb76011408c1eb8df3e +https://repo.anaconda.com/pkgs/main/osx-64/six-1.17.0-py312hecd8cb5_0.conda#aadd782bc06426887ae0835eedd98ceb https://repo.anaconda.com/pkgs/main/noarch/toml-0.10.2-pyhd3eb1b0_0.conda#cda05f5f6d8509529d1a2743288d197a -https://repo.anaconda.com/pkgs/main/osx-64/tornado-6.3.3-py312h6c40b1e_0.conda#49173b5a36c9134865221f29d4a73fb6 -https://repo.anaconda.com/pkgs/main/osx-64/unicodedata2-15.1.0-py312h6c40b1e_0.conda#65bd2cb787fc99662d9bb6e6520c5826 -https://repo.anaconda.com/pkgs/main/osx-64/wheel-0.41.2-py312hecd8cb5_0.conda#e7aea266d81142e2bb0bbc2280e64526 -https://repo.anaconda.com/pkgs/main/osx-64/fonttools-4.51.0-py312h6c40b1e_0.conda#8f55fa86b73e8a7f4403503f9b7a9959 
-https://repo.anaconda.com/pkgs/main/osx-64/meson-1.3.1-py312hecd8cb5_0.conda#43963a2b38becce4caa95434b8c96837 +https://repo.anaconda.com/pkgs/main/osx-64/tornado-6.4.2-py312h46256e1_0.conda#6b41d7d8a2bf93ae3fc512202b14a9ec +https://repo.anaconda.com/pkgs/main/osx-64/unicodedata2-15.1.0-py312h46256e1_1.conda#4a7fd1dec7277c8ab71aa11aa08df86b +https://repo.anaconda.com/pkgs/main/osx-64/wheel-0.45.1-py312hecd8cb5_0.conda#fafb8687668467d8624d2ddd0909bce9 +https://repo.anaconda.com/pkgs/main/osx-64/fonttools-4.55.3-py312h46256e1_0.conda#f7680dd6b8b1c2f8aab17cf6630c6deb https://repo.anaconda.com/pkgs/main/osx-64/numpy-base-1.26.4-py312h6f81483_0.conda#87f73efbf26ab2e2ea7c32481a71bd47 -https://repo.anaconda.com/pkgs/main/osx-64/pillow-10.2.0-py312h6c40b1e_0.conda#5a44bd28cf26fff2d6219e76a86db126 -https://repo.anaconda.com/pkgs/main/osx-64/pip-23.3.1-py312hecd8cb5_0.conda#efc3db40cac09f74bb480d28d3a0b260 -https://repo.anaconda.com/pkgs/main/osx-64/pyproject-metadata-0.7.1-py312hecd8cb5_0.conda#e91ce37477d24dcdf7e0a8b93c5e72fd -https://repo.anaconda.com/pkgs/main/osx-64/pytest-7.4.0-py312hecd8cb5_0.conda#b816a2439ba9b87524aec74d58e55b0a -https://repo.anaconda.com/pkgs/main/noarch/python-dateutil-2.8.2-pyhd3eb1b0_0.conda#211ee00320b08a1ac9fea6677649f6c9 -https://repo.anaconda.com/pkgs/main/osx-64/meson-python-0.15.0-py312h6c40b1e_0.conda#688ab56b9d8e5a2e3f018ca3ce34e061 -https://repo.anaconda.com/pkgs/main/osx-64/pytest-cov-4.1.0-py312hecd8cb5_1.conda#a33a24eb20359f464938e75b2f57e23a -https://repo.anaconda.com/pkgs/main/osx-64/pytest-xdist-3.5.0-py312hecd8cb5_0.conda#d1ecfb3691cceecb1f16bcfdf0b67bb5 -https://repo.anaconda.com/pkgs/main/osx-64/bottleneck-1.3.7-py312h32608ca_0.conda#f96a01eba5ea542cf9c7cc8d77447627 -https://repo.anaconda.com/pkgs/main/osx-64/contourpy-1.2.0-py312ha357a0b_0.conda#57d384ad07152375b40a6293f79e3f0c -https://repo.anaconda.com/pkgs/main/osx-64/matplotlib-3.8.4-py312hecd8cb5_0.conda#6886c230c2ec2f47621b5cca4c7d493a -https://repo.anaconda.com/pkgs/main/osx-64/matplotlib-base-3.8.4-py312h7f12edd_0.conda#a4eee14a4dcaa89b306ca33d2d479fa4 +https://repo.anaconda.com/pkgs/main/osx-64/pillow-11.1.0-py312h935ef2f_1.conda#c2f7a3f027cc93a3626d50b765b75dc5 +https://repo.anaconda.com/pkgs/main/noarch/pip-25.1-pyhc872135_2.conda#2778327d2a700153fefe0e69438b18e1 +https://repo.anaconda.com/pkgs/main/osx-64/pytest-8.3.4-py312hecd8cb5_0.conda#b15ee02022967632dfa1672669228bee +https://repo.anaconda.com/pkgs/main/osx-64/python-dateutil-2.9.0post0-py312hecd8cb5_2.conda#1047dde28f78127dd9f6121e882926dd +https://repo.anaconda.com/pkgs/main/osx-64/pytest-cov-6.0.0-py312hecd8cb5_0.conda#db697e319a4d1145363246a51eef0352 +https://repo.anaconda.com/pkgs/main/osx-64/pytest-xdist-3.6.1-py312hecd8cb5_0.conda#38df9520774ee82bf143218f1271f936 +https://repo.anaconda.com/pkgs/main/osx-64/bottleneck-1.4.2-py312ha2b695f_0.conda#7efb63b6a5b33829a3b2c7a3efcf53ce +https://repo.anaconda.com/pkgs/main/osx-64/contourpy-1.3.1-py312h1962661_0.conda#41499d3a415721b0514f0cccb8288cb1 +https://repo.anaconda.com/pkgs/main/osx-64/matplotlib-3.10.0-py312hecd8cb5_0.conda#2977e81a7775be7963daf49df981b6e0 +https://repo.anaconda.com/pkgs/main/osx-64/matplotlib-base-3.10.0-py312h919b35b_0.conda#afc11bf311f5921ca4674ebac9592cf8 https://repo.anaconda.com/pkgs/main/osx-64/mkl_fft-1.3.8-py312h6c40b1e_0.conda#d59d01b940493f2b6a84aac922fd0c76 https://repo.anaconda.com/pkgs/main/osx-64/mkl_random-1.2.4-py312ha357a0b_0.conda#c1ea9c8eee79a5af3399f3c31be0e9c6 
https://repo.anaconda.com/pkgs/main/osx-64/numpy-1.26.4-py312hac873b0_0.conda#3150bac1e382156f82a153229e1ebd06 https://repo.anaconda.com/pkgs/main/osx-64/numexpr-2.8.7-py312hac873b0_0.conda#6303ba071636ef57fddf69eb6f440ec1 https://repo.anaconda.com/pkgs/main/osx-64/scipy-1.11.4-py312h81688c2_0.conda#7d57b4c21a9261f97fa511e0940c5d93 -https://repo.anaconda.com/pkgs/main/osx-64/pandas-2.2.1-py312he282a81_0.conda#021b70a1e40efb75b89eb8ebdb347132 -https://repo.anaconda.com/pkgs/main/osx-64/pyamg-4.2.3-py312h44cbcf4_0.conda#3bdc7be74087b3a5a83c520a74e1e8eb -# pip cython @ https://files.pythonhosted.org/packages/d5/6d/06c08d75adb98cdf72af18801e193d22580cc86ca553610f430f18ea26b3/Cython-3.0.10-cp312-cp312-macosx_10_9_x86_64.whl#sha256=8f2864ab5fcd27a346f0b50f901ebeb8f60b25a60a575ccfd982e7f3e9674914 -# pip threadpoolctl @ https://files.pythonhosted.org/packages/1e/84/ccd9b08653022b7785b6e3ee070ffb2825841e0dc119be22f0840b2b35cb/threadpoolctl-3.4.0-py3-none-any.whl#sha256=8f4c689a65b23e5ed825c8436a92b818aac005e0f3715f6a1664d7c7ee29d262 +https://repo.anaconda.com/pkgs/main/osx-64/pandas-2.2.3-py312h6d0c2b6_0.conda#84ce5b8ec4a986d13a5df17811f556a2 +https://repo.anaconda.com/pkgs/main/osx-64/pyamg-5.2.1-py312h1962661_0.conda#58881950d4ce74c9302b56961f97a43c +# pip cython @ https://files.pythonhosted.org/packages/e6/6c/3be501a6520a93449b1e7e6f63e598ec56f3b5d1bc7ad14167c72a22ddf7/Cython-3.0.12-cp312-cp312-macosx_10_9_x86_64.whl#sha256=fe030d4a00afb2844f5f70896b7f2a1a0d7da09bf3aa3d884cbe5f73fff5d310 +# pip meson @ https://files.pythonhosted.org/packages/df/d7/f1c8acf0e597d4d07532f519780ee6e11ba285a9b092f18706b4c9118331/meson-1.8.0-py3-none-any.whl#sha256=472b7b25da286447333d32872b82d1c6f1a34024fb8ee017d7308056c25fec1f +# pip threadpoolctl @ https://files.pythonhosted.org/packages/32/d5/f9a850d79b0851d1d4ef6456097579a9005b31fea68726a4ae5f2d82ddd9/threadpoolctl-3.6.0-py3-none-any.whl#sha256=43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb +# pip pyproject-metadata @ https://files.pythonhosted.org/packages/7e/b1/8e63033b259e0a4e40dd1ec4a9fee17718016845048b43a36ec67d62e6fe/pyproject_metadata-0.9.1-py3-none-any.whl#sha256=ee5efde548c3ed9b75a354fc319d5afd25e9585fa918a34f62f904cc731973ad +# pip meson-python @ https://files.pythonhosted.org/packages/28/58/66db620a8a7ccb32633de9f403fe49f1b63c68ca94e5c340ec5cceeb9821/meson_python-0.18.0-py3-none-any.whl#sha256=3b0fe051551cc238f5febb873247c0949cd60ded556efa130aa57021804868e2 diff --git a/build_tools/azure/pypy3_environment.yml b/build_tools/azure/pylatest_free_threaded_environment.yml similarity index 75% rename from build_tools/azure/pypy3_environment.yml rename to build_tools/azure/pylatest_free_threaded_environment.yml index 285f1b0d51d17..b947f31beb14a 100644 --- a/build_tools/azure/pypy3_environment.yml +++ b/build_tools/azure/pylatest_free_threaded_environment.yml @@ -4,19 +4,13 @@ channels: - conda-forge dependencies: - - pypy - - python=3.9 + - python-freethreading - numpy - - blas[build=openblas] - - scipy - - cython - joblib - threadpoolctl - - matplotlib - - pyamg - - pytest<8 + - pytest - pytest-xdist - - pip - ninja - meson-python - ccache + - pip diff --git a/build_tools/azure/pylatest_free_threaded_linux-64_conda.lock b/build_tools/azure/pylatest_free_threaded_linux-64_conda.lock new file mode 100644 index 0000000000000..84ca12988c3e1 --- /dev/null +++ b/build_tools/azure/pylatest_free_threaded_linux-64_conda.lock @@ -0,0 +1,58 @@ +# Generated by conda-lock. 
+# platform: linux-64 +# input_hash: c7db5547fb9ea583bb70736e98b526e9e435c63cb5f6f3c4f38e0f0925e28535 +@EXPLICIT +https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 +https://conda.anaconda.org/conda-forge/noarch/python_abi-3.13-7_cp313t.conda#df81edcc11a1176315e8226acab83eec +https://conda.anaconda.org/conda-forge/noarch/tzdata-2025b-h78e105d_0.conda#4222072737ccff51314b5ece9c7d6f5a +https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2025.4.26-hbd8a1cb_0.conda#95db94f75ba080a22eb623590993167b +https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.43-h712a8e2_4.conda#01f8d123c96816249efd255a31ad7712 +https://conda.anaconda.org/conda-forge/linux-64/libgomp-14.2.0-h767d61c_2.conda#06d02030237f4d5b3d9a7e7d348fe3c6 +https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2#73aaf86a425cc6e73fcf236a5a46396d +https://conda.anaconda.org/conda-forge/linux-64/libgcc-14.2.0-h767d61c_2.conda#ef504d1acbd74b7cc6849ef8af47dd03 +https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.0-h5888daf_0.conda#db0bfbe7dd197b68ad5f30333bae6ce0 +https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.6-h2dba641_1.conda#ede4673863426c0883c0063d853bbd85 +https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-14.2.0-h69a702a_2.conda#a2222a6ada71fb478682efe483ce0f92 +https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-14.2.0-hf1ad2bd_2.conda#556a4fdfac7287d349b8f09aba899693 +https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.8.1-hb9d3cd8_1.conda#a76fd702c93cd2dfd89eff30a5fd45a8 +https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-14.2.0-h8f9b012_2.conda#a78c856b6dc6bf4ea8daeb9beaaa3fb0 +https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-hb9d3cd8_2.conda#edb0dca6bc32e4f4789199455a1dbeb8 +https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h2d0b736_3.conda#47e340acb35de30501a76c7c799c41d7 +https://conda.anaconda.org/conda-forge/linux-64/openssl-3.5.0-h7b32b05_1.conda#de356753cfdbffcde5bb1e86e3aa6cd0 +https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h4bc722e_7.conda#62ee74e96c5ebb0af99386de58cf9553 +https://conda.anaconda.org/conda-forge/linux-64/libgfortran-14.2.0-h69a702a_2.conda#fb54c4ea68b460c278d26eea89cfbcc3 +https://conda.anaconda.org/conda-forge/linux-64/libmpdec-4.0.0-h4bc722e_0.conda#aeb98fdeb2e8f25d43ef71fbacbeec80 +https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.49.1-hee588c1_2.conda#962d6ac93c30b1dfc54c9cccafd1003e +https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-14.2.0-h4852527_2.conda#c75da67f045c2627f59e6fcb5f4e3a9b +https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda#40b61aab5c7ba9ff276c41cfffe6b80b +https://conda.anaconda.org/conda-forge/linux-64/ninja-1.12.1-hff21bea_1.conda#2322531904f27501ee19847b87ba7c64 +https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8c095d6_2.conda#283b96675859b20a825f8fa30f311446 +https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda#d453b98d9c83e71da0741bb0ff4d76bc +https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.7-hb8e6e7a_2.conda#6432cb5d4ac0046c3ac0a8a0f95842f9 +https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-14.2.0-h69a702a_2.conda#4056c857af1a99ee50589a941059ec55 +https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.29-pthreads_h94d23a6_0.conda#0a4d0252248ef9a0f88f2ba8b8a08e12 
+https://conda.anaconda.org/conda-forge/linux-64/python-3.13.3-h4724d56_1_cp313t.conda#8193603fe48ace3d8801cfb246f44491 +https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_1.conda#962b9857ee8e7018c22f2776ffa0b2d7 +https://conda.anaconda.org/conda-forge/noarch/cpython-3.13.3-py313hd8ed1ab_1.conda#6ba9ba47b91b7758cb963d0f0eaf3422 +https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.2-pyhd8ed1ab_1.conda#a16662747cdeb9abbac74d0057cc976e +https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_1.conda#a71efeae2c160f6789900ba2631a2c90 +https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_1.conda#6837f3eff7dcea42ecd714ce1ac2b108 +https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-31_h59b9bed_openblas.conda#728dbebd0f7a20337218beacffd37916 +https://conda.anaconda.org/conda-forge/linux-64/libhiredis-1.0.2-h2cc385e_0.tar.bz2#b34907d3a81a3cd8095ee83d174c074a +https://conda.anaconda.org/conda-forge/noarch/meson-1.8.0-pyh29332c3_0.conda#8e25221b702272394b86b0f4d7217f77 +https://conda.anaconda.org/conda-forge/noarch/packaging-25.0-pyh29332c3_1.conda#58335b26c38bf4a20f399384c33cbcf9 +https://conda.anaconda.org/conda-forge/noarch/pip-25.1.1-pyh145f28c_0.conda#01384ff1639c6330a0924791413b8714 +https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_1.conda#e9dcbce5f45f9ee500e728ae58b605b6 +https://conda.anaconda.org/conda-forge/noarch/setuptools-80.1.0-pyhff2d567_0.conda#f6f72d0837c79eaec77661be43e8a691 +https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.6.0-pyhecae5ae_0.conda#9d64911b31d57ca443e9f1e36b04385f +https://conda.anaconda.org/conda-forge/noarch/tomli-2.2.1-pyhd8ed1ab_1.conda#ac944244f1fed2eb49bae07193ae8215 +https://conda.anaconda.org/conda-forge/linux-64/ccache-4.11.3-h80c52d3_0.conda#eb517c6a2b960c3ccb6f1db1005f063a +https://conda.anaconda.org/conda-forge/noarch/joblib-1.5.0-pyhd8ed1ab_0.conda#3d7257f0a61c9aa4ffa3e324a887416b +https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-31_he106b2a_openblas.conda#abb32c727da370c481a1c206f5159ce9 +https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-31_h7ac8fdf_openblas.conda#452b98eafe050ecff932f0ec832dd03f +https://conda.anaconda.org/conda-forge/noarch/pyproject-metadata-0.9.1-pyhd8ed1ab_0.conda#22ae7c6ea81e0c8661ef32168dda929b +https://conda.anaconda.org/conda-forge/noarch/pytest-8.3.5-pyhd8ed1ab_0.conda#c3c9316209dec74a705a36797970c6be +https://conda.anaconda.org/conda-forge/noarch/python-freethreading-3.13.3-h92d6c8b_1.conda#4fa25290aec662a01642ba4b3c0ff5c1 +https://conda.anaconda.org/conda-forge/noarch/meson-python-0.18.0-pyh70fd9c4_0.conda#576c04b9d9f8e45285fb4d9452c26133 +https://conda.anaconda.org/conda-forge/linux-64/numpy-2.2.5-py313h103f029_0.conda#7dcbd568d6f8a4ffba5ace28915f1230 +https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.6.1-pyhd8ed1ab_1.conda#59aad4fb37cabc0bacc73cf344612ddd diff --git a/build_tools/azure/pylatest_pip_openblas_pandas_environment.yml b/build_tools/azure/pylatest_pip_openblas_pandas_environment.yml index 0f82886f4acb2..6c3da4bb863b4 100644 --- a/build_tools/azure/pylatest_pip_openblas_pandas_environment.yml +++ b/build_tools/azure/pylatest_pip_openblas_pandas_environment.yml @@ -4,7 +4,7 @@ channels: - defaults dependencies: - - python=3.9 + - python - ccache - pip - pip: @@ -16,7 +16,7 @@ dependencies: - matplotlib - pandas - pyamg - - pytest<8 + - pytest - pytest-xdist - pillow - ninja @@ -27,3 +27,5 @@ dependencies: - numpydoc - lightgbm - scikit-image + - array-api-strict + - 
scipy-doctest diff --git a/build_tools/azure/pylatest_pip_openblas_pandas_linux-64_conda.lock b/build_tools/azure/pylatest_pip_openblas_pandas_linux-64_conda.lock index 7534de9fbd5f6..b2e928b578212 100644 --- a/build_tools/azure/pylatest_pip_openblas_pandas_linux-64_conda.lock +++ b/build_tools/azure/pylatest_pip_openblas_pandas_linux-64_conda.lock @@ -1,88 +1,91 @@ # Generated by conda-lock. # platform: linux-64 -# input_hash: d4063b0b99f7a39e30c5f6e2d9c5dd293d9b206ce326841bf811534ea1be79f0 +# input_hash: 50f16a0198b6eb575a737fee25051b52a644d72f5fca26bd661651a85fcb6a07 @EXPLICIT https://repo.anaconda.com/pkgs/main/linux-64/_libgcc_mutex-0.1-main.conda#c3473ff8bdb3d124ed5ff11ec380d6f9 -https://repo.anaconda.com/pkgs/main/linux-64/ca-certificates-2024.3.11-h06a4308_0.conda#08529eb3504712baabcbda266a19feb7 -https://repo.anaconda.com/pkgs/main/linux-64/ld_impl_linux-64-2.38-h1181459_1.conda#68eedfd9c06f2b0e6888d8db345b7f5b -https://repo.anaconda.com/pkgs/main/noarch/tzdata-2024a-h04d1e81_0.conda#452af53adae0a5b06eb5d05c707b2f25 +https://repo.anaconda.com/pkgs/main/linux-64/ca-certificates-2025.2.25-h06a4308_0.conda#495015d24da8ad929e3ae2d18571016d +https://repo.anaconda.com/pkgs/main/linux-64/ld_impl_linux-64-2.40-h12ee557_0.conda#ee672b5f635340734f58d618b7bca024 +https://repo.anaconda.com/pkgs/main/linux-64/python_abi-3.13-0_cp313.conda#d4009c49dd2b54ffded7f1365b5f6505 +https://repo.anaconda.com/pkgs/main/noarch/tzdata-2025b-h04d1e81_0.conda#1d027393db3427ab22a02aa44a56f143 https://repo.anaconda.com/pkgs/main/linux-64/libgomp-11.2.0-h1234567_1.conda#b372c0eea9b60732fdae4b817a63c8cd https://repo.anaconda.com/pkgs/main/linux-64/libstdcxx-ng-11.2.0-h1234567_1.conda#57623d10a70e09e1d048c2b2b6f4e2dd https://repo.anaconda.com/pkgs/main/linux-64/_openmp_mutex-5.1-1_gnu.conda#71d281e9c2192cb3fa425655a8defb85 https://repo.anaconda.com/pkgs/main/linux-64/libgcc-ng-11.2.0-h1234567_1.conda#a87728dabf3151fb9cfa990bd2eb0464 -https://repo.anaconda.com/pkgs/main/linux-64/libffi-3.4.4-h6a678d5_0.conda#06e288f9250abef59b9a367d151fc339 +https://repo.anaconda.com/pkgs/main/linux-64/bzip2-1.0.8-h5eee18b_6.conda#f21a3ff51c1b271977f53ce956a69297 +https://repo.anaconda.com/pkgs/main/linux-64/expat-2.7.1-h6a678d5_0.conda#269942a9f3f943e2e5d8a2516a861f7c +https://repo.anaconda.com/pkgs/main/linux-64/libffi-3.4.4-h6a678d5_1.conda#70646cc713f0c43926cfdcfe9b695fe0 +https://repo.anaconda.com/pkgs/main/linux-64/libmpdec-4.0.0-h5eee18b_0.conda#feb10f42b1a7b523acbf85461be41a3e +https://repo.anaconda.com/pkgs/main/linux-64/libuuid-1.41.5-h5eee18b_0.conda#4a6a2354414c9080327274aa514e5299 https://repo.anaconda.com/pkgs/main/linux-64/ncurses-6.4-h6a678d5_0.conda#5558eec6e2191741a92f832ea826251c -https://repo.anaconda.com/pkgs/main/linux-64/openssl-3.0.13-h7f8727e_0.conda#c73d46a4d666da0ae3dcd3fd8f805122 -https://repo.anaconda.com/pkgs/main/linux-64/xz-5.4.6-h5eee18b_0.conda#81a9916f581d4da15a3839216a487c66 -https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.13-h5eee18b_0.conda#333e31fbfbb5057c92fa845ad6adef93 +https://repo.anaconda.com/pkgs/main/linux-64/openssl-3.0.16-h5eee18b_0.conda#5875526739afa058cfa84da1fa7a2ef4 +https://repo.anaconda.com/pkgs/main/linux-64/xz-5.6.4-h5eee18b_1.conda#3581505fa450962d631bd82b8616350e +https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.13-h5eee18b_1.conda#92e42d8310108b0a440fb2e60b2b2a25 https://repo.anaconda.com/pkgs/main/linux-64/ccache-3.7.9-hfe4627d_0.conda#bef6fc681c273bb7bd0c67d1a591365e 
https://repo.anaconda.com/pkgs/main/linux-64/readline-8.2-h5eee18b_0.conda#be42180685cce6e6b0329201d9f48efb -https://repo.anaconda.com/pkgs/main/linux-64/tk-8.6.12-h1ccaba5_0.conda#fa10ff4aa631fa4aa090a6234d7770b9 -https://repo.anaconda.com/pkgs/main/linux-64/sqlite-3.41.2-h5eee18b_0.conda#c7086c9ceb6cfe1c4c729a774a2d88a5 -https://repo.anaconda.com/pkgs/main/linux-64/python-3.9.19-h955ad1f_0.conda#33cb019c40e3409df392c99e3c34f352 -https://repo.anaconda.com/pkgs/main/linux-64/setuptools-68.2.2-py39h06a4308_0.conda#5b42cae5548732ae5c167bb1066085de -https://repo.anaconda.com/pkgs/main/linux-64/wheel-0.41.2-py39h06a4308_0.conda#ec1b8213c3585defaa6042ed2f95861d -https://repo.anaconda.com/pkgs/main/linux-64/pip-23.3.1-py39h06a4308_0.conda#685007e3dae59d211620f19926577bd6 -# pip alabaster @ https://files.pythonhosted.org/packages/32/34/d4e1c02d3bee589efb5dfa17f88ea08bdb3e3eac12bc475462aec52ed223/alabaster-0.7.16-py3-none-any.whl#sha256=b46733c07dce03ae4e150330b975c75737fa60f0a7c591b6c8bf4928a28e2c92 -# pip babel @ https://files.pythonhosted.org/packages/0d/35/4196b21041e29a42dc4f05866d0c94fa26c9da88ce12c38c2265e42c82fb/Babel-2.14.0-py3-none-any.whl#sha256=efb1a25b7118e67ce3a259bed20545c29cb68be8ad2c784c83689981b7a57287 -# pip certifi @ https://files.pythonhosted.org/packages/ba/06/a07f096c664aeb9f01624f858c3add0a4e913d6c96257acb4fce61e7de14/certifi-2024.2.2-py3-none-any.whl#sha256=dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1 -# pip charset-normalizer @ https://files.pythonhosted.org/packages/98/69/5d8751b4b670d623aa7a47bef061d69c279e9f922f6705147983aa76c3ce/charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796 +https://repo.anaconda.com/pkgs/main/linux-64/tk-8.6.14-h39e8969_0.conda#78dbc5e3c69143ebc037fc5d5b22e597 +https://repo.anaconda.com/pkgs/main/linux-64/sqlite-3.45.3-h5eee18b_0.conda#acf93d6aceb74d6110e20b44cc45939e +https://repo.anaconda.com/pkgs/main/linux-64/python-3.13.2-hf623796_100_cp313.conda#bf836f30ac4c16fd3d71c1aaa25da08c +https://repo.anaconda.com/pkgs/main/linux-64/setuptools-78.1.1-py313h06a4308_0.conda#8f8e1c1e3af9d2d371aaa0ee8316ae7c +https://repo.anaconda.com/pkgs/main/linux-64/wheel-0.45.1-py313h06a4308_0.conda#29057e876eedce0e37c2388c138a19f9 +https://repo.anaconda.com/pkgs/main/noarch/pip-25.1-pyhc872135_2.conda#2778327d2a700153fefe0e69438b18e1 +# pip alabaster @ https://files.pythonhosted.org/packages/7e/b3/6b4067be973ae96ba0d615946e314c5ae35f9f993eca561b356540bb0c2b/alabaster-1.0.0-py3-none-any.whl#sha256=fc6786402dc3fcb2de3cabd5fe455a2db534b371124f1f21de8731783dec828b +# pip babel @ https://files.pythonhosted.org/packages/b7/b8/3fe70c75fe32afc4bb507f75563d39bc5642255d1d94f1f23604725780bf/babel-2.17.0-py3-none-any.whl#sha256=4d0b53093fdfb4b21c92b5213dba5a1b23885afa8383709427046b21c366e5f2 +# pip certifi @ https://files.pythonhosted.org/packages/4a/7e/3db2bd1b1f9e95f7cddca6d6e75e2f2bd9f51b1246e546d88addca0106bd/certifi-2025.4.26-py3-none-any.whl#sha256=30350364dfe371162649852c63336a15c70c6510c2ad5015b21c2345311805f3 +# pip charset-normalizer @ https://files.pythonhosted.org/packages/e2/28/ffc026b26f441fc67bd21ab7f03b313ab3fe46714a14b516f931abe1a2d8/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=6c9379d65defcab82d07b2a9dfbfc2e95bc8fe0ebb1b176a3190230a3ef0e07c +# pip coverage @ 
https://files.pythonhosted.org/packages/cb/74/2f8cc196643b15bc096d60e073691dadb3dca48418f08bc78dd6e899383e/coverage-7.8.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=5aaeb00761f985007b38cf463b1d160a14a22c34eb3f6a39d9ad6fc27cb73008 # pip cycler @ https://files.pythonhosted.org/packages/e7/05/c19819d5e3d95294a6f5947fb9b9629efb316b96de511b418c53d245aae6/cycler-0.12.1-py3-none-any.whl#sha256=85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30 -# pip cython @ https://files.pythonhosted.org/packages/a7/f5/3dde4d96076888ceaa981827b098274c2b45ddd4b20d75a8cfaa92b91eec/Cython-3.0.10-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=651a15a8534ebfb9b58cb0b87c269c70984b6f9c88bfe65e4f635f0e3f07dfcd +# pip cython @ https://files.pythonhosted.org/packages/a8/30/7f48207ea13dab46604db0dd388e807d53513ba6ad1c34462892072f8f8c/Cython-3.0.12-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=879ae9023958d63c0675015369384642d0afb9c9d1f3473df9186c42f7a9d265 # pip docutils @ https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl#sha256=dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2 -# pip exceptiongroup @ https://files.pythonhosted.org/packages/01/90/79fe92dd413a9cab314ef5c591b5aa9b9ba787ae4cadab75055b0ae00b33/exceptiongroup-1.2.1-py3-none-any.whl#sha256=5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad # pip execnet @ https://files.pythonhosted.org/packages/43/09/2aea36ff60d16dd8879bdb2f5b3ee0ba8d08cbbdcdfe870e695ce3784385/execnet-2.1.1-py3-none-any.whl#sha256=26dee51f1b80cebd6d0ca8e74dd8745419761d3bef34163928cbebbdc4749fdc -# pip fonttools @ https://files.pythonhosted.org/packages/8b/c6/636f008104908a93b80419f756be755bb91df4b8a0c88d5158bb52c82c3a/fonttools-4.51.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=0d145976194a5242fdd22df18a1b451481a88071feadf251221af110ca8f00ce -# pip idna @ https://files.pythonhosted.org/packages/e5/3e/741d8c82801c347547f8a2a06aa57dbb1992be9e948df2ea0eda2c8b79e8/idna-3.7-py3-none-any.whl#sha256=82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0 +# pip fonttools @ https://files.pythonhosted.org/packages/f8/ad/c25116352f456c0d1287545a7aa24e98987b6d99c5b0456c4bd14321f20f/fonttools-4.57.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=4dea5893b58d4637ffa925536462ba626f8a1b9ffbe2f5c272cdf2c6ebadb817 +# pip idna @ https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl#sha256=946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3 # pip imagesize @ https://files.pythonhosted.org/packages/ff/62/85c4c919272577931d407be5ba5d71c20f0b616d31a0befe0ae45bb79abd/imagesize-1.4.1-py2.py3-none-any.whl#sha256=0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b -# pip iniconfig @ https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl#sha256=b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374 -# pip joblib @ https://files.pythonhosted.org/packages/ae/e2/4dea6313ef2b38442fccbbaf4017e50a6c3c8a50e8ee9b512783e5c90409/joblib-1.4.0-py3-none-any.whl#sha256=42942470d4062537be4d54c83511186da1fc14ba354961a2114da91efa9a4ed7 -# pip kiwisolver @ 
https://files.pythonhosted.org/packages/c0/a8/841594f11d0b88d8aeb26991bc4dac38baa909dc58d0c4262a4f7893bcbf/kiwisolver-1.4.5-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl#sha256=6c3bd3cde54cafb87d74d8db50b909705c62b17c2099b8f2e25b461882e544ff -# pip markupsafe @ https://files.pythonhosted.org/packages/5f/5a/360da85076688755ea0cceb92472923086993e86b5613bbae9fbc14136b0/MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3 -# pip meson @ https://files.pythonhosted.org/packages/33/75/b1a37fa7b2dbca8c0dbb04d5cdd7e2720c8ef6febe41b4a74866350e041c/meson-1.4.0-py3-none-any.whl#sha256=476a458d51fcfa322a6bdc64da5138997c542d08e6b2e49b9fa68c46fd7c4475 -# pip networkx @ https://files.pythonhosted.org/packages/d5/f0/8fbc882ca80cf077f1b246c0e3c3465f7f415439bdea6b899f6b19f61f70/networkx-3.2.1-py3-none-any.whl#sha256=f18c69adc97877c42332c170849c96cefa91881c99a7cb3e95b7c659ebdc1ec2 -# pip ninja @ https://files.pythonhosted.org/packages/6d/92/8d7aebd4430ab5ff65df2bfee6d5745f95c004284db2d8ca76dcbfd9de47/ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl#sha256=84502ec98f02a037a169c4b0d5d86075eaf6afc55e1879003d6cab51ced2ea4b -# pip numpy @ https://files.pythonhosted.org/packages/54/30/c2a907b9443cf42b90c17ad10c1e8fa801975f01cb9764f3f8eb8aea638b/numpy-1.26.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3 -# pip packaging @ https://files.pythonhosted.org/packages/49/df/1fceb2f8900f8639e278b056416d49134fb8d84c5942ffaa01ad34782422/packaging-24.0-py3-none-any.whl#sha256=2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5 -# pip pillow @ https://files.pythonhosted.org/packages/f5/6d/52e82352670e850f468de9e6bccced4202a09f58e7ea5ecdbf08283d85cb/pillow-10.3.0-cp39-cp39-manylinux_2_28_x86_64.whl#sha256=1dfc94946bc60ea375cc39cff0b8da6c7e5f8fcdc1d946beb8da5c216156ddd8 +# pip iniconfig @ https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl#sha256=9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760 +# pip joblib @ https://files.pythonhosted.org/packages/da/d3/13ee227a148af1c693654932b8b0b02ed64af5e1f7406d56b088b57574cd/joblib-1.5.0-py3-none-any.whl#sha256=206144b320246485b712fc8cc51f017de58225fa8b414a1fe1764a7231aca491 +# pip kiwisolver @ https://files.pythonhosted.org/packages/8f/e9/6a7d025d8da8c4931522922cd706105aa32b3291d1add8c5427cdcd66e63/kiwisolver-1.4.8-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=a5ce1e481a74b44dd5e92ff03ea0cb371ae7a0268318e202be06c8f04f4f1246 +# pip markupsafe @ https://files.pythonhosted.org/packages/0c/91/96cf928db8236f1bfab6ce15ad070dfdd02ed88261c2afafd4b43575e9e9/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=15ab75ef81add55874e7ab7055e9c397312385bd9ced94920f2802310c930396 +# pip meson @ https://files.pythonhosted.org/packages/df/d7/f1c8acf0e597d4d07532f519780ee6e11ba285a9b092f18706b4c9118331/meson-1.8.0-py3-none-any.whl#sha256=472b7b25da286447333d32872b82d1c6f1a34024fb8ee017d7308056c25fec1f +# pip networkx @ https://files.pythonhosted.org/packages/b9/54/dd730b32ea14ea797530a4479b2ed46a6fb250f682a9cfb997e968bf0261/networkx-3.4.2-py3-none-any.whl#sha256=df5d4365b724cf81b8c6a7312509d0c22386097011ad1abe274afd5e9d3bbc5f +# pip ninja @ 
https://files.pythonhosted.org/packages/eb/7a/455d2877fe6cf99886849c7f9755d897df32eaf3a0fba47b56e615f880f7/ninja-1.11.1.4-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl#sha256=096487995473320de7f65d622c3f1d16c3ad174797602218ca8c967f51ec38a0 +# pip numpy @ https://files.pythonhosted.org/packages/aa/fc/ebfd32c3e124e6a1043e19c0ab0769818aa69050ce5589b63d05ff185526/numpy-2.2.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=2ba321813a00e508d5421104464510cc962a6f791aa2fca1c97b1e65027da80d +# pip packaging @ https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl#sha256=29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484 +# pip pillow @ https://files.pythonhosted.org/packages/13/eb/2552ecebc0b887f539111c2cd241f538b8ff5891b8903dfe672e997529be/pillow-11.2.1-cp313-cp313-manylinux_2_28_x86_64.whl#sha256=ad275964d52e2243430472fc5d2c2334b4fc3ff9c16cb0a19254e25efa03a155 # pip pluggy @ https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl#sha256=44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669 -# pip pygments @ https://files.pythonhosted.org/packages/97/9c/372fef8377a6e340b1704768d20daaded98bf13282b5327beb2e2fe2c7ef/pygments-2.17.2-py3-none-any.whl#sha256=b27c2826c47d0f3219f29554824c30c5e8945175d888647acd804ddd04af846c -# pip pyparsing @ https://files.pythonhosted.org/packages/9d/ea/6d76df31432a0e6fdf81681a895f009a4bb47b3c39036db3e1b528191d52/pyparsing-3.1.2-py3-none-any.whl#sha256=f9db75911801ed778fe61bb643079ff86601aca99fcae6345aa67292038fb742 -# pip pytz @ https://files.pythonhosted.org/packages/9c/3d/a121f284241f08268b21359bd425f7d4825cffc5ac5cd0e1b3d82ffd2b10/pytz-2024.1-py2.py3-none-any.whl#sha256=328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319 -# pip six @ https://files.pythonhosted.org/packages/d9/5a/e7c31adbe875f2abbb91bd84cf2dc52d792b5a01506781dbcf25c91daf11/six-1.16.0-py2.py3-none-any.whl#sha256=8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 +# pip pygments @ https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl#sha256=9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c +# pip pyparsing @ https://files.pythonhosted.org/packages/05/e7/df2285f3d08fee213f2d041540fa4fc9ca6c2d44cf36d3a035bf2a8d2bcc/pyparsing-3.2.3-py3-none-any.whl#sha256=a749938e02d6fd0b59b356ca504a24982314bb090c383e3cf201c95ef7e2bfcf +# pip pytz @ https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl#sha256=5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00 +# pip roman-numerals-py @ https://files.pythonhosted.org/packages/53/97/d2cbbaa10c9b826af0e10fdf836e1bf344d9f0abb873ebc34d1f49642d3f/roman_numerals_py-3.1.0-py3-none-any.whl#sha256=9da2ad2fb670bcf24e81070ceb3be72f6c11c440d73bd579fbeca1e9f330954c +# pip six @ https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl#sha256=4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274 # pip snowballstemmer @ https://files.pythonhosted.org/packages/ed/dc/c02e01294f7265e63a7315fe086dd1df7dacb9f840a804da846b96d01b96/snowballstemmer-2.2.0-py2.py3-none-any.whl#sha256=c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a -# pip 
sphinxcontrib-applehelp @ https://files.pythonhosted.org/packages/56/89/fea3fbf6785b388e6cb8a1beaf62f96e80b37311bdeed6e133388a732426/sphinxcontrib_applehelp-1.0.8-py3-none-any.whl#sha256=cb61eb0ec1b61f349e5cc36b2028e9e7ca765be05e49641c97241274753067b4 -# pip sphinxcontrib-devhelp @ https://files.pythonhosted.org/packages/a0/52/1049d918d1d1c72857d285c3f0c64c1cbe0be394ce1c93a3d2aa4f39fe3b/sphinxcontrib_devhelp-1.0.6-py3-none-any.whl#sha256=6485d09629944511c893fa11355bda18b742b83a2b181f9a009f7e500595c90f -# pip sphinxcontrib-htmlhelp @ https://files.pythonhosted.org/packages/c2/e9/74c4cda5b409af3222fda38f0774e616011bc935f639dbc0da5ca2d1be7d/sphinxcontrib_htmlhelp-2.0.5-py3-none-any.whl#sha256=393f04f112b4d2f53d93448d4bce35842f62b307ccdc549ec1585e950bc35e04 +# pip sphinxcontrib-applehelp @ https://files.pythonhosted.org/packages/5d/85/9ebeae2f76e9e77b952f4b274c27238156eae7979c5421fba91a28f4970d/sphinxcontrib_applehelp-2.0.0-py3-none-any.whl#sha256=4cd3f0ec4ac5dd9c17ec65e9ab272c9b867ea77425228e68ecf08d6b28ddbdb5 +# pip sphinxcontrib-devhelp @ https://files.pythonhosted.org/packages/35/7a/987e583882f985fe4d7323774889ec58049171828b58c2217e7f79cdf44e/sphinxcontrib_devhelp-2.0.0-py3-none-any.whl#sha256=aefb8b83854e4b0998877524d1029fd3e6879210422ee3780459e28a1f03a8a2 +# pip sphinxcontrib-htmlhelp @ https://files.pythonhosted.org/packages/0a/7b/18a8c0bcec9182c05a0b3ec2a776bba4ead82750a55ff798e8d406dae604/sphinxcontrib_htmlhelp-2.1.0-py3-none-any.whl#sha256=166759820b47002d22914d64a075ce08f4c46818e17cfc9470a9786b759b19f8 # pip sphinxcontrib-jsmath @ https://files.pythonhosted.org/packages/c2/42/4c8646762ee83602e3fb3fbe774c2fac12f317deb0b5dbeeedd2d3ba4b77/sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl#sha256=2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178 -# pip sphinxcontrib-qthelp @ https://files.pythonhosted.org/packages/80/b3/1beac14a88654d2e5120d0143b49be5ad450b86eb1963523d8dbdcc51eb2/sphinxcontrib_qthelp-1.0.7-py3-none-any.whl#sha256=e2ae3b5c492d58fcbd73281fbd27e34b8393ec34a073c792642cd8e529288182 -# pip sphinxcontrib-serializinghtml @ https://files.pythonhosted.org/packages/38/24/228bb903ea87b9e08ab33470e6102402a644127108c7117ac9c00d849f82/sphinxcontrib_serializinghtml-1.1.10-py3-none-any.whl#sha256=326369b8df80a7d2d8d7f99aa5ac577f51ea51556ed974e7716cfd4fca3f6cb7 +# pip sphinxcontrib-qthelp @ https://files.pythonhosted.org/packages/27/83/859ecdd180cacc13b1f7e857abf8582a64552ea7a061057a6c716e790fce/sphinxcontrib_qthelp-2.0.0-py3-none-any.whl#sha256=b18a828cdba941ccd6ee8445dbe72ffa3ef8cbe7505d8cd1fa0d42d3f2d5f3eb +# pip sphinxcontrib-serializinghtml @ https://files.pythonhosted.org/packages/52/a7/d2782e4e3f77c8450f727ba74a8f12756d5ba823d81b941f1b04da9d033a/sphinxcontrib_serializinghtml-2.0.0-py3-none-any.whl#sha256=6e2cb0eef194e10c27ec0023bfeb25badbbb5868244cf5bc5bdc04e4464bf331 # pip tabulate @ https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl#sha256=024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f -# pip threadpoolctl @ https://files.pythonhosted.org/packages/1e/84/ccd9b08653022b7785b6e3ee070ffb2825841e0dc119be22f0840b2b35cb/threadpoolctl-3.4.0-py3-none-any.whl#sha256=8f4c689a65b23e5ed825c8436a92b818aac005e0f3715f6a1664d7c7ee29d262 -# pip tomli @ https://files.pythonhosted.org/packages/97/75/10a9ebee3fd790d20926a90a2547f0bf78f371b2f13aa822c759680ca7b9/tomli-2.0.1-py3-none-any.whl#sha256=939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc -# pip tzdata @ 
https://files.pythonhosted.org/packages/65/58/f9c9e6be752e9fcb8b6a0ee9fb87e6e7a1f6bcab2cdc73f02bb7ba91ada0/tzdata-2024.1-py2.py3-none-any.whl#sha256=9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252 -# pip urllib3 @ https://files.pythonhosted.org/packages/a2/73/a68704750a7679d0b6d3ad7aa8d4da8e14e151ae82e6fee774e6e0d05ec8/urllib3-2.2.1-py3-none-any.whl#sha256=450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d -# pip zipp @ https://files.pythonhosted.org/packages/c2/0a/ba9d0ee9536d3ef73a3448e931776e658b36f128d344e175bc32b092a8bf/zipp-3.18.1-py3-none-any.whl#sha256=206f5a15f2af3dbaee80769fb7dc6f249695e940acca08dfb2a4769fe61e538b -# pip contourpy @ https://files.pythonhosted.org/packages/31/a2/2f12e3a6e45935ff694654b710961b03310b0e1ec997ee9f416d3c873f87/contourpy-1.2.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=e1d59258c3c67c865435d8fbeb35f8c59b8bef3d6f46c1f29f6123556af28445 -# pip coverage @ https://files.pythonhosted.org/packages/12/7f/9b787ffc31bc39aa9e98c7005b698e7c6539bd222043e4a9c83b83c782a2/coverage-7.5.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=db2de4e546f0ec4b2787d625e0b16b78e99c3e21bc1722b4977c0dddf11ca84e -# pip imageio @ https://files.pythonhosted.org/packages/a3/b6/39c7dad203d9984225f47e0aa39ac3ba3a47c77a02d0ef2a7be691855a06/imageio-2.34.1-py3-none-any.whl#sha256=408c1d4d62f72c9e8347e7d1ca9bc11d8673328af3913868db3b828e28b40a4c -# pip importlib-metadata @ https://files.pythonhosted.org/packages/2d/0a/679461c511447ffaf176567d5c496d1de27cbe34a87df6677d7171b2fbd4/importlib_metadata-7.1.0-py3-none-any.whl#sha256=30962b96c0c223483ed6cc7280e7f0199feb01a0e40cfae4d4450fc6fab1f570 -# pip importlib-resources @ https://files.pythonhosted.org/packages/75/06/4df55e1b7b112d183f65db9503bff189e97179b256e1ea450a3c365241e0/importlib_resources-6.4.0-py3-none-any.whl#sha256=50d10f043df931902d4194ea07ec57960f66a80449ff867bfe782b4c486ba78c -# pip jinja2 @ https://files.pythonhosted.org/packages/30/6d/6de6be2d02603ab56e72997708809e8a5b0fbfee080735109b40a3564843/Jinja2-3.1.3-py3-none-any.whl#sha256=7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa +# pip threadpoolctl @ https://files.pythonhosted.org/packages/32/d5/f9a850d79b0851d1d4ef6456097579a9005b31fea68726a4ae5f2d82ddd9/threadpoolctl-3.6.0-py3-none-any.whl#sha256=43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb +# pip tzdata @ https://files.pythonhosted.org/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl#sha256=1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8 +# pip urllib3 @ https://files.pythonhosted.org/packages/6b/11/cc635220681e93a0183390e26485430ca2c7b5f9d33b15c74c2861cb8091/urllib3-2.4.0-py3-none-any.whl#sha256=4e16665048960a0900c702d4a66415956a584919c03361cac9f1df5c5dd7e813 +# pip array-api-strict @ https://files.pythonhosted.org/packages/fe/c7/a97e26083985b49a7a54006364348cf1c26e5523850b8522a39b02b19715/array_api_strict-2.3.1-py3-none-any.whl#sha256=0ca6988be1c82d2f05b6cd44bc7e14cb390555d1455deb50f431d6d0cf468ded +# pip contourpy @ https://files.pythonhosted.org/packages/c8/65/5245ce8c548a8422236c13ffcdcdada6a2a812c361e9e0c70548bb40b661/contourpy-1.3.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=434f0adf84911c924519d2b08fc10491dd282b20bdd3fa8f60fd816ea0b48841 +# pip imageio @ 
https://files.pythonhosted.org/packages/cb/bd/b394387b598ed84d8d0fa90611a90bee0adc2021820ad5729f7ced74a8e2/imageio-2.37.0-py3-none-any.whl#sha256=11efa15b87bc7871b61590326b2d635439acc321cf7f8ce996f812543ce10eed +# pip jinja2 @ https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl#sha256=85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67 # pip lazy-loader @ https://files.pythonhosted.org/packages/83/60/d497a310bde3f01cb805196ac61b7ad6dc5dcf8dce66634dc34364b20b4f/lazy_loader-0.4-py3-none-any.whl#sha256=342aa8e14d543a154047afb4ba8ef17f5563baad3fc610d7b15b213b0f119efc -# pip pyproject-metadata @ https://files.pythonhosted.org/packages/aa/5f/bb5970d3d04173b46c9037109f7f05fc8904ff5be073ee49bb6ff00301bc/pyproject_metadata-0.8.0-py3-none-any.whl#sha256=ad858d448e1d3a1fb408ac5bac9ea7743e7a8bbb472f2693aaa334d2db42f526 -# pip pytest @ https://files.pythonhosted.org/packages/51/ff/f6e8b8f39e08547faece4bd80f89d5a8de68a38b2d179cc1c4490ffa3286/pytest-7.4.4-py3-none-any.whl#sha256=b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8 +# pip pyproject-metadata @ https://files.pythonhosted.org/packages/7e/b1/8e63033b259e0a4e40dd1ec4a9fee17718016845048b43a36ec67d62e6fe/pyproject_metadata-0.9.1-py3-none-any.whl#sha256=ee5efde548c3ed9b75a354fc319d5afd25e9585fa918a34f62f904cc731973ad +# pip pytest @ https://files.pythonhosted.org/packages/30/3d/64ad57c803f1fa1e963a7946b6e0fea4a70df53c1a7fed304586539c2bac/pytest-8.3.5-py3-none-any.whl#sha256=c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820 # pip python-dateutil @ https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl#sha256=a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427 -# pip requests @ https://files.pythonhosted.org/packages/70/8e/0e2d847013cb52cd35b38c009bb167a1a26b2ce6cd6965bf26b47bc0bf44/requests-2.31.0-py3-none-any.whl#sha256=58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f -# pip scipy @ https://files.pythonhosted.org/packages/c6/ba/a778e6c0020d728c119b0379805a357135fe8c9bc87fdb7e0750ca11319f/scipy-1.13.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=28e286bf9ac422d6beb559bc61312c348ca9b0f0dae0d7c5afde7f722d6ea13d -# pip tifffile @ https://files.pythonhosted.org/packages/88/23/6398b7bca8967c853b90ba2f8da5e3ad1e9b2ca5b9f869a8c26ea41543e2/tifffile-2024.4.24-py3-none-any.whl#sha256=8d0b982f4b01ace358835ae6c2beb5a70cb7287f5d3a2e96c318bd5befa97b1f -# pip lightgbm @ https://files.pythonhosted.org/packages/ba/11/cb8b67f3cbdca05b59a032bb57963d4fe8c8d18c3870f30bed005b7f174d/lightgbm-4.3.0-py3-none-manylinux_2_28_x86_64.whl#sha256=104496a3404cb2452d3412cbddcfbfadbef9c372ea91e3a9b8794bcc5183bf07 -# pip matplotlib @ https://files.pythonhosted.org/packages/5e/2c/513395a63a9e1124a5648addbf73be23cc603f955af026b04416da98dc96/matplotlib-3.8.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=606e3b90897554c989b1e38a258c626d46c873523de432b1462f295db13de6f9 -# pip meson-python @ https://files.pythonhosted.org/packages/91/c0/104cb6244c83fe6bc3886f144cc433db0c0c78efac5dc00e409a5a08c87d/meson_python-0.16.0-py3-none-any.whl#sha256=842dc9f5dc29e55fc769ff1b6fe328412fe6c870220fc321060a1d2d395e69e8 -# pip pandas @ 
https://files.pythonhosted.org/packages/bb/30/f6f1f1ac36250f50c421b1b6af08c35e5a8b5a84385ef928625336b93e6f/pandas-2.2.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=66b479b0bd07204e37583c191535505410daa8df638fd8e75ae1b383851fe921 -# pip pyamg @ https://files.pythonhosted.org/packages/68/a9/aed9f557e7eb779d2cb4fa090663f8540979e0c04dadd16e9a0bdc9632c5/pyamg-5.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=5817d4567fb240dab4779bb1630bbb3035b3827731fcdaeb9ecc9c8814319995 -# pip pytest-cov @ https://files.pythonhosted.org/packages/78/3a/af5b4fa5961d9a1e6237b530eb87dd04aea6eb83da09d2a4073d81b54ccf/pytest_cov-5.0.0-py3-none-any.whl#sha256=4f0764a1219df53214206bf1feea4633c3b558a2925c8b59f144f682861ce652 +# pip requests @ https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl#sha256=70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6 +# pip scipy @ https://files.pythonhosted.org/packages/03/5a/fc34bf1aa14dc7c0e701691fa8685f3faec80e57d816615e3625f28feb43/scipy-1.15.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=fb530e4794fc8ea76a4a21ccb67dea33e5e0e60f07fc38a49e821e1eae3b71a0 +# pip tifffile @ https://files.pythonhosted.org/packages/6e/be/10d23cfd4078fbec6aba768a357eff9e70c0b6d2a07398425985c524ad2a/tifffile-2025.3.30-py3-none-any.whl#sha256=0ed6eee7b66771db2d1bfc42262a51b01887505d35539daef118f4ff8c0f629c +# pip lightgbm @ https://files.pythonhosted.org/packages/42/86/dabda8fbcb1b00bcfb0003c3776e8ade1aa7b413dff0a2c08f457dace22f/lightgbm-4.6.0-py3-none-manylinux_2_28_x86_64.whl#sha256=cb19b5afea55b5b61cbb2131095f50538bd608a00655f23ad5d25ae3e3bf1c8d +# pip matplotlib @ https://files.pythonhosted.org/packages/51/d0/2bc4368abf766203e548dc7ab57cf7e9c621f1a3c72b516cc7715347b179/matplotlib-3.10.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=7e496c01441be4c7d5f96d4e40f7fca06e20dcb40e44c8daa2e740e1757ad9e6 +# pip meson-python @ https://files.pythonhosted.org/packages/28/58/66db620a8a7ccb32633de9f403fe49f1b63c68ca94e5c340ec5cceeb9821/meson_python-0.18.0-py3-none-any.whl#sha256=3b0fe051551cc238f5febb873247c0949cd60ded556efa130aa57021804868e2 +# pip pandas @ https://files.pythonhosted.org/packages/e8/31/aa8da88ca0eadbabd0a639788a6da13bb2ff6edbbb9f29aa786450a30a91/pandas-2.2.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24 +# pip pyamg @ https://files.pythonhosted.org/packages/cd/a7/0df731cbfb09e73979a1a032fc7bc5be0eba617d798b998a0f887afe8ade/pyamg-5.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=6999b351ab969c79faacb81faa74c0fa9682feeff3954979212872a3ee40c298 +# pip pytest-cov @ https://files.pythonhosted.org/packages/28/d0/def53b4a790cfb21483016430ed828f64830dd981ebe1089971cd10cab25/pytest_cov-6.1.1-py3-none-any.whl#sha256=bddf29ed2d0ab6f4df17b4c55b0a657287db8684af9c42ea546b21b1041b3dde # pip pytest-xdist @ https://files.pythonhosted.org/packages/6d/82/1d96bf03ee4c0fdc3c0cbe61470070e659ca78dc0086fb88b66c185e2449/pytest_xdist-3.6.1-py3-none-any.whl#sha256=9ed4adfb68a016610848639bb7e02c9352d5d9f03d04809919e2dafc3be4cca7 -# pip scikit-image @ https://files.pythonhosted.org/packages/a3/7e/4cd853a855ac34b4ef3ef6a5c3d1c2e96eaca1154fc6be75db55ffa87393/scikit_image-0.22.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=3b7a6c89e8d6252332121b58f50e1625c35f7d6a85489c0b6b7ee4f5155d547a -# pip sphinx @ 
https://files.pythonhosted.org/packages/b4/fa/130c32ed94cf270e3d0b9ded16fb7b2c8fea86fa7263c29a696a30c1dde7/sphinx-7.3.7-py3-none-any.whl#sha256=413f75440be4cacf328f580b4274ada4565fb2187d696a84970c23f77b64d8c3 -# pip numpydoc @ https://files.pythonhosted.org/packages/f0/fa/dcfe0f65660661db757ee9ebd84e170ff98edd5d80235f62457d9088f85f/numpydoc-1.7.0-py3-none-any.whl#sha256=5a56419d931310d79a06cfc2a126d1558700feeb9b4f3d8dcae1a8134be829c9 +# pip scikit-image @ https://files.pythonhosted.org/packages/cd/9b/c3da56a145f52cd61a68b8465d6a29d9503bc45bc993bb45e84371c97d94/scikit_image-0.25.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=b8abd3c805ce6944b941cfed0406d88faeb19bab3ed3d4b50187af55cf24d147 +# pip scipy-doctest @ https://files.pythonhosted.org/packages/76/eb/668949f884d5fe8a0d231dcba42c02e7b84626b35ca9072d6283c3aae773/scipy_doctest-1.7.1-py3-none-any.whl#sha256=dece106ec5ac8c595cc6372480d724e68c684450124dd0ddeb6be487ad62b365 +# pip sphinx @ https://files.pythonhosted.org/packages/31/53/136e9eca6e0b9dc0e1962e2c908fbea2e5ac000c2a2fbd9a35797958c48b/sphinx-8.2.3-py3-none-any.whl#sha256=4405915165f13521d875a8c29c8970800a0141c14cc5416a38feca4ea5d9b9c3 +# pip numpydoc @ https://files.pythonhosted.org/packages/6c/45/56d99ba9366476cd8548527667f01869279cedb9e66b28eb4dfb27701679/numpydoc-1.8.0-py3-none-any.whl#sha256=72024c7fd5e17375dec3608a27c03303e8ad00c81292667955c6fea7a3ccf541 diff --git a/build_tools/azure/pylatest_pip_scipy_dev_environment.yml b/build_tools/azure/pylatest_pip_scipy_dev_environment.yml index 7d8e7a66d987e..01709b79e3720 100644 --- a/build_tools/azure/pylatest_pip_scipy_dev_environment.yml +++ b/build_tools/azure/pylatest_pip_scipy_dev_environment.yml @@ -9,7 +9,7 @@ dependencies: - pip - pip: - threadpoolctl - - pytest<8 + - pytest - pytest-xdist - pip - ninja diff --git a/build_tools/azure/pylatest_pip_scipy_dev_linux-64_conda.lock b/build_tools/azure/pylatest_pip_scipy_dev_linux-64_conda.lock index dd70d9af4d30a..9546a87a15657 100644 --- a/build_tools/azure/pylatest_pip_scipy_dev_linux-64_conda.lock +++ b/build_tools/azure/pylatest_pip_scipy_dev_linux-64_conda.lock @@ -1,67 +1,70 @@ # Generated by conda-lock. 
 # platform: linux-64
-# input_hash: 777413179f12c3f7972520657eb2c826ffd6ff4c15e5da73631696b7ef07c3f2
+# input_hash: 7555819e95d879c5a5147e6431581e17ffc5d77e8a43b19c8a911821378d2521
 @EXPLICIT
 https://repo.anaconda.com/pkgs/main/linux-64/_libgcc_mutex-0.1-main.conda#c3473ff8bdb3d124ed5ff11ec380d6f9
-https://repo.anaconda.com/pkgs/main/linux-64/ca-certificates-2024.3.11-h06a4308_0.conda#08529eb3504712baabcbda266a19feb7
-https://repo.anaconda.com/pkgs/main/linux-64/ld_impl_linux-64-2.38-h1181459_1.conda#68eedfd9c06f2b0e6888d8db345b7f5b
-https://repo.anaconda.com/pkgs/main/noarch/tzdata-2024a-h04d1e81_0.conda#452af53adae0a5b06eb5d05c707b2f25
+https://repo.anaconda.com/pkgs/main/linux-64/ca-certificates-2025.2.25-h06a4308_0.conda#495015d24da8ad929e3ae2d18571016d
+https://repo.anaconda.com/pkgs/main/linux-64/ld_impl_linux-64-2.40-h12ee557_0.conda#ee672b5f635340734f58d618b7bca024
+https://repo.anaconda.com/pkgs/main/linux-64/python_abi-3.13-0_cp313.conda#d4009c49dd2b54ffded7f1365b5f6505
+https://repo.anaconda.com/pkgs/main/noarch/tzdata-2025b-h04d1e81_0.conda#1d027393db3427ab22a02aa44a56f143
 https://repo.anaconda.com/pkgs/main/linux-64/libgomp-11.2.0-h1234567_1.conda#b372c0eea9b60732fdae4b817a63c8cd
 https://repo.anaconda.com/pkgs/main/linux-64/libstdcxx-ng-11.2.0-h1234567_1.conda#57623d10a70e09e1d048c2b2b6f4e2dd
 https://repo.anaconda.com/pkgs/main/linux-64/_openmp_mutex-5.1-1_gnu.conda#71d281e9c2192cb3fa425655a8defb85
 https://repo.anaconda.com/pkgs/main/linux-64/libgcc-ng-11.2.0-h1234567_1.conda#a87728dabf3151fb9cfa990bd2eb0464
-https://repo.anaconda.com/pkgs/main/linux-64/bzip2-1.0.8-h5eee18b_5.conda#9c8dec113089c4aca7392c6a3864f505
-https://repo.anaconda.com/pkgs/main/linux-64/expat-2.6.2-h6a678d5_0.conda#55049db2772dae035f6b8a95f72b5970
-https://repo.anaconda.com/pkgs/main/linux-64/libffi-3.4.4-h6a678d5_0.conda#06e288f9250abef59b9a367d151fc339
+https://repo.anaconda.com/pkgs/main/linux-64/bzip2-1.0.8-h5eee18b_6.conda#f21a3ff51c1b271977f53ce956a69297
+https://repo.anaconda.com/pkgs/main/linux-64/expat-2.7.1-h6a678d5_0.conda#269942a9f3f943e2e5d8a2516a861f7c
+https://repo.anaconda.com/pkgs/main/linux-64/libffi-3.4.4-h6a678d5_1.conda#70646cc713f0c43926cfdcfe9b695fe0
+https://repo.anaconda.com/pkgs/main/linux-64/libmpdec-4.0.0-h5eee18b_0.conda#feb10f42b1a7b523acbf85461be41a3e
 https://repo.anaconda.com/pkgs/main/linux-64/libuuid-1.41.5-h5eee18b_0.conda#4a6a2354414c9080327274aa514e5299
 https://repo.anaconda.com/pkgs/main/linux-64/ncurses-6.4-h6a678d5_0.conda#5558eec6e2191741a92f832ea826251c
-https://repo.anaconda.com/pkgs/main/linux-64/openssl-3.0.13-h7f8727e_0.conda#c73d46a4d666da0ae3dcd3fd8f805122
-https://repo.anaconda.com/pkgs/main/linux-64/xz-5.4.6-h5eee18b_0.conda#81a9916f581d4da15a3839216a487c66
-https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.13-h5eee18b_0.conda#333e31fbfbb5057c92fa845ad6adef93
+https://repo.anaconda.com/pkgs/main/linux-64/openssl-3.0.16-h5eee18b_0.conda#5875526739afa058cfa84da1fa7a2ef4
+https://repo.anaconda.com/pkgs/main/linux-64/xz-5.6.4-h5eee18b_1.conda#3581505fa450962d631bd82b8616350e
+https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.13-h5eee18b_1.conda#92e42d8310108b0a440fb2e60b2b2a25
 https://repo.anaconda.com/pkgs/main/linux-64/ccache-3.7.9-hfe4627d_0.conda#bef6fc681c273bb7bd0c67d1a591365e
 https://repo.anaconda.com/pkgs/main/linux-64/readline-8.2-h5eee18b_0.conda#be42180685cce6e6b0329201d9f48efb
-https://repo.anaconda.com/pkgs/main/linux-64/tk-8.6.12-h1ccaba5_0.conda#fa10ff4aa631fa4aa090a6234d7770b9
-https://repo.anaconda.com/pkgs/main/linux-64/sqlite-3.41.2-h5eee18b_0.conda#c7086c9ceb6cfe1c4c729a774a2d88a5
-https://repo.anaconda.com/pkgs/main/linux-64/python-3.12.3-h996f2a0_0.conda#77af2bd351a8311d1e780bcfa7819bb8
-https://repo.anaconda.com/pkgs/main/linux-64/setuptools-68.2.2-py312h06a4308_0.conda#83ba634cde4f30d9e0b88e4ac9716ca4
-https://repo.anaconda.com/pkgs/main/linux-64/wheel-0.41.2-py312h06a4308_0.conda#b2c4f82880d58d679f3982370d80c0e2
-https://repo.anaconda.com/pkgs/main/linux-64/pip-23.3.1-py312h06a4308_0.conda#e1d44bca4a257e84af33503233491107
-# pip alabaster @ https://files.pythonhosted.org/packages/32/34/d4e1c02d3bee589efb5dfa17f88ea08bdb3e3eac12bc475462aec52ed223/alabaster-0.7.16-py3-none-any.whl#sha256=b46733c07dce03ae4e150330b975c75737fa60f0a7c591b6c8bf4928a28e2c92
-# pip babel @ https://files.pythonhosted.org/packages/0d/35/4196b21041e29a42dc4f05866d0c94fa26c9da88ce12c38c2265e42c82fb/Babel-2.14.0-py3-none-any.whl#sha256=efb1a25b7118e67ce3a259bed20545c29cb68be8ad2c784c83689981b7a57287
-# pip certifi @ https://files.pythonhosted.org/packages/ba/06/a07f096c664aeb9f01624f858c3add0a4e913d6c96257acb4fce61e7de14/certifi-2024.2.2-py3-none-any.whl#sha256=dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1
-# pip charset-normalizer @ https://files.pythonhosted.org/packages/ee/fb/14d30eb4956408ee3ae09ad34299131fb383c47df355ddb428a7331cfa1e/charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b
-# pip coverage @ https://files.pythonhosted.org/packages/fa/d9/ec4ba0913195d240d026670d41b91f3e5b9a8a143a385f93a09e97c90f5c/coverage-7.5.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=adf032b6c105881f9d77fa17d9eebe0ad1f9bfb2ad25777811f97c5362aa07f2
+https://repo.anaconda.com/pkgs/main/linux-64/tk-8.6.14-h39e8969_0.conda#78dbc5e3c69143ebc037fc5d5b22e597
+https://repo.anaconda.com/pkgs/main/linux-64/sqlite-3.45.3-h5eee18b_0.conda#acf93d6aceb74d6110e20b44cc45939e
+https://repo.anaconda.com/pkgs/main/linux-64/python-3.13.2-hf623796_100_cp313.conda#bf836f30ac4c16fd3d71c1aaa25da08c
+https://repo.anaconda.com/pkgs/main/linux-64/setuptools-78.1.1-py313h06a4308_0.conda#8f8e1c1e3af9d2d371aaa0ee8316ae7c
+https://repo.anaconda.com/pkgs/main/linux-64/wheel-0.45.1-py313h06a4308_0.conda#29057e876eedce0e37c2388c138a19f9
+https://repo.anaconda.com/pkgs/main/noarch/pip-25.1-pyhc872135_2.conda#2778327d2a700153fefe0e69438b18e1
+# pip alabaster @ https://files.pythonhosted.org/packages/7e/b3/6b4067be973ae96ba0d615946e314c5ae35f9f993eca561b356540bb0c2b/alabaster-1.0.0-py3-none-any.whl#sha256=fc6786402dc3fcb2de3cabd5fe455a2db534b371124f1f21de8731783dec828b
+# pip babel @ https://files.pythonhosted.org/packages/b7/b8/3fe70c75fe32afc4bb507f75563d39bc5642255d1d94f1f23604725780bf/babel-2.17.0-py3-none-any.whl#sha256=4d0b53093fdfb4b21c92b5213dba5a1b23885afa8383709427046b21c366e5f2
+# pip certifi @ https://files.pythonhosted.org/packages/4a/7e/3db2bd1b1f9e95f7cddca6d6e75e2f2bd9f51b1246e546d88addca0106bd/certifi-2025.4.26-py3-none-any.whl#sha256=30350364dfe371162649852c63336a15c70c6510c2ad5015b21c2345311805f3
+# pip charset-normalizer @ https://files.pythonhosted.org/packages/e2/28/ffc026b26f441fc67bd21ab7f03b313ab3fe46714a14b516f931abe1a2d8/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=6c9379d65defcab82d07b2a9dfbfc2e95bc8fe0ebb1b176a3190230a3ef0e07c
+# pip coverage @ https://files.pythonhosted.org/packages/cb/74/2f8cc196643b15bc096d60e073691dadb3dca48418f08bc78dd6e899383e/coverage-7.8.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=5aaeb00761f985007b38cf463b1d160a14a22c34eb3f6a39d9ad6fc27cb73008
 # pip docutils @ https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl#sha256=dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2
 # pip execnet @ https://files.pythonhosted.org/packages/43/09/2aea36ff60d16dd8879bdb2f5b3ee0ba8d08cbbdcdfe870e695ce3784385/execnet-2.1.1-py3-none-any.whl#sha256=26dee51f1b80cebd6d0ca8e74dd8745419761d3bef34163928cbebbdc4749fdc
-# pip idna @ https://files.pythonhosted.org/packages/e5/3e/741d8c82801c347547f8a2a06aa57dbb1992be9e948df2ea0eda2c8b79e8/idna-3.7-py3-none-any.whl#sha256=82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0
+# pip idna @ https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl#sha256=946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3
 # pip imagesize @ https://files.pythonhosted.org/packages/ff/62/85c4c919272577931d407be5ba5d71c20f0b616d31a0befe0ae45bb79abd/imagesize-1.4.1-py2.py3-none-any.whl#sha256=0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b
-# pip iniconfig @ https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl#sha256=b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374
-# pip markupsafe @ https://files.pythonhosted.org/packages/0a/0d/2454f072fae3b5a137c119abf15465d1771319dfe9e4acbb31722a0fff91/MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5
-# pip meson @ https://files.pythonhosted.org/packages/33/75/b1a37fa7b2dbca8c0dbb04d5cdd7e2720c8ef6febe41b4a74866350e041c/meson-1.4.0-py3-none-any.whl#sha256=476a458d51fcfa322a6bdc64da5138997c542d08e6b2e49b9fa68c46fd7c4475
-# pip ninja @ https://files.pythonhosted.org/packages/6d/92/8d7aebd4430ab5ff65df2bfee6d5745f95c004284db2d8ca76dcbfd9de47/ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl#sha256=84502ec98f02a037a169c4b0d5d86075eaf6afc55e1879003d6cab51ced2ea4b
-# pip packaging @ https://files.pythonhosted.org/packages/49/df/1fceb2f8900f8639e278b056416d49134fb8d84c5942ffaa01ad34782422/packaging-24.0-py3-none-any.whl#sha256=2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5
-# pip platformdirs @ https://files.pythonhosted.org/packages/b0/15/1691fa5aaddc0c4ea4901c26f6137c29d5f6673596fe960a0340e8c308e1/platformdirs-4.2.1-py3-none-any.whl#sha256=17d5a1161b3fd67b390023cb2d3b026bbd40abde6fdb052dfbd3a29c3ba22ee1
+# pip iniconfig @ https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl#sha256=9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760
+# pip markupsafe @ https://files.pythonhosted.org/packages/0c/91/96cf928db8236f1bfab6ce15ad070dfdd02ed88261c2afafd4b43575e9e9/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=15ab75ef81add55874e7ab7055e9c397312385bd9ced94920f2802310c930396
+# pip meson @ https://files.pythonhosted.org/packages/df/d7/f1c8acf0e597d4d07532f519780ee6e11ba285a9b092f18706b4c9118331/meson-1.8.0-py3-none-any.whl#sha256=472b7b25da286447333d32872b82d1c6f1a34024fb8ee017d7308056c25fec1f
+# pip ninja @ https://files.pythonhosted.org/packages/eb/7a/455d2877fe6cf99886849c7f9755d897df32eaf3a0fba47b56e615f880f7/ninja-1.11.1.4-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl#sha256=096487995473320de7f65d622c3f1d16c3ad174797602218ca8c967f51ec38a0
+# pip packaging @ https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl#sha256=29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484
+# pip platformdirs @ https://files.pythonhosted.org/packages/6d/45/59578566b3275b8fd9157885918fcd0c4d74162928a5310926887b856a51/platformdirs-4.3.7-py3-none-any.whl#sha256=a03875334331946f13c549dbd8f4bac7a13a50a895a0eb1e8c6a8ace80d40a94
 # pip pluggy @ https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl#sha256=44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669
-# pip pygments @ https://files.pythonhosted.org/packages/97/9c/372fef8377a6e340b1704768d20daaded98bf13282b5327beb2e2fe2c7ef/pygments-2.17.2-py3-none-any.whl#sha256=b27c2826c47d0f3219f29554824c30c5e8945175d888647acd804ddd04af846c
-# pip six @ https://files.pythonhosted.org/packages/d9/5a/e7c31adbe875f2abbb91bd84cf2dc52d792b5a01506781dbcf25c91daf11/six-1.16.0-py2.py3-none-any.whl#sha256=8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254
+# pip pygments @ https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl#sha256=9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c
+# pip roman-numerals-py @ https://files.pythonhosted.org/packages/53/97/d2cbbaa10c9b826af0e10fdf836e1bf344d9f0abb873ebc34d1f49642d3f/roman_numerals_py-3.1.0-py3-none-any.whl#sha256=9da2ad2fb670bcf24e81070ceb3be72f6c11c440d73bd579fbeca1e9f330954c
+# pip six @ https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl#sha256=4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274
 # pip snowballstemmer @ https://files.pythonhosted.org/packages/ed/dc/c02e01294f7265e63a7315fe086dd1df7dacb9f840a804da846b96d01b96/snowballstemmer-2.2.0-py2.py3-none-any.whl#sha256=c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a
-# pip sphinxcontrib-applehelp @ https://files.pythonhosted.org/packages/56/89/fea3fbf6785b388e6cb8a1beaf62f96e80b37311bdeed6e133388a732426/sphinxcontrib_applehelp-1.0.8-py3-none-any.whl#sha256=cb61eb0ec1b61f349e5cc36b2028e9e7ca765be05e49641c97241274753067b4
-# pip sphinxcontrib-devhelp @ https://files.pythonhosted.org/packages/a0/52/1049d918d1d1c72857d285c3f0c64c1cbe0be394ce1c93a3d2aa4f39fe3b/sphinxcontrib_devhelp-1.0.6-py3-none-any.whl#sha256=6485d09629944511c893fa11355bda18b742b83a2b181f9a009f7e500595c90f
-# pip sphinxcontrib-htmlhelp @ https://files.pythonhosted.org/packages/c2/e9/74c4cda5b409af3222fda38f0774e616011bc935f639dbc0da5ca2d1be7d/sphinxcontrib_htmlhelp-2.0.5-py3-none-any.whl#sha256=393f04f112b4d2f53d93448d4bce35842f62b307ccdc549ec1585e950bc35e04
+# pip sphinxcontrib-applehelp @ https://files.pythonhosted.org/packages/5d/85/9ebeae2f76e9e77b952f4b274c27238156eae7979c5421fba91a28f4970d/sphinxcontrib_applehelp-2.0.0-py3-none-any.whl#sha256=4cd3f0ec4ac5dd9c17ec65e9ab272c9b867ea77425228e68ecf08d6b28ddbdb5
+# pip sphinxcontrib-devhelp @ https://files.pythonhosted.org/packages/35/7a/987e583882f985fe4d7323774889ec58049171828b58c2217e7f79cdf44e/sphinxcontrib_devhelp-2.0.0-py3-none-any.whl#sha256=aefb8b83854e4b0998877524d1029fd3e6879210422ee3780459e28a1f03a8a2
+# pip sphinxcontrib-htmlhelp @ https://files.pythonhosted.org/packages/0a/7b/18a8c0bcec9182c05a0b3ec2a776bba4ead82750a55ff798e8d406dae604/sphinxcontrib_htmlhelp-2.1.0-py3-none-any.whl#sha256=166759820b47002d22914d64a075ce08f4c46818e17cfc9470a9786b759b19f8
 # pip sphinxcontrib-jsmath @ https://files.pythonhosted.org/packages/c2/42/4c8646762ee83602e3fb3fbe774c2fac12f317deb0b5dbeeedd2d3ba4b77/sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl#sha256=2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178
-# pip sphinxcontrib-qthelp @ https://files.pythonhosted.org/packages/80/b3/1beac14a88654d2e5120d0143b49be5ad450b86eb1963523d8dbdcc51eb2/sphinxcontrib_qthelp-1.0.7-py3-none-any.whl#sha256=e2ae3b5c492d58fcbd73281fbd27e34b8393ec34a073c792642cd8e529288182
-# pip sphinxcontrib-serializinghtml @ https://files.pythonhosted.org/packages/38/24/228bb903ea87b9e08ab33470e6102402a644127108c7117ac9c00d849f82/sphinxcontrib_serializinghtml-1.1.10-py3-none-any.whl#sha256=326369b8df80a7d2d8d7f99aa5ac577f51ea51556ed974e7716cfd4fca3f6cb7
+# pip sphinxcontrib-qthelp @ https://files.pythonhosted.org/packages/27/83/859ecdd180cacc13b1f7e857abf8582a64552ea7a061057a6c716e790fce/sphinxcontrib_qthelp-2.0.0-py3-none-any.whl#sha256=b18a828cdba941ccd6ee8445dbe72ffa3ef8cbe7505d8cd1fa0d42d3f2d5f3eb
+# pip sphinxcontrib-serializinghtml @ https://files.pythonhosted.org/packages/52/a7/d2782e4e3f77c8450f727ba74a8f12756d5ba823d81b941f1b04da9d033a/sphinxcontrib_serializinghtml-2.0.0-py3-none-any.whl#sha256=6e2cb0eef194e10c27ec0023bfeb25badbbb5868244cf5bc5bdc04e4464bf331
 # pip tabulate @ https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl#sha256=024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f
-# pip threadpoolctl @ https://files.pythonhosted.org/packages/1e/84/ccd9b08653022b7785b6e3ee070ffb2825841e0dc119be22f0840b2b35cb/threadpoolctl-3.4.0-py3-none-any.whl#sha256=8f4c689a65b23e5ed825c8436a92b818aac005e0f3715f6a1664d7c7ee29d262
-# pip urllib3 @ https://files.pythonhosted.org/packages/a2/73/a68704750a7679d0b6d3ad7aa8d4da8e14e151ae82e6fee774e6e0d05ec8/urllib3-2.2.1-py3-none-any.whl#sha256=450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d
-# pip jinja2 @ https://files.pythonhosted.org/packages/30/6d/6de6be2d02603ab56e72997708809e8a5b0fbfee080735109b40a3564843/Jinja2-3.1.3-py3-none-any.whl#sha256=7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa
-# pip pyproject-metadata @ https://files.pythonhosted.org/packages/aa/5f/bb5970d3d04173b46c9037109f7f05fc8904ff5be073ee49bb6ff00301bc/pyproject_metadata-0.8.0-py3-none-any.whl#sha256=ad858d448e1d3a1fb408ac5bac9ea7743e7a8bbb472f2693aaa334d2db42f526
-# pip pytest @ https://files.pythonhosted.org/packages/51/ff/f6e8b8f39e08547faece4bd80f89d5a8de68a38b2d179cc1c4490ffa3286/pytest-7.4.4-py3-none-any.whl#sha256=b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8
+# pip threadpoolctl @ https://files.pythonhosted.org/packages/32/d5/f9a850d79b0851d1d4ef6456097579a9005b31fea68726a4ae5f2d82ddd9/threadpoolctl-3.6.0-py3-none-any.whl#sha256=43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb
+# pip urllib3 @ https://files.pythonhosted.org/packages/6b/11/cc635220681e93a0183390e26485430ca2c7b5f9d33b15c74c2861cb8091/urllib3-2.4.0-py3-none-any.whl#sha256=4e16665048960a0900c702d4a66415956a584919c03361cac9f1df5c5dd7e813
-# pip jinja2 @ https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl#sha256=85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67
+# pip pyproject-metadata @ https://files.pythonhosted.org/packages/7e/b1/8e63033b259e0a4e40dd1ec4a9fee17718016845048b43a36ec67d62e6fe/pyproject_metadata-0.9.1-py3-none-any.whl#sha256=ee5efde548c3ed9b75a354fc319d5afd25e9585fa918a34f62f904cc731973ad
+# pip pytest @ https://files.pythonhosted.org/packages/30/3d/64ad57c803f1fa1e963a7946b6e0fea4a70df53c1a7fed304586539c2bac/pytest-8.3.5-py3-none-any.whl#sha256=c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820
 # pip python-dateutil @ https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl#sha256=a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427
-# pip requests @ https://files.pythonhosted.org/packages/70/8e/0e2d847013cb52cd35b38c009bb167a1a26b2ce6cd6965bf26b47bc0bf44/requests-2.31.0-py3-none-any.whl#sha256=58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f
-# pip meson-python @ https://files.pythonhosted.org/packages/91/c0/104cb6244c83fe6bc3886f144cc433db0c0c78efac5dc00e409a5a08c87d/meson_python-0.16.0-py3-none-any.whl#sha256=842dc9f5dc29e55fc769ff1b6fe328412fe6c870220fc321060a1d2d395e69e8
-# pip pooch @ https://files.pythonhosted.org/packages/f4/72/8ae0f1ba4ce6a4f6d4d01a60a9fdf690fde188c45c1872b0b4ddb0607ace/pooch-1.8.1-py3-none-any.whl#sha256=6b56611ac320c239faece1ac51a60b25796792599ce5c0b1bb87bf01df55e0a9
-# pip pytest-cov @ https://files.pythonhosted.org/packages/78/3a/af5b4fa5961d9a1e6237b530eb87dd04aea6eb83da09d2a4073d81b54ccf/pytest_cov-5.0.0-py3-none-any.whl#sha256=4f0764a1219df53214206bf1feea4633c3b558a2925c8b59f144f682861ce652
+# pip requests @ https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl#sha256=70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6
+# pip meson-python @ https://files.pythonhosted.org/packages/28/58/66db620a8a7ccb32633de9f403fe49f1b63c68ca94e5c340ec5cceeb9821/meson_python-0.18.0-py3-none-any.whl#sha256=3b0fe051551cc238f5febb873247c0949cd60ded556efa130aa57021804868e2
+# pip pooch @ https://files.pythonhosted.org/packages/a8/87/77cc11c7a9ea9fd05503def69e3d18605852cd0d4b0d3b8f15bbeb3ef1d1/pooch-1.8.2-py3-none-any.whl#sha256=3529a57096f7198778a5ceefd5ac3ef0e4d06a6ddaf9fc2d609b806f25302c47
+# pip pytest-cov @ https://files.pythonhosted.org/packages/28/d0/def53b4a790cfb21483016430ed828f64830dd981ebe1089971cd10cab25/pytest_cov-6.1.1-py3-none-any.whl#sha256=bddf29ed2d0ab6f4df17b4c55b0a657287db8684af9c42ea546b21b1041b3dde
 # pip pytest-xdist @ https://files.pythonhosted.org/packages/6d/82/1d96bf03ee4c0fdc3c0cbe61470070e659ca78dc0086fb88b66c185e2449/pytest_xdist-3.6.1-py3-none-any.whl#sha256=9ed4adfb68a016610848639bb7e02c9352d5d9f03d04809919e2dafc3be4cca7
-# pip sphinx @ https://files.pythonhosted.org/packages/b4/fa/130c32ed94cf270e3d0b9ded16fb7b2c8fea86fa7263c29a696a30c1dde7/sphinx-7.3.7-py3-none-any.whl#sha256=413f75440be4cacf328f580b4274ada4565fb2187d696a84970c23f77b64d8c3
-# pip numpydoc @ https://files.pythonhosted.org/packages/f0/fa/dcfe0f65660661db757ee9ebd84e170ff98edd5d80235f62457d9088f85f/numpydoc-1.7.0-py3-none-any.whl#sha256=5a56419d931310d79a06cfc2a126d1558700feeb9b4f3d8dcae1a8134be829c9
+# pip sphinx @ https://files.pythonhosted.org/packages/31/53/136e9eca6e0b9dc0e1962e2c908fbea2e5ac000c2a2fbd9a35797958c48b/sphinx-8.2.3-py3-none-any.whl#sha256=4405915165f13521d875a8c29c8970800a0141c14cc5416a38feca4ea5d9b9c3
+# pip numpydoc @ https://files.pythonhosted.org/packages/6c/45/56d99ba9366476cd8548527667f01869279cedb9e66b28eb4dfb27701679/numpydoc-1.8.0-py3-none-any.whl#sha256=72024c7fd5e17375dec3608a27c03303e8ad00c81292667955c6fea7a3ccf541
diff --git a/build_tools/azure/pymin_conda_defaults_openblas_linux-64_conda.lock b/build_tools/azure/pymin_conda_defaults_openblas_linux-64_conda.lock
deleted file mode 100644
index a1a9a668e9d2e..0000000000000
--- a/build_tools/azure/pymin_conda_defaults_openblas_linux-64_conda.lock
+++ /dev/null
@@ -1,99 +0,0 @@
-# Generated by conda-lock.
-# platform: linux-64
-# input_hash: 7d61cf4d650f87956531ca703b2ac2eabd6d427b07664416d5420eb73b39bdf1
-@EXPLICIT
-https://repo.anaconda.com/pkgs/main/linux-64/_libgcc_mutex-0.1-main.conda#c3473ff8bdb3d124ed5ff11ec380d6f9
-https://repo.anaconda.com/pkgs/main/linux-64/blas-1.0-openblas.conda#9ddfcaef10d79366c90128f5dc444be8
-https://repo.anaconda.com/pkgs/main/linux-64/ca-certificates-2024.3.11-h06a4308_0.conda#08529eb3504712baabcbda266a19feb7
-https://repo.anaconda.com/pkgs/main/linux-64/ld_impl_linux-64-2.38-h1181459_1.conda#68eedfd9c06f2b0e6888d8db345b7f5b
-https://repo.anaconda.com/pkgs/main/linux-64/libgfortran5-11.2.0-h1234567_1.conda#36a01a8c30e0cadf0d3e842c50b73f3b
-https://repo.anaconda.com/pkgs/main/noarch/tzdata-2024a-h04d1e81_0.conda#452af53adae0a5b06eb5d05c707b2f25
-https://repo.anaconda.com/pkgs/main/linux-64/libgfortran-ng-11.2.0-h00389a5_1.conda#7429b67ab7b1d7cb99b9d1f3ddaec6e3
-https://repo.anaconda.com/pkgs/main/linux-64/libgomp-11.2.0-h1234567_1.conda#b372c0eea9b60732fdae4b817a63c8cd
-https://repo.anaconda.com/pkgs/main/linux-64/libstdcxx-ng-11.2.0-h1234567_1.conda#57623d10a70e09e1d048c2b2b6f4e2dd
-https://repo.anaconda.com/pkgs/main/linux-64/_openmp_mutex-5.1-1_gnu.conda#71d281e9c2192cb3fa425655a8defb85
-https://repo.anaconda.com/pkgs/main/linux-64/libgcc-ng-11.2.0-h1234567_1.conda#a87728dabf3151fb9cfa990bd2eb0464
-https://repo.anaconda.com/pkgs/main/linux-64/bzip2-1.0.8-h5eee18b_5.conda#9c8dec113089c4aca7392c6a3864f505
-https://repo.anaconda.com/pkgs/main/linux-64/expat-2.6.2-h6a678d5_0.conda#55049db2772dae035f6b8a95f72b5970
-https://repo.anaconda.com/pkgs/main/linux-64/fftw-3.3.9-h5eee18b_2.conda#db1df41113accc18ec59a99f1631bfcd
-https://repo.anaconda.com/pkgs/main/linux-64/icu-73.1-h6a678d5_0.conda#6d09df641fc23f7d277a04dc7ea32dd4
-https://repo.anaconda.com/pkgs/main/linux-64/jpeg-9e-h5eee18b_1.conda#ac373800fda872108412d1ccfe3fa572
-https://repo.anaconda.com/pkgs/main/linux-64/lerc-3.0-h295c915_0.conda#b97309770412f10bed8d9448f6f98f87
-https://repo.anaconda.com/pkgs/main/linux-64/libdeflate-1.17-h5eee18b_1.conda#82831ef0b6c9595382d74e0c281f6742
-https://repo.anaconda.com/pkgs/main/linux-64/libffi-3.4.4-h6a678d5_0.conda#06e288f9250abef59b9a367d151fc339
-https://repo.anaconda.com/pkgs/main/linux-64/libiconv-1.16-h7f8727e_2.conda#80d4bc7d7e58b5f0be41d763f60994f5
-https://repo.anaconda.com/pkgs/main/linux-64/libopenblas-0.3.21-h043d6bf_0.conda#7f7324dcc3c4761a14f3e4ac443235a7
-https://repo.anaconda.com/pkgs/main/linux-64/libuuid-1.41.5-h5eee18b_0.conda#4a6a2354414c9080327274aa514e5299
-https://repo.anaconda.com/pkgs/main/linux-64/libwebp-base-1.3.2-h5eee18b_0.conda#9179fc7baefa1e027f572edbc519d805
-https://repo.anaconda.com/pkgs/main/linux-64/libxcb-1.15-h7f8727e_0.conda#ada518dcadd6aaee9aae47ba9a671553
-https://repo.anaconda.com/pkgs/main/linux-64/lz4-c-1.9.4-h6a678d5_0.conda#53915e9402180a7f22ea619c41089520
-https://repo.anaconda.com/pkgs/main/linux-64/ncurses-6.4-h6a678d5_0.conda#5558eec6e2191741a92f832ea826251c
-https://repo.anaconda.com/pkgs/main/linux-64/openssl-3.0.13-h7f8727e_0.conda#c73d46a4d666da0ae3dcd3fd8f805122
-https://repo.anaconda.com/pkgs/main/linux-64/xz-5.4.6-h5eee18b_0.conda#81a9916f581d4da15a3839216a487c66
-https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.13-h5eee18b_0.conda#333e31fbfbb5057c92fa845ad6adef93
-https://repo.anaconda.com/pkgs/main/linux-64/ccache-3.7.9-hfe4627d_0.conda#bef6fc681c273bb7bd0c67d1a591365e
-https://repo.anaconda.com/pkgs/main/linux-64/libcups-2.4.2-h2d74bed_1.conda#3f265c2172a9e8c90a74037b6fa13685
-https://repo.anaconda.com/pkgs/main/linux-64/libedit-3.1.20230828-h5eee18b_0.conda#850eb5a9d2d7d3c66cce12e84406ca08
-https://repo.anaconda.com/pkgs/main/linux-64/libllvm14-14.0.6-hdb19cb5_3.conda#aefea2b45cf32f12b4f1ffaa70aa3201
-https://repo.anaconda.com/pkgs/main/linux-64/libpng-1.6.39-h5eee18b_0.conda#f6aee38184512eb05b06c2e94d39ab22
-https://repo.anaconda.com/pkgs/main/linux-64/libxml2-2.10.4-hfdd30dd_2.conda#ff7a0e3b92afb3c99b82c9f0ba8b5670
-https://repo.anaconda.com/pkgs/main/linux-64/pcre2-10.42-hebb0a14_0.conda#fca6dea6ce1eddd0876a024f62c5097a
-https://repo.anaconda.com/pkgs/main/linux-64/readline-8.2-h5eee18b_0.conda#be42180685cce6e6b0329201d9f48efb
-https://repo.anaconda.com/pkgs/main/linux-64/tk-8.6.12-h1ccaba5_0.conda#fa10ff4aa631fa4aa090a6234d7770b9
-https://repo.anaconda.com/pkgs/main/linux-64/zstd-1.5.5-hc292b87_0.conda#0f59d57dc21f585f4c282d60dfb46505
-https://repo.anaconda.com/pkgs/main/linux-64/freetype-2.12.1-h4a9f257_0.conda#bdc7b5952e9c5dca01bc2f4ccef2f974
-https://repo.anaconda.com/pkgs/main/linux-64/krb5-1.20.1-h143b758_1.conda#cf1accc86321fa25d6b978cc748039ae
-https://repo.anaconda.com/pkgs/main/linux-64/libclang13-14.0.6-default_he11475f_1.conda#44890feda1cf51639d9c94afbacce011
-https://repo.anaconda.com/pkgs/main/linux-64/libglib-2.78.4-hdc74915_0.conda#2f6d27741e931d5b6ba56e1a1312aaf0
-https://repo.anaconda.com/pkgs/main/linux-64/libtiff-4.5.1-h6a678d5_0.conda#235a671f74f0c4ecad9f9b3b107e3566
-https://repo.anaconda.com/pkgs/main/linux-64/libxkbcommon-1.0.1-h5eee18b_1.conda#888b2e8f1bbf21017c503826e2d24b50
-https://repo.anaconda.com/pkgs/main/linux-64/sqlite-3.41.2-h5eee18b_0.conda#c7086c9ceb6cfe1c4c729a774a2d88a5
-https://repo.anaconda.com/pkgs/main/linux-64/cyrus-sasl-2.1.28-h52b45da_1.conda#d634af1577e4008f9228ae96ce671c44
-https://repo.anaconda.com/pkgs/main/linux-64/fontconfig-2.14.1-h4c34cd2_2.conda#f0b472f5b544f8d57beb09ed4a2932e1
-https://repo.anaconda.com/pkgs/main/linux-64/glib-tools-2.78.4-h6a678d5_0.conda#3dbe6227cd59818dca9afb75ccb70708
-https://repo.anaconda.com/pkgs/main/linux-64/lcms2-2.12-h3be6417_0.conda#719db47afba9f6586eecb5eacac70bff
-https://repo.anaconda.com/pkgs/main/linux-64/libclang-14.0.6-default_hc6dbbc7_1.conda#8f12583c4027b2861cff470f6b8837c4
-https://repo.anaconda.com/pkgs/main/linux-64/libpq-12.17-hdbd6064_0.conda#6bed363e25859faff66bf546a11c10e8
-https://repo.anaconda.com/pkgs/main/linux-64/openjpeg-2.4.0-h3ad879b_0.conda#86baecb47ecaa7f7ff2657a1f03b90c9
-https://repo.anaconda.com/pkgs/main/linux-64/python-3.9.19-h955ad1f_0.conda#33cb019c40e3409df392c99e3c34f352
-https://repo.anaconda.com/pkgs/main/linux-64/certifi-2024.2.2-py39h06a4308_0.conda#2bc1db9166ecbb968f61252e6f08c2ce
-https://repo.anaconda.com/pkgs/main/noarch/cycler-0.11.0-pyhd3eb1b0_0.conda#f5e365d2cdb66d547eb8c3ab93843aab
-https://repo.anaconda.com/pkgs/main/linux-64/cython-3.0.10-py39h5eee18b_0.conda#1419a658ed2b4d5c3ac1964f33143b64
-https://repo.anaconda.com/pkgs/main/linux-64/exceptiongroup-1.2.0-py39h06a4308_0.conda#960e2cb83ac5134df8e593a130aa11af
-https://repo.anaconda.com/pkgs/main/noarch/execnet-1.9.0-pyhd3eb1b0_0.conda#f895937671af67cebb8af617494b3513
-https://repo.anaconda.com/pkgs/main/linux-64/glib-2.78.4-h6a678d5_0.conda#045ff487547f7b2b7ff01648681b8ebe
-https://repo.anaconda.com/pkgs/main/noarch/iniconfig-1.1.1-pyhd3eb1b0_0.tar.bz2#e40edff2c5708f342cef43c7f280c507
-https://repo.anaconda.com/pkgs/main/linux-64/joblib-1.2.0-py39h06a4308_0.conda#ac1f5687d70aa1128cbecb26bc9e559d
-https://repo.anaconda.com/pkgs/main/linux-64/kiwisolver-1.4.4-py39h6a678d5_0.conda#3d57aedbfbd054ce57fb3c1e4448828c
-https://repo.anaconda.com/pkgs/main/linux-64/mysql-5.7.24-h721c034_2.conda#dfc19ca2466d275c4c1f73b62c57f37b
-https://repo.anaconda.com/pkgs/main/linux-64/numpy-base-1.21.6-py39h375b286_0.conda#4ceaa5d6e6307fe06961d555f78b266f
-https://repo.anaconda.com/pkgs/main/linux-64/packaging-23.2-py39h06a4308_0.conda#b3f88f45f31bde016e49be3e941e5272
-https://repo.anaconda.com/pkgs/main/linux-64/pillow-10.2.0-py39h5eee18b_0.conda#fca2a1c44d16ec4b8ba71759b4ba9ba4
-https://repo.anaconda.com/pkgs/main/linux-64/pluggy-1.0.0-py39h06a4308_1.conda#fb4fed11ed43cf727dbd51883cc1d9fa
-https://repo.anaconda.com/pkgs/main/linux-64/ply-3.11-py39h06a4308_0.conda#6c89bf6d2fdf6d24126e34cb83fd10f1
-https://repo.anaconda.com/pkgs/main/linux-64/pyparsing-3.0.9-py39h06a4308_0.conda#3a0537468e59760404f63b4f04369828
-https://repo.anaconda.com/pkgs/main/linux-64/pyqt5-sip-12.13.0-py39h5eee18b_0.conda#256840c3841b52346ea5743be8490ede
-https://repo.anaconda.com/pkgs/main/linux-64/setuptools-68.2.2-py39h06a4308_0.conda#5b42cae5548732ae5c167bb1066085de
-https://repo.anaconda.com/pkgs/main/noarch/six-1.16.0-pyhd3eb1b0_1.conda#34586824d411d36af2fa40e799c172d0
-https://repo.anaconda.com/pkgs/main/noarch/toml-0.10.2-pyhd3eb1b0_0.conda#cda05f5f6d8509529d1a2743288d197a
-https://repo.anaconda.com/pkgs/main/linux-64/tomli-2.0.1-py39h06a4308_0.conda#b06dffe7ddca2645ed72f5116f0a087d
-https://repo.anaconda.com/pkgs/main/linux-64/tornado-6.3.3-py39h5eee18b_0.conda#9c4bd985bb8adcd12f47e790e95a9333
-https://repo.anaconda.com/pkgs/main/linux-64/wheel-0.41.2-py39h06a4308_0.conda#ec1b8213c3585defaa6042ed2f95861d
-https://repo.anaconda.com/pkgs/main/linux-64/coverage-7.2.2-py39h5eee18b_0.conda#e9da151b7e1f56be2cb569c65949a1d2
-https://repo.anaconda.com/pkgs/main/linux-64/dbus-1.13.18-hb2f20db_0.conda#6a6a6f1391f807847404344489ef6cf4
-https://repo.anaconda.com/pkgs/main/linux-64/gstreamer-1.14.1-h5eee18b_1.conda#f2f26e6f869b5d87f41bd059fae47c3e
-https://repo.anaconda.com/pkgs/main/linux-64/numpy-1.21.6-py39hac523dd_0.conda#a03c1fe16cf2558bca3838062c334d7d
-https://repo.anaconda.com/pkgs/main/linux-64/pip-23.3.1-py39h06a4308_0.conda#685007e3dae59d211620f19926577bd6
-https://repo.anaconda.com/pkgs/main/linux-64/pytest-7.4.0-py39h06a4308_0.conda#99d92a7a39f7e615de84f8cc5606c49a
-https://repo.anaconda.com/pkgs/main/noarch/python-dateutil-2.8.2-pyhd3eb1b0_0.conda#211ee00320b08a1ac9fea6677649f6c9
-https://repo.anaconda.com/pkgs/main/linux-64/sip-6.7.12-py39h6a678d5_0.conda#6988a3e12fcacfedcac523c1e4c3167c
-https://repo.anaconda.com/pkgs/main/linux-64/gst-plugins-base-1.14.1-h6a678d5_1.conda#afd9cbe949d670d24cc0a007aaec1fe1
-https://repo.anaconda.com/pkgs/main/linux-64/matplotlib-base-3.3.4-py39h62a2d02_0.conda#dbab28222c740af8e21a3e5e2882c178
-https://repo.anaconda.com/pkgs/main/linux-64/pytest-cov-4.1.0-py39h06a4308_1.conda#8f41fce21670b120bf7fa8a7883380d9
-https://repo.anaconda.com/pkgs/main/linux-64/pytest-xdist-3.5.0-py39h06a4308_0.conda#e1d7ffcb1ee2ed9a84800f5c4bbbd7ae
-https://repo.anaconda.com/pkgs/main/linux-64/scipy-1.7.3-py39hf838250_2.conda#0667ea5ac14d35e26da19a0f068739da
-https://repo.anaconda.com/pkgs/main/linux-64/pyamg-4.2.3-py39h79cecc1_0.conda#afc634da8b81dc504179d53d334e6e55
-https://repo.anaconda.com/pkgs/main/linux-64/qt-main-5.15.2-h53bd1ea_10.conda#bd0c79e82df6323f638bdcb871891b61
-https://repo.anaconda.com/pkgs/main/linux-64/pyqt-5.15.10-py39h6a678d5_0.conda#52da5ff9b1144b078d2f41bab0b213f2
-https://repo.anaconda.com/pkgs/main/linux-64/matplotlib-3.3.4-py39h06a4308_0.conda#384fc5e01ebfcf30e7161119d3029b5a
-# pip threadpoolctl @ https://files.pythonhosted.org/packages/61/cf/6e354304bcb9c6413c4e02a747b600061c21d38ba51e7e544ac7bc66aecc/threadpoolctl-3.1.0-py3-none-any.whl#sha256=8b99adda265feb6773280df41eece7b2e6561b772d21ffd52e372f999024907b
diff --git a/build_tools/azure/pymin_conda_forge_mkl_environment.yml b/build_tools/azure/pymin_conda_forge_mkl_environment.yml
index fbad1d5bd42a8..fe6ce91950e4a 100644
--- a/build_tools/azure/pymin_conda_forge_mkl_environment.yml
+++ b/build_tools/azure/pymin_conda_forge_mkl_environment.yml
@@ -4,7 +4,7 @@ channels:
   - conda-forge
 dependencies:
-  - python=3.9
+  - python=3.10
   - numpy
   - blas[build=mkl]
   - scipy
@@ -12,7 +12,7 @@ dependencies:
   - joblib
   - threadpoolctl
   - matplotlib
-  - pytest<8
+  - pytest
   - pytest-xdist
   - pillow
   - pip
diff --git a/build_tools/azure/pymin_conda_forge_mkl_win-64_conda.lock b/build_tools/azure/pymin_conda_forge_mkl_win-64_conda.lock
index b98735a4336bb..6f8eb6175fa27 100644
--- a/build_tools/azure/pymin_conda_forge_mkl_win-64_conda.lock
+++ b/build_tools/azure/pymin_conda_forge_mkl_win-64_conda.lock
@@ -1,124 +1,117 @@
 # Generated by conda-lock.
 # platform: win-64
-# input_hash: 4a2ac6360285edd6c1e8182dd51ef698c0041fa9843e4ad9d9bc9dec6a7c8d1d
+# input_hash: cc5e2a711eb32773dc46fe159e1c3fe14f4fd07565fc8d3dedf2d748d4f2f694
 @EXPLICIT
-https://conda.anaconda.org/conda-forge/win-64/ca-certificates-2024.2.2-h56e8100_0.conda#63da060240ab8087b60d1357051ea7d6
-https://conda.anaconda.org/conda-forge/win-64/intel-openmp-2024.1.0-h57928b3_965.conda#c66eb2fd33b999ccc258aef85689758e
-https://conda.anaconda.org/conda-forge/win-64/libasprintf-0.22.5-h5728263_2.conda#75a6982b9ff0a8db0f53303527b07af8
-https://conda.anaconda.org/conda-forge/win-64/mkl-include-2024.1.0-h66d3029_692.conda#60233966dc7c0261c9a443120b43c477
-https://conda.anaconda.org/conda-forge/win-64/msys2-conda-epoch-20160418-1.tar.bz2#b0309b72560df66f71a9d5e34a5efdfa
-https://conda.anaconda.org/conda-forge/win-64/python_abi-3.9-4_cp39.conda#948b0d93d4ab1372d8fd45e1560afd47
-https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda#161081fc7cec0bfda0d86d7cb595f8d8
-https://conda.anaconda.org/conda-forge/win-64/ucrt-10.0.22621.0-h57928b3_0.tar.bz2#72608f6cd3e5898229c3ea16deb1ac43
-https://conda.anaconda.org/conda-forge/win-64/libasprintf-devel-0.22.5-h5728263_2.conda#8377da2cc31200d7181d2e48d60e4c7b
-https://conda.anaconda.org/conda-forge/win-64/m2w64-gmp-6.1.0-2.tar.bz2#53a1c73e1e3d185516d7e3af177596d9
-https://conda.anaconda.org/conda-forge/win-64/m2w64-libwinpthread-git-5.0.0.4634.697f757-2.tar.bz2#774130a326dee16f1ceb05cc687ee4f0
-https://conda.anaconda.org/conda-forge/win-64/vc14_runtime-14.38.33130-h82b7239_18.conda#8be79fdd2725ddf7bbf8a27a4c1f79ba
-https://conda.anaconda.org/conda-forge/win-64/m2w64-gcc-libs-core-5.3.0-7.tar.bz2#4289d80fb4d272f1f3b56cfe87ac90bd
-https://conda.anaconda.org/conda-forge/win-64/vc-14.3-hcf57466_18.conda#20e1e652a4c740fa719002a8449994a2
-https://conda.anaconda.org/conda-forge/win-64/vs2015_runtime-14.38.33130-hcb4865c_18.conda#10d42885e3ed84e575b454db30f1aa93
-https://conda.anaconda.org/conda-forge/win-64/bzip2-1.0.8-hcfcfb64_5.conda#26eb8ca6ea332b675e11704cce84a3be
-https://conda.anaconda.org/conda-forge/win-64/icu-73.2-h63175ca_0.conda#0f47d9e3192d9e09ae300da0d28e0f56
-https://conda.anaconda.org/conda-forge/win-64/lerc-4.0.0-h63175ca_0.tar.bz2#1900cb3cab5055833cfddb0ba233b074
-https://conda.anaconda.org/conda-forge/win-64/libbrotlicommon-1.1.0-hcfcfb64_1.conda#f77f319fb82980166569e1280d5b2864
-https://conda.anaconda.org/conda-forge/win-64/libdeflate-1.20-hcfcfb64_0.conda#b12b5bde5eb201a1df75e49320cc938a
-https://conda.anaconda.org/conda-forge/win-64/libffi-3.4.2-h8ffe710_5.tar.bz2#2c96d1b6915b408893f9472569dee135
-https://conda.anaconda.org/conda-forge/win-64/libiconv-1.17-hcfcfb64_2.conda#e1eb10b1cca179f2baa3601e4efc8712
-https://conda.anaconda.org/conda-forge/win-64/libjpeg-turbo-3.0.0-hcfcfb64_1.conda#3f1b948619c45b1ca714d60c7389092c
-https://conda.anaconda.org/conda-forge/win-64/libogg-1.3.4-h8ffe710_1.tar.bz2#04286d905a0dcb7f7d4a12bdfe02516d
-https://conda.anaconda.org/conda-forge/win-64/libsqlite-3.45.3-hcfcfb64_0.conda#73f5dc8e2d55d9a1e14b11f49c3b4a28
-https://conda.anaconda.org/conda-forge/win-64/libwebp-base-1.4.0-hcfcfb64_0.conda#abd61d0ab127ec5cd68f62c2969e6f34
-https://conda.anaconda.org/conda-forge/win-64/libzlib-1.2.13-hcfcfb64_5.conda#5fdb9c6a113b6b6cb5e517fd972d5f41
-https://conda.anaconda.org/conda-forge/win-64/m2w64-gcc-libgfortran-5.3.0-6.tar.bz2#066552ac6b907ec6d72c0ddab29050dc
-https://conda.anaconda.org/conda-forge/win-64/ninja-1.12.0-h91493d7_0.conda#e67ab00f4d2c089864c2b8dcccf4dc58
-https://conda.anaconda.org/conda-forge/win-64/openssl-3.2.1-hcfcfb64_1.conda#958e0418e93e50c575bff70fbcaa12d8
-https://conda.anaconda.org/conda-forge/win-64/pthreads-win32-2.9.1-hfa6e2cd_3.tar.bz2#e2da8758d7d51ff6aa78a14dfb9dbed4
+https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2#0c96522c6bdaed4b1566d11387caaf45
+https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2#34893075a5c9e55cdafac56607368fc6
+https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2#4d59c254e01d9cde7957100457e2d5fb
+https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_3.conda#49023d73832ef61042f6a237cb2687e7
+https://conda.anaconda.org/conda-forge/win-64/intel-openmp-2024.2.1-h57928b3_1083.conda#2d89243bfb53652c182a7c73182cce4f
+https://conda.anaconda.org/conda-forge/win-64/mkl-include-2024.2.2-h66d3029_15.conda#e2f516189b44b6e042199d13e7015361
+https://conda.anaconda.org/conda-forge/noarch/python_abi-3.10-7_cp310.conda#44e871cba2b162368476a84b8d040b6c
+https://conda.anaconda.org/conda-forge/noarch/tzdata-2025b-h78e105d_0.conda#4222072737ccff51314b5ece9c7d6f5a
+https://conda.anaconda.org/conda-forge/win-64/ucrt-10.0.22621.0-h57928b3_1.conda#6797b005cd0f439c4c5c9ac565783700
+https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2025.4.26-h4c7d964_0.conda#23c7fd5062b48d8294fc7f61bf157fba
+https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-0.tar.bz2#f766549260d6815b0c52253f1fb1bb29
+https://conda.anaconda.org/conda-forge/win-64/libwinpthread-12.0.0.r4.gg4f2fc60ca-h57928b3_9.conda#08bfa5da6e242025304b206d152479ef
+https://conda.anaconda.org/conda-forge/win-64/vc14_runtime-14.42.34438-hfd919c2_26.conda#91651a36d31aa20c7ba36299fb7068f4
+https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2#fee5683a3f04bd15cbd8318b096a27ab
+https://conda.anaconda.org/conda-forge/win-64/libgomp-14.2.0-h1383e82_2.conda#dd6b1ab49e28bcb6154cd131acec985b
+https://conda.anaconda.org/conda-forge/win-64/vc-14.3-h2b53caa_26.conda#d3f0381e38093bde620a8d85f266ae55
+https://conda.anaconda.org/conda-forge/win-64/_openmp_mutex-4.5-2_gnu.conda#37e16618af5c4851a3f3d66dd0e11141
+https://conda.anaconda.org/conda-forge/win-64/bzip2-1.0.8-h2466b09_7.conda#276e7ffe9ffe39688abc665ef0f45596
+https://conda.anaconda.org/conda-forge/win-64/double-conversion-3.3.1-he0c23c2_0.conda#e9a1402439c18a4e3c7a52e4246e9e1c
+https://conda.anaconda.org/conda-forge/win-64/graphite2-1.3.13-h63175ca_1003.conda#3194499ee7d1a67404a87d0eefdd92c6
+https://conda.anaconda.org/conda-forge/win-64/icu-75.1-he0c23c2_0.conda#8579b6bb8d18be7c0b27fb08adeeeb40
+https://conda.anaconda.org/conda-forge/win-64/lerc-4.0.0-h6470a55_1.conda#c1b81da6d29a14b542da14a36c9fbf3f
+https://conda.anaconda.org/conda-forge/win-64/libbrotlicommon-1.1.0-h2466b09_2.conda#f7dc9a8f21d74eab46456df301da2972
+https://conda.anaconda.org/conda-forge/win-64/libdeflate-1.23-h76ddb4d_0.conda#34f03138e46543944d4d7f8538048842
+https://conda.anaconda.org/conda-forge/win-64/libexpat-2.7.0-he0c23c2_0.conda#b6f5352fdb525662f4169a0431d2dd7a
+https://conda.anaconda.org/conda-forge/win-64/libffi-3.4.6-h537db12_1.conda#85d8fa5e55ed8f93f874b3b23ed54ec6
+https://conda.anaconda.org/conda-forge/win-64/libiconv-1.18-h135ad9c_1.conda#21fc5dba2cbcd8e5e26ff976a312122c
+https://conda.anaconda.org/conda-forge/win-64/libjpeg-turbo-3.1.0-h2466b09_0.conda#7c51d27540389de84852daa1cdb9c63c
+https://conda.anaconda.org/conda-forge/win-64/liblzma-5.8.1-h2466b09_1.conda#14a1042c163181e143a7522dfb8ad6ab
+https://conda.anaconda.org/conda-forge/win-64/libsqlite-3.49.1-h67fdade_2.conda#b58b66d4ad1aaf1c2543cbbd6afb1a59
+https://conda.anaconda.org/conda-forge/win-64/libwebp-base-1.5.0-h3b0e114_0.conda#33f7313967072c6e6d8f865f5493c7ae
+https://conda.anaconda.org/conda-forge/win-64/libzlib-1.3.1-h2466b09_2.conda#41fbfac52c601159df6c01f875de31b9
+https://conda.anaconda.org/conda-forge/win-64/ninja-1.12.1-hc790b64_1.conda#3974c522f3248d4a93e6940c463d2de7
+https://conda.anaconda.org/conda-forge/win-64/openssl-3.5.0-ha4e3fda_1.conda#72c07e46b6766bb057018a9a74861b89
+https://conda.anaconda.org/conda-forge/win-64/pixman-0.46.0-had0cd8c_0.conda#01617534ef71b5385ebba940a6d6150d
+https://conda.anaconda.org/conda-forge/win-64/qhull-2020.2-hc790b64_5.conda#854fbdff64b572b5c0b470f334d34c11
 https://conda.anaconda.org/conda-forge/win-64/tk-8.6.13-h5226925_1.conda#fc048363eb8f03cd1737600a5d08aafe
-https://conda.anaconda.org/conda-forge/win-64/xz-5.2.6-h8d14728_0.tar.bz2#515d77642eaa3639413c6b1bc3f94219
-https://conda.anaconda.org/conda-forge/win-64/krb5-1.21.2-heb0366b_0.conda#6e8b0f22b4eef3b3cb3849bb4c3d47f9
-https://conda.anaconda.org/conda-forge/win-64/libbrotlidec-1.1.0-hcfcfb64_1.conda#19ce3e1dacc7912b3d6ff40690ba9ae0
-https://conda.anaconda.org/conda-forge/win-64/libbrotlienc-1.1.0-hcfcfb64_1.conda#71e890a0b361fd58743a13f77e1506b7
-https://conda.anaconda.org/conda-forge/win-64/libintl-0.22.5-h5728263_2.conda#aa622c938af057adc119f8b8eecada01
-https://conda.anaconda.org/conda-forge/win-64/libpng-1.6.43-h19919ed_0.conda#77e398acc32617a0384553aea29e866b
-https://conda.anaconda.org/conda-forge/win-64/libvorbis-1.3.7-h0e60522_0.tar.bz2#e1a22282de0169c93e4ffe6ce6acc212
-https://conda.anaconda.org/conda-forge/win-64/libxml2-2.12.6-hc3477c8_2.conda#ac7af7a949db01dae61ddc48f4a93d79
-https://conda.anaconda.org/conda-forge/win-64/m2w64-gcc-libs-5.3.0-7.tar.bz2#fe759119b8b3bfa720b8762c6fdc35de
-https://conda.anaconda.org/conda-forge/win-64/pcre2-10.43-h17e33f8_0.conda#d0485b8aa2cedb141a7bd27b4efa4c9c
-https://conda.anaconda.org/conda-forge/win-64/python-3.9.19-h4de0772_0_cpython.conda#b6999bc275e0e6beae7b1c8ea0be1e85
-https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.5-h12be248_0.conda#792bb5da68bf0a6cac6a6072ecb8dbeb
-https://conda.anaconda.org/conda-forge/win-64/brotli-bin-1.1.0-hcfcfb64_1.conda#0105229d7c5fabaa840043a86c10ec64
-https://conda.anaconda.org/conda-forge/noarch/certifi-2024.2.2-pyhd8ed1ab_0.conda#0876280e409658fc6f9e75d035960333
-https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99
-https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhd8ed1ab_0.conda#5cd86562580f274031ede6aa6aa24441
-https://conda.anaconda.org/conda-forge/win-64/cython-3.0.10-py39h99910a6_0.conda#8ebc2fca8a6840d0694f37e698f4e59c
-https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.0-pyhd8ed1ab_2.conda#8d652ea2ee8eaee02ed8dc820bc794aa
-https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_0.conda#15dda3cdbf330abfe9f555d22f66db46
-https://conda.anaconda.org/conda-forge/win-64/freetype-2.12.1-hdaf720e_2.conda#3761b23693f768dc75a8fd0a73ca053f
-https://conda.anaconda.org/conda-forge/win-64/gettext-tools-0.22.5-h7d00a51_2.conda#ef1c3bb48c013099c4872640a5f2096c
-https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5
-https://conda.anaconda.org/conda-forge/win-64/kiwisolver-1.4.5-py39h1f6ef14_1.conda#4fc5bd0a7b535252028c647cc27d6c87
-https://conda.anaconda.org/conda-forge/win-64/libclang13-18.1.3-default_hf64faad_0.conda#9217c37b478ec601af909aafc954a6fc
-https://conda.anaconda.org/conda-forge/win-64/libgettextpo-0.22.5-h5728263_2.conda#f4c826b19bf1ccee2a63a2c685039728
-https://conda.anaconda.org/conda-forge/win-64/libglib-2.80.0-h39d0aa6_6.conda#cd5c6efbe213c089f78575c98ab9a0ed
-https://conda.anaconda.org/conda-forge/win-64/libhwloc-2.10.0-default_h2fffb23_1000.conda#ee944f0d41d9e2048f9d7492c1623ca3
-https://conda.anaconda.org/conda-forge/win-64/libintl-devel-0.22.5-h5728263_2.conda#a2ad82fae23975e4ccbfab2847d31d48
-https://conda.anaconda.org/conda-forge/win-64/libtiff-4.6.0-hddb2be6_3.conda#6d1828c9039929e2f185c5fa9d133018
+https://conda.anaconda.org/conda-forge/win-64/krb5-1.21.3-hdf4eb48_0.conda#31aec030344e962fbd7dbbbbd68e60a9
+https://conda.anaconda.org/conda-forge/win-64/libbrotlidec-1.1.0-h2466b09_2.conda#9bae75ce723fa34e98e239d21d752a7e
+https://conda.anaconda.org/conda-forge/win-64/libbrotlienc-1.1.0-h2466b09_2.conda#85741a24d97954a991e55e34bc55990b
+https://conda.anaconda.org/conda-forge/win-64/libgcc-14.2.0-h1383e82_2.conda#4a74c1461a0ba47a3346c04bdccbe2ad
+https://conda.anaconda.org/conda-forge/win-64/libintl-0.22.5-h5728263_3.conda#2cf0cf76cc15d360dfa2f17fd6cf9772
+https://conda.anaconda.org/conda-forge/win-64/libpng-1.6.47-h7a4582a_0.conda#ad620e92b82d2948bc019e029c574ebb
+https://conda.anaconda.org/conda-forge/win-64/libxml2-2.13.7-h442d1da_1.conda#c14ff7f05e57489df9244917d2b55763
+https://conda.anaconda.org/conda-forge/win-64/pcre2-10.44-h99c9b8b_2.conda#a912b2c4ff0f03101c751aa79a331831
+https://conda.anaconda.org/conda-forge/win-64/python-3.10.17-h8c5b53a_0_cpython.conda#0c59918f056ab2e9c7bb45970d32b2ea
+https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.7-hbeecb71_2.conda#21f56217d6125fb30c3c3f10c786d751
+https://conda.anaconda.org/conda-forge/win-64/brotli-bin-1.1.0-h2466b09_2.conda#d22534a9be5771fc58eb7564947f669d
+https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_1.conda#962b9857ee8e7018c22f2776ffa0b2d7
+https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhd8ed1ab_1.conda#44600c4667a319d67dbe0681fc0bc833
+https://conda.anaconda.org/conda-forge/win-64/cython-3.0.12-py310h6bd2d47_0.conda#8b4e32766e91dfad20bdfd9747e66d54
+https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.2-pyhd8ed1ab_1.conda#a16662747cdeb9abbac74d0057cc976e
+https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_1.conda#a71efeae2c160f6789900ba2631a2c90
+https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_1.conda#6837f3eff7dcea42ecd714ce1ac2b108
+https://conda.anaconda.org/conda-forge/win-64/kiwisolver-1.4.7-py310hc19bc0b_0.conda#50d96539497fc7493cbe469fbb6b8b6e
+https://conda.anaconda.org/conda-forge/win-64/libclang13-20.1.4-default_h6e92b77_0.conda#80c3ee2ffb5f35f2b6c4b10d636b04fb
+https://conda.anaconda.org/conda-forge/win-64/libfreetype6-2.13.3-h0b5ce68_1.conda#a84b7d1a13060a9372bea961a8131dbc
+https://conda.anaconda.org/conda-forge/win-64/libglib-2.84.1-h7025463_0.conda#6cbaea9075a4f007eb7d0a90bb9a2a09
+https://conda.anaconda.org/conda-forge/win-64/libhwloc-2.11.2-default_ha69328c_1001.conda#b87a0ac5ab6495d8225db5dc72dd21cd
+https://conda.anaconda.org/conda-forge/win-64/libtiff-4.7.0-h797046b_4.conda#7d938ca70c64c5516767b4eae0a56172
+https://conda.anaconda.org/conda-forge/win-64/libxslt-1.1.39-h3df6e99_0.conda#279ee338c9b34871d578cb3c7aa68f70
+https://conda.anaconda.org/conda-forge/noarch/meson-1.8.0-pyh29332c3_0.conda#8e25221b702272394b86b0f4d7217f77
 https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19
-https://conda.anaconda.org/conda-forge/noarch/packaging-24.0-pyhd8ed1ab_0.conda#248f521b64ce055e7feae3105e7abeb8
-https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_0.conda#d3483c8fc2dc2cc3f5cf43e26d60cabf
-https://conda.anaconda.org/conda-forge/noarch/ply-3.11-pyhd8ed1ab_2.conda#18c6deb6f9602e32446398203c8f0e91
-https://conda.anaconda.org/conda-forge/win-64/pthread-stubs-0.4-hcd874cb_1001.tar.bz2#a1f820480193ea83582b13249a7e7bd9
-https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.2-pyhd8ed1ab_0.conda#b9a4dacf97241704529131a0dfc0494f
-https://conda.anaconda.org/conda-forge/noarch/setuptools-69.5.1-pyhd8ed1ab_0.conda#7462280d81f639363e6e63c81276bd9e
-https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2
-https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.4.0-pyhc1e730c_0.conda#b296278eef667c673bf51de6535bad88
-https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095
-https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96
-https://conda.anaconda.org/conda-forge/win-64/tornado-6.4-py39ha55989b_0.conda#d8f52e8e1d02f9a5901f9224e2ddf98f
-https://conda.anaconda.org/conda-forge/win-64/unicodedata2-15.1.0-py39ha55989b_0.conda#20ec896e8d97f2ff8be1124e624dc8f2
-https://conda.anaconda.org/conda-forge/noarch/wheel-0.43.0-pyhd8ed1ab_1.conda#0b5293a157c2b5cd513dd1b03d8d3aae
-https://conda.anaconda.org/conda-forge/win-64/xorg-libxau-1.0.11-hcd874cb_0.conda#c46ba8712093cb0114404ae8a7582e1a
-https://conda.anaconda.org/conda-forge/win-64/xorg-libxdmcp-1.1.3-hcd874cb_0.tar.bz2#46878ebb6b9cbd8afcf8088d7ef00ece
-https://conda.anaconda.org/conda-forge/noarch/zipp-3.17.0-pyhd8ed1ab_0.conda#2e4d6bc0b14e10f895fc6791a7d9b26a
-https://conda.anaconda.org/conda-forge/win-64/brotli-1.1.0-hcfcfb64_1.conda#f47f6db2528e38321fb00ae31674c133
-https://conda.anaconda.org/conda-forge/win-64/coverage-7.5.0-py39ha55e580_0.conda#53799e32a839e6a86e5b104a768dcd9d
-https://conda.anaconda.org/conda-forge/win-64/glib-tools-2.80.0-h0a98069_6.conda#40d452e4012c00f644b1dd6319fcdbcf
-https://conda.anaconda.org/conda-forge/noarch/importlib_resources-6.4.0-pyhd8ed1ab_0.conda#c5d3907ad8bd7bf557521a1833cf7e6d
-https://conda.anaconda.org/conda-forge/noarch/joblib-1.4.0-pyhd8ed1ab_0.conda#e0ed1bf13ce3a440e022157bf4764465
-https://conda.anaconda.org/conda-forge/win-64/lcms2-2.16-h67d730c_0.conda#d3592435917b62a8becff3a60db674f6
-https://conda.anaconda.org/conda-forge/win-64/libgettextpo-devel-0.22.5-h5728263_2.conda#6f42ec61abc6d52a4079800a640319c5
-https://conda.anaconda.org/conda-forge/win-64/libxcb-1.15-hcd874cb_0.conda#090d91b69396f14afef450c285f9758c
-https://conda.anaconda.org/conda-forge/noarch/meson-1.4.0-pyhd8ed1ab_0.conda#52a0660cfa40b45bf254ecc3374cb2e0
-https://conda.anaconda.org/conda-forge/win-64/openjpeg-2.5.2-h3d672ee_0.conda#7e7099ad94ac3b599808950cec30ad4e
-https://conda.anaconda.org/conda-forge/noarch/pip-24.0-pyhd8ed1ab_0.conda#f586ac1e56c8638b64f9c8122a7b8a67
-https://conda.anaconda.org/conda-forge/noarch/pyproject-metadata-0.8.0-pyhd8ed1ab_0.conda#573fe09d7bd0cd4bcc210d8369b5ca47
-https://conda.anaconda.org/conda-forge/noarch/pytest-7.4.4-pyhd8ed1ab_0.conda#a9d145de8c5f064b5fa68fb34725d9f4
-https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0-pyhd8ed1ab_0.conda#2cf4264fffb9e6eff6031c5b6884d61c
-https://conda.anaconda.org/conda-forge/win-64/sip-6.7.12-py39h99910a6_0.conda#0cc5774390ada632ed7975203057c91c
-https://conda.anaconda.org/conda-forge/win-64/tbb-2021.12.0-h91493d7_0.conda#21745fdd12f01b41178596143cbecffd
-https://conda.anaconda.org/conda-forge/win-64/fonttools-4.51.0-py39ha55989b_0.conda#5d19302bab29e347116b743e793aa7d6
-https://conda.anaconda.org/conda-forge/win-64/gettext-0.22.5-h5728263_2.conda#da84216f88a8c89eb943c683ceb34d7d
-https://conda.anaconda.org/conda-forge/win-64/glib-2.80.0-h39d0aa6_6.conda#a4036d0bc6f499ebe9fef7b887f3ca0f
-https://conda.anaconda.org/conda-forge/noarch/importlib-resources-6.4.0-pyhd8ed1ab_0.conda#dcbadab7a68738a028e195ab68ab2d2e
-https://conda.anaconda.org/conda-forge/noarch/meson-python-0.16.0-pyh0c530f3_0.conda#e16f0dbf502da873be9f9adb0dc52547
-https://conda.anaconda.org/conda-forge/win-64/mkl-2024.1.0-h66d3029_692.conda#b43ec7ed045323edeff31e348eea8652
-https://conda.anaconda.org/conda-forge/win-64/pillow-10.3.0-py39h9ee4981_0.conda#6d69d57c41867acc162ef0205a8efaef
-https://conda.anaconda.org/conda-forge/win-64/pyqt5-sip-12.12.2-py39h99910a6_5.conda#dffbcea794c524c471772a5f697c2aea
-https://conda.anaconda.org/conda-forge/noarch/pytest-cov-5.0.0-pyhd8ed1ab_0.conda#c54c0107057d67ddf077751339ec2c63
-https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.5.0-pyhd8ed1ab_0.conda#d5f595da2daead898ca958ac62f0307b
-https://conda.anaconda.org/conda-forge/win-64/gstreamer-1.24.1-hb4038d2_1.conda#8a6dfe53ad02a3b151e6383a950043ee
-https://conda.anaconda.org/conda-forge/win-64/libblas-3.9.0-22_win64_mkl.conda#65c56ecdeceffd6c32d3d54db7e02c6e
-https://conda.anaconda.org/conda-forge/win-64/mkl-devel-2024.1.0-h57928b3_692.conda#9b3d1d4916a56fd32460f6fe784dcb51
-https://conda.anaconda.org/conda-forge/win-64/gst-plugins-base-1.24.1-h001b923_1.conda#7900eb39e6203249accb52fb705a2fb0
-https://conda.anaconda.org/conda-forge/win-64/libcblas-3.9.0-22_win64_mkl.conda#336c93ab102846c6131cf68e722a68f1
-https://conda.anaconda.org/conda-forge/win-64/liblapack-3.9.0-22_win64_mkl.conda#c752cc2af9f3d8d7b2fdebb915a33ef7
-https://conda.anaconda.org/conda-forge/win-64/liblapacke-3.9.0-22_win64_mkl.conda#db33ffa4bae1d2f6d5602afaa048bf6b
-https://conda.anaconda.org/conda-forge/win-64/numpy-1.26.4-py39hddb5d58_0.conda#6e30ff8f2d3f59f45347dfba8bc22a04
-https://conda.anaconda.org/conda-forge/win-64/qt-main-5.15.8-hcef0176_21.conda#76544d3dfeff8fd52250df168cb0005b
-https://conda.anaconda.org/conda-forge/win-64/blas-devel-3.9.0-22_win64_mkl.conda#adeb834f3b7b06f3d77cd90b7c9d08f0
-https://conda.anaconda.org/conda-forge/win-64/contourpy-1.2.1-py39h1f6ef14_0.conda#03e25c6bae87f4f9595337255b44b0fb
-https://conda.anaconda.org/conda-forge/win-64/pyqt-5.15.9-py39hb77abff_5.conda#5ed899124a51958336371ff01482b8fd
-https://conda.anaconda.org/conda-forge/win-64/scipy-1.13.0-py39hddb5d58_0.conda#cfe749056fb9ed9dbc096b5751becf34
-https://conda.anaconda.org/conda-forge/win-64/blas-2.122-mkl.conda#aee642435696de144ddf91dc02101cf8
-https://conda.anaconda.org/conda-forge/win-64/matplotlib-base-3.8.4-py39hf19769e_0.conda#7836c3dc5814f6d55a7392657c576e88
-https://conda.anaconda.org/conda-forge/win-64/matplotlib-3.8.4-py39hcbf5309_0.conda#cc66c372d5eb745665da06ce56b7d72b
+https://conda.anaconda.org/conda-forge/noarch/packaging-25.0-pyh29332c3_1.conda#58335b26c38bf4a20f399384c33cbcf9
+https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_1.conda#e9dcbce5f45f9ee500e728ae58b605b6
+https://conda.anaconda.org/conda-forge/win-64/pthread-stubs-0.4-h0e40799_1002.conda#3c8f2573569bb816483e5cf57efbbe29
+https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.2.3-pyhd8ed1ab_1.conda#513d3c262ee49b54a8fec85c5bc99764
+https://conda.anaconda.org/conda-forge/noarch/setuptools-80.1.0-pyhff2d567_0.conda#f6f72d0837c79eaec77661be43e8a691
+https://conda.anaconda.org/conda-forge/noarch/six-1.17.0-pyhd8ed1ab_0.conda#a451d576819089b0d672f18768be0f65
+https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.6.0-pyhecae5ae_0.conda#9d64911b31d57ca443e9f1e36b04385f
+https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_1.conda#b0dd904de08b7db706167240bf37b164
+https://conda.anaconda.org/conda-forge/noarch/tomli-2.2.1-pyhd8ed1ab_1.conda#ac944244f1fed2eb49bae07193ae8215
+https://conda.anaconda.org/conda-forge/win-64/tornado-6.4.2-py310ha8f682b_0.conda#e6819d3a0cae0f1b1838875f858421d1
+https://conda.anaconda.org/conda-forge/win-64/unicodedata2-16.0.0-py310ha8f682b_0.conda#b28aead44c6e19a1fbba7752aa242b34
+https://conda.anaconda.org/conda-forge/noarch/wheel-0.45.1-pyhd8ed1ab_1.conda#75cb7132eb58d97896e173ef12ac9986
+https://conda.anaconda.org/conda-forge/win-64/xorg-libxau-1.0.12-h0e40799_0.conda#2ffbfae4548098297c033228256eb96e
+https://conda.anaconda.org/conda-forge/win-64/xorg-libxdmcp-1.1.5-h0e40799_0.conda#8393c0f7e7870b4eb45553326f81f0ff
+https://conda.anaconda.org/conda-forge/win-64/brotli-1.1.0-h2466b09_2.conda#378f1c9421775dfe644731cb121c8979
+https://conda.anaconda.org/conda-forge/win-64/coverage-7.8.0-py310h38315fa_0.conda#30a825dae940c63c55bca8df4f806f3e
+https://conda.anaconda.org/conda-forge/noarch/joblib-1.5.0-pyhd8ed1ab_0.conda#3d7257f0a61c9aa4ffa3e324a887416b
+https://conda.anaconda.org/conda-forge/win-64/lcms2-2.17-hbcf6048_0.conda#3538827f77b82a837fa681a4579e37a1
+https://conda.anaconda.org/conda-forge/win-64/libfreetype-2.13.3-h57928b3_1.conda#410ba2c8e7bdb278dfbb5d40220e39d2
+https://conda.anaconda.org/conda-forge/win-64/libxcb-1.17.0-h0e4246c_0.conda#a69bbf778a462da324489976c84cfc8c
+https://conda.anaconda.org/conda-forge/win-64/openjpeg-2.5.3-h4d64b90_0.conda#fc050366dd0b8313eb797ed1ffef3a29
+https://conda.anaconda.org/conda-forge/noarch/pip-25.1.1-pyh8b19718_0.conda#32d0781ace05105cc99af55d36cbec7c
+https://conda.anaconda.org/conda-forge/noarch/pyproject-metadata-0.9.1-pyhd8ed1ab_0.conda#22ae7c6ea81e0c8661ef32168dda929b
+https://conda.anaconda.org/conda-forge/noarch/pytest-8.3.5-pyhd8ed1ab_0.conda#c3c9316209dec74a705a36797970c6be
+https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0.post0-pyhff2d567_1.conda#5ba79d7c71f03c678c8ead841f347d6e
+https://conda.anaconda.org/conda-forge/win-64/tbb-2021.13.0-h62715c5_1.conda#9190dd0a23d925f7602f9628b3aed511
+https://conda.anaconda.org/conda-forge/win-64/fonttools-4.57.0-py310h38315fa_0.conda#1f25f742c39582715cc058f5fe451975
+https://conda.anaconda.org/conda-forge/win-64/freetype-2.13.3-h57928b3_1.conda#633504fe3f96031192e40e3e6c18ef06
+https://conda.anaconda.org/conda-forge/noarch/meson-python-0.18.0-pyh70fd9c4_0.conda#576c04b9d9f8e45285fb4d9452c26133
+https://conda.anaconda.org/conda-forge/win-64/mkl-2024.2.2-h66d3029_15.conda#302dff2807f2927b3e9e0d19d60121de
+https://conda.anaconda.org/conda-forge/noarch/pytest-cov-6.1.1-pyhd8ed1ab_0.conda#1e35d8f975bc0e984a19819aa91c440a
+https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.6.1-pyhd8ed1ab_1.conda#59aad4fb37cabc0bacc73cf344612ddd
+https://conda.anaconda.org/conda-forge/win-64/fontconfig-2.15.0-h765892d_1.conda#9bb0026a2131b09404c59c4290c697cd
+https://conda.anaconda.org/conda-forge/win-64/libblas-3.9.0-31_h641d27c_mkl.conda#d05563c577fe2f37693a554b3f271e8f
+https://conda.anaconda.org/conda-forge/win-64/mkl-devel-2024.2.2-h57928b3_15.conda#a85f53093da069c7c657f090e388f3ef
+https://conda.anaconda.org/conda-forge/win-64/pillow-11.1.0-py310h9595edc_0.conda#67a38507ac20bd85226fe6dd7ed87462
+https://conda.anaconda.org/conda-forge/win-64/cairo-1.18.4-h5782bbf_0.conda#20e32ced54300292aff690a69c5e7b97
+https://conda.anaconda.org/conda-forge/win-64/libcblas-3.9.0-31_h5e41251_mkl.conda#43c100b94ad2607382b0cf0f3a6b0bf3
+https://conda.anaconda.org/conda-forge/win-64/liblapack-3.9.0-31_h1aa476e_mkl.conda#40b47ee720a185289760960fc6185750
+https://conda.anaconda.org/conda-forge/win-64/harfbuzz-11.1.0-h8796e6f_0.conda#dcc4a63f231cc52197c558f5e07e0a69
+https://conda.anaconda.org/conda-forge/win-64/liblapacke-3.9.0-31_h845c4fa_mkl.conda#003a2041cb07a7cf698f48dd26301273
+https://conda.anaconda.org/conda-forge/win-64/numpy-2.2.5-py310h4987827_0.conda#19e9c5868faa8046020ce870a9a9d0fc
+https://conda.anaconda.org/conda-forge/win-64/blas-devel-3.9.0-31_hfb1a452_mkl.conda#0deeb3d9d6f0e56393c55ef382899010
+https://conda.anaconda.org/conda-forge/win-64/contourpy-1.3.2-py310hc19bc0b_0.conda#039416813b5290e7d100a05bb4326110
+https://conda.anaconda.org/conda-forge/win-64/qt6-main-6.9.0-h83cda92_1.conda#412f970fc305449b6ee626fe9c6638a8
+https://conda.anaconda.org/conda-forge/win-64/scipy-1.15.2-py310h15c175c_0.conda#81798168111d1021e3d815217c444418
+https://conda.anaconda.org/conda-forge/win-64/blas-2.131-mkl.conda#1842bfaa4e349875c47bde1d9871bda6
+https://conda.anaconda.org/conda-forge/win-64/matplotlib-base-3.10.1-py310h37e0a56_0.conda#1b78c5c0741473537e39e425ff30ea80
+https://conda.anaconda.org/conda-forge/win-64/pyside6-6.9.0-py310hc1b6536_0.conda#e90c8d8a817b5d63b7785d7d18c99ae0
+https://conda.anaconda.org/conda-forge/win-64/matplotlib-3.10.1-py310h5588dad_0.conda#246bfc9ca36dccad2d78a020ab8d2aab
diff --git a/build_tools/azure/pymin_conda_defaults_openblas_environment.yml b/build_tools/azure/pymin_conda_forge_openblas_min_dependencies_environment.yml
similarity index 58%
rename from build_tools/azure/pymin_conda_defaults_openblas_environment.yml
rename to build_tools/azure/pymin_conda_forge_openblas_min_dependencies_environment.yml
index 3a8379e28068e..a179c55fed993 100644
--- a/build_tools/azure/pymin_conda_defaults_openblas_environment.yml
+++ b/build_tools/azure/pymin_conda_forge_openblas_min_dependencies_environment.yml
@@ -2,22 +2,25 @@
 # following script to centralize the configuration for CI builds:
 # build_tools/update_environments_and_lock_files.py
 channels:
-  - defaults
+  - conda-forge
 dependencies:
-  - python=3.9
-  - numpy=1.21
+  - python=3.10
+  - numpy=1.22.0  # min
   - blas[build=openblas]
-  - scipy=1.7
+  - scipy=1.8.0  # min
   - cython=3.0.10  # min
   - joblib=1.2.0  # min
-  - matplotlib=3.3.4  # min
-  - pyamg
-  - pytest<8
+  - threadpoolctl=3.1.0  # min
+  - matplotlib=3.5.0  # min
+  - pandas=1.4.0  # min
+  - pyamg=4.2.1  # min
+  - pytest
   - pytest-xdist
   - pillow
+  - pip
+  - ninja
+  - meson-python=0.16.0  # min
  - pytest-cov
   - coverage
   - ccache
-  - pip
-  - pip:
-    - threadpoolctl==3.1.0  # min
+  - polars=0.20.30  # min
diff --git a/build_tools/azure/pymin_conda_forge_openblas_min_dependencies_linux-64_conda.lock b/build_tools/azure/pymin_conda_forge_openblas_min_dependencies_linux-64_conda.lock
new file mode 100644
index 0000000000000..d68f376c0d376
--- /dev/null
+++ b/build_tools/azure/pymin_conda_forge_openblas_min_dependencies_linux-64_conda.lock
@@ -0,0 +1,190 @@
+# Generated by conda-lock.
+# platform: linux-64
+# input_hash: 41111e5656d9d33f83f1160f643ec4ab314aa8e179923dbe1350c87b0ac2f400
+@EXPLICIT
+https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2#0c96522c6bdaed4b1566d11387caaf45
+https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2#34893075a5c9e55cdafac56607368fc6
+https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2#4d59c254e01d9cde7957100457e2d5fb
+https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_3.conda#49023d73832ef61042f6a237cb2687e7
+https://conda.anaconda.org/conda-forge/noarch/python_abi-3.10-7_cp310.conda#44e871cba2b162368476a84b8d040b6c
+https://conda.anaconda.org/conda-forge/noarch/tzdata-2025b-h78e105d_0.conda#4222072737ccff51314b5ece9c7d6f5a
+https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2025.4.26-hbd8a1cb_0.conda#95db94f75ba080a22eb623590993167b
+https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-0.tar.bz2#f766549260d6815b0c52253f1fb1bb29
+https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.43-h712a8e2_4.conda#01f8d123c96816249efd255a31ad7712
+https://conda.anaconda.org/conda-forge/linux-64/libglvnd-1.7.0-ha4b6fd6_2.conda#434ca7e50e40f4918ab701e3facd59a0
+https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-20.1.4-h024ca30_0.conda#4fc395cda27912a7d904b86b5dbf3a4d
+https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-3_kmp_llvm.conda#ee5c2118262e30b972bc0b4db8ef0ba5
+https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2#fee5683a3f04bd15cbd8318b096a27ab
+https://conda.anaconda.org/conda-forge/linux-64/libegl-1.7.0-ha4b6fd6_2.conda#c151d5eb730e9b7480e6d48c0fc44048
+https://conda.anaconda.org/conda-forge/linux-64/libgcc-14.2.0-h767d61c_2.conda#ef504d1acbd74b7cc6849ef8af47dd03
+https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.14-hb9d3cd8_0.conda#76df83c2a9035c54df5d04ff81bcc02d
+https://conda.anaconda.org/conda-forge/linux-64/gettext-tools-0.24.1-h5888daf_0.conda#d54305672f0361c2f3886750e7165b5f
+https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.1.0-hb9d3cd8_2.conda#41b599ed2b02abcfdd84302bff174b23
+https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.23-h86f0d12_0.conda#27fe770decaf469a53f3e3a6d593067f
+https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.0-h5888daf_0.conda#db0bfbe7dd197b68ad5f30333bae6ce0
+https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.6-h2dba641_1.conda#ede4673863426c0883c0063d853bbd85
+https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-14.2.0-h69a702a_2.conda#a2222a6ada71fb478682efe483ce0f92
+https://conda.anaconda.org/conda-forge/linux-64/libgettextpo-0.24.1-h5888daf_0.conda#2ee6d71b72f75d50581f2f68e965efdb
+https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-14.2.0-hf1ad2bd_2.conda#556a4fdfac7287d349b8f09aba899693
+https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.18-h4ce23a2_1.conda#e796ff8ddc598affdf7c173d6145f087
+https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.1.0-hb9d3cd8_0.conda#9fa334557db9f63da6c9285fd2a48638
+https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.8.1-hb9d3cd8_1.conda#a76fd702c93cd2dfd89eff30a5fd45a8
+https://conda.anaconda.org/conda-forge/linux-64/libntlm-1.8-hb9d3cd8_0.conda#7c7927b404672409d9917d49bff5f2d6
+https://conda.anaconda.org/conda-forge/linux-64/libogg-1.3.5-hd0c01bc_1.conda#68e52064ed3897463c0e958ab5c8f91b
+https://conda.anaconda.org/conda-forge/linux-64/libopus-1.5.2-hd0c01bc_0.conda#b64523fb87ac6f87f0790f324ad43046
+https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-14.2.0-h8f9b012_2.conda#a78c856b6dc6bf4ea8daeb9beaaa3fb0
+https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.5.0-h851e524_0.conda#63f790534398730f59e1b899c3644d4a
+https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-hb9d3cd8_2.conda#edb0dca6bc32e4f4789199455a1dbeb8
+https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h2d0b736_3.conda#47e340acb35de30501a76c7c799c41d7
+https://conda.anaconda.org/conda-forge/linux-64/openssl-3.5.0-h7b32b05_1.conda#de356753cfdbffcde5bb1e86e3aa6cd0
+https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-hb9d3cd8_1002.conda#b3c17d95b5a10c6e64a21fa17573e70e
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libice-1.1.2-hb9d3cd8_0.conda#fb901ff28063514abb6046c9ec2c4a45
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.12-hb9d3cd8_0.conda#f6ebe2cb3f82ba6c057dde5d9debe4f7
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdmcp-1.1.5-hb9d3cd8_0.conda#8035c64cb77ed555e3f150b7b3972480
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxshmfence-1.3.3-hb9d3cd8_0.conda#9a809ce9f65460195777f2f2116bae02
+https://conda.anaconda.org/conda-forge/linux-64/attr-2.5.1-h166bdaf_1.tar.bz2#d9c69a24ad678ffce24c6543a0176b00
+https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h4bc722e_7.conda#62ee74e96c5ebb0af99386de58cf9553
+https://conda.anaconda.org/conda-forge/linux-64/expat-2.7.0-h5888daf_0.conda#d6845ae4dea52a2f90178bf1829a21f8
+https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2#30186d27e2c9fa62b45fb1476b7200e3
+https://conda.anaconda.org/conda-forge/linux-64/lame-3.100-h166bdaf_1003.tar.bz2#a8832b479f93521a9e7b5b743803be51
+https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h0aef613_1.conda#9344155d33912347b37f0ae6c410a835
+https://conda.anaconda.org/conda-forge/linux-64/libasprintf-0.24.1-h8e693c7_0.conda#57566a81dd1e5aa3d98ac7582e8bfe03
+https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.1.0-hb9d3cd8_2.conda#9566f0bd264fbd463002e759b8a82401
+https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.1.0-hb9d3cd8_2.conda#06f70867945ea6a84d35836af780f1de
+https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20250104-pl5321h7949ede_0.conda#c277e0a4d549b03ac1e9d6cbbe3d017b
+https://conda.anaconda.org/conda-forge/linux-64/libevent-2.1.12-hf998b51_1.conda#a1cfcc585f0c42bf8d5546bb1dfb668d
+https://conda.anaconda.org/conda-forge/linux-64/libgettextpo-devel-0.24.1-h5888daf_0.conda#8f04c7aae6a46503bc36d1ed5abc8c7c
+https://conda.anaconda.org/conda-forge/linux-64/libgfortran-14.2.0-h69a702a_2.conda#fb54c4ea68b460c278d26eea89cfbcc3
+https://conda.anaconda.org/conda-forge/linux-64/libgpg-error-1.55-h3f2d84a_0.conda#2bd47db5807daade8500ed7ca4c512a4
+https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hd590300_0.conda#30fd6e37fe21f86f4bd26d6ee73eeec7
+https://conda.anaconda.org/conda-forge/linux-64/libpciaccess-0.18-hd590300_0.conda#48f4330bfcd959c3cfb704d424903c82
+https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.47-h943b412_0.conda#55199e2ae2c3651f6f9b2a447b47bdc9
+https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.49.1-hee588c1_2.conda#962d6ac93c30b1dfc54c9cccafd1003e
+https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-14.2.0-h4852527_2.conda#c75da67f045c2627f59e6fcb5f4e3a9b
+https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda#40b61aab5c7ba9ff276c41cfffe6b80b
+https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.17.0-h8a09558_0.conda#92ed62436b625154323d40d5f2f11dd7
+https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda#5aa797f8787fe7a17d1b0821485b5adc
+https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.10.0-h5888daf_1.conda#9de5350a85c4a20c685259b889aa6393
+https://conda.anaconda.org/conda-forge/linux-64/mpg123-1.32.9-hc50e24c_0.conda#c7f302fd11eeb0987a6a5e1f3aed6a21
+https://conda.anaconda.org/conda-forge/linux-64/mysql-common-9.0.1-h266115a_6.conda#94116b69829e90b72d566e64421e1bff
+https://conda.anaconda.org/conda-forge/linux-64/ninja-1.12.1-hff21bea_1.conda#2322531904f27501ee19847b87ba7c64
+https://conda.anaconda.org/conda-forge/linux-64/nspr-4.36-h5888daf_0.conda#de9cd5bca9e4918527b9b72b6e2e1409
+https://conda.anaconda.org/conda-forge/linux-64/pixman-0.46.0-h29eaf8c_0.conda#d2f1c87d4416d1e7344cf92b1aaee1c4
+https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8c095d6_2.conda#283b96675859b20a825f8fa30f311446
+https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda#d453b98d9c83e71da0741bb0ff4d76bc
+https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.7-hb8e6e7a_2.conda#6432cb5d4ac0046c3ac0a8a0f95842f9
+https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.1.0-hb9d3cd8_2.conda#c63b5e52939e795ba8d26e35d767a843
+https://conda.anaconda.org/conda-forge/linux-64/graphite2-1.3.13-h59595ed_1003.conda#f87c7b7c2cb45f323ffbce941c78ab7c
+https://conda.anaconda.org/conda-forge/linux-64/icu-75.1-he02047a_0.conda#8b189310083baabfb622af68fd9d3ae3
+https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.3-h659f571_0.conda#3f43953b7d3fb3aaa1d0d0723d91e368
+https://conda.anaconda.org/conda-forge/linux-64/libasprintf-devel-0.24.1-h8e693c7_0.conda#8f66ed2e34507b7ae44afa31c3e4ec79
+https://conda.anaconda.org/conda-forge/linux-64/libcap-2.75-h39aace5_0.conda#c44c16d6976d2aebbd65894d7741e67e
+https://conda.anaconda.org/conda-forge/linux-64/libdrm-2.4.124-hb9d3cd8_0.conda#8bc89311041d7fcb510238cf0848ccae
+https://conda.anaconda.org/conda-forge/linux-64/libfreetype6-2.13.3-h48d6fc4_1.conda#3c255be50a506c50765a93a6644f32fe
+https://conda.anaconda.org/conda-forge/linux-64/libgcrypt-lib-1.11.0-hb9d3cd8_2.conda#e55712ff40a054134d51b89afca57dbc
+https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-14.2.0-h69a702a_2.conda#4056c857af1a99ee50589a941059ec55
+https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.7.0-hd9ff511_4.conda#6c1028898cf3a2032d9af46689e1b81a
+https://conda.anaconda.org/conda-forge/linux-64/libvorbis-1.3.7-h9c3ff4c_0.tar.bz2#309dec04b70a3cc0f1e84a4013683bc0
+https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-9.0.1-he0572af_6.conda#9802ae6d20982f42c0f5d69008988763
+https://conda.anaconda.org/conda-forge/linux-64/nss-3.111-h159eef7_0.conda#311e8370c9db254611ec87250f6370a0
+https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.45-hc749103_0.conda#b90bece58b4c2bf25969b70f3be42d25
+https://conda.anaconda.org/conda-forge/linux-64/python-3.10.17-hd6af730_0_cpython.conda#7bb89638dae9ce1b8e051d0b721e83c2
+https://conda.anaconda.org/conda-forge/linux-64/xcb-util-0.4.1-hb711507_2.conda#8637c3e5821654d0edf97e2b0404b443
+https://conda.anaconda.org/conda-forge/linux-64/xcb-util-keysyms-0.4.1-hb711507_0.conda#ad748ccca349aec3e91743e08b5e2b50
+https://conda.anaconda.org/conda-forge/linux-64/xcb-util-renderutil-0.3.10-hb711507_0.conda#0e0cbe0564d03a99afd5fd7b362feecd
+https://conda.anaconda.org/conda-forge/linux-64/xcb-util-wm-0.4.2-hb711507_0.conda#608e0ef8256b81d04456e8d211eee3e8
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libsm-1.2.6-he73a12e_0.conda#1c74ff8c35dcadf952a16f752ca5aa49
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.12-h4f16b4b_0.conda#db038ce880f100acc74dba10302b5630
+https://conda.anaconda.org/conda-forge/linux-64/brotli-1.1.0-hb9d3cd8_2.conda#98514fe74548d768907ce7a13f680e8f
+https://conda.anaconda.org/conda-forge/noarch/certifi-2025.4.26-pyhd8ed1ab_0.conda#c33eeaaa33f45031be34cda513df39b6
+https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_1.conda#962b9857ee8e7018c22f2776ffa0b2d7
+https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhd8ed1ab_1.conda#44600c4667a319d67dbe0681fc0bc833
+https://conda.anaconda.org/conda-forge/linux-64/cyrus-sasl-2.1.27-h54b06d7_7.conda#dce22f70b4e5a407ce88f2be046f4ceb
+https://conda.anaconda.org/conda-forge/linux-64/cython-3.0.10-py310hc6cd4ac_0.conda#bd1d71ee240be36f1d85c86177d6964f
+https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.2-pyhd8ed1ab_1.conda#a16662747cdeb9abbac74d0057cc976e
+https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_1.conda#a71efeae2c160f6789900ba2631a2c90
+https://conda.anaconda.org/conda-forge/linux-64/gettext-0.24.1-h5888daf_0.conda#c63e7590d4d6f4c85721040ed8b12888
+https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_1.conda#6837f3eff7dcea42ecd714ce1ac2b108
+https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.7-py310h3788b33_0.conda#4186d9b4d004b0fe0de6aa62496fb48a
+https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.17-h717163a_0.conda#000e85703f0fd9594c81710dd5066471
+https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h4637d8d_4.conda#d4529f4dff3057982a7617c7ac58fde3
+https://conda.anaconda.org/conda-forge/linux-64/libfreetype-2.13.3-ha770c72_1.conda#51f5be229d83ecd401fb369ab96ae669
+https://conda.anaconda.org/conda-forge/linux-64/libglib-2.84.1-h3618099_1.conda#714c97d4ff495ab69d1fdfcadbcae985
+https://conda.anaconda.org/conda-forge/linux-64/libglx-1.7.0-ha4b6fd6_2.conda#c8013e438185f33b13814c5c488acd5c
+https://conda.anaconda.org/conda-forge/linux-64/libhiredis-1.0.2-h2cc385e_0.tar.bz2#b34907d3a81a3cd8095ee83d174c074a
+https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.25-pthreads_h413a1c8_0.conda#d172b34a443b95f86089e8229ddc9a17
+https://conda.anaconda.org/conda-forge/linux-64/libsystemd0-257.4-h4e0b6ca_1.conda#04bcf3055e51f8dde6fab9672fb9fca0
+https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.13.7-h4bc477f_1.conda#ad1f1f8238834cd3c88ceeaee8da444a
+https://conda.anaconda.org/conda-forge/noarch/meson-1.8.0-pyh29332c3_0.conda#8e25221b702272394b86b0f4d7217f77
+https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19
+https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.3-h5fbd93e_0.conda#9e5816bc95d285c115a3ebc2f8563564
+https://conda.anaconda.org/conda-forge/noarch/packaging-25.0-pyh29332c3_1.conda#58335b26c38bf4a20f399384c33cbcf9
+https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_1.conda#e9dcbce5f45f9ee500e728ae58b605b6
+https://conda.anaconda.org/conda-forge/noarch/ply-3.11-pyhd8ed1ab_3.conda#fd5062942bfa1b0bd5e0d2a4397b099e
+https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.2.3-pyhd8ed1ab_1.conda#513d3c262ee49b54a8fec85c5bc99764
+https://conda.anaconda.org/conda-forge/noarch/pytz-2025.2-pyhd8ed1ab_0.conda#bc8e3267d44011051f2eb14d22fb0960
+https://conda.anaconda.org/conda-forge/noarch/setuptools-80.1.0-pyhff2d567_0.conda#f6f72d0837c79eaec77661be43e8a691
+https://conda.anaconda.org/conda-forge/noarch/six-1.17.0-pyhd8ed1ab_0.conda#a451d576819089b0d672f18768be0f65
+https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.1.0-pyh8a188c0_0.tar.bz2#a2995ee828f65687ac5b1e71a2ab1e0c
+https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_1.conda#b0dd904de08b7db706167240bf37b164
+https://conda.anaconda.org/conda-forge/noarch/tomli-2.2.1-pyhd8ed1ab_1.conda#ac944244f1fed2eb49bae07193ae8215
+https://conda.anaconda.org/conda-forge/linux-64/tornado-6.4.2-py310ha75aee5_0.conda#166d59aab40b9c607b4cc21c03924e9d
+https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.13.2-pyh29332c3_0.conda#83fc6ae00127671e301c9f44254c31b8
+https://conda.anaconda.org/conda-forge/linux-64/unicodedata2-16.0.0-py310ha75aee5_0.conda#1d7a4b9202cdd10d56ecdd7f6c347190
+https://conda.anaconda.org/conda-forge/noarch/wheel-0.45.1-pyhd8ed1ab_1.conda#75cb7132eb58d97896e173ef12ac9986
+https://conda.anaconda.org/conda-forge/linux-64/xcb-util-image-0.4.0-hb711507_2.conda#a0901183f08b6c7107aab109733a3c91
+https://conda.anaconda.org/conda-forge/linux-64/xkeyboard-config-2.44-hb9d3cd8_0.conda#7c91bfc90672888259675ad2ad28af9c
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxext-1.3.6-hb9d3cd8_0.conda#febbab7d15033c913d53c7a2c102309d
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxfixes-6.0.1-hb9d3cd8_0.conda#4bdb303603e9821baf5fe5fdff1dc8f8
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.12-hb9d3cd8_0.conda#96d57aba173e878a2089d5638016dc5e
+https://conda.anaconda.org/conda-forge/linux-64/ccache-4.11.3-h80c52d3_0.conda#eb517c6a2b960c3ccb6f1db1005f063a
+https://conda.anaconda.org/conda-forge/linux-64/coverage-7.8.0-py310h89163eb_0.conda#9f7865c17117d16f804b687b498e35fa
+https://conda.anaconda.org/conda-forge/linux-64/dbus-1.13.6-h5008d03_3.tar.bz2#ecfff944ba3960ecb334b9a2663d708d
+https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.57.0-py310h89163eb_0.conda#34378af82141b3c1725dcdf898b28fc6
+https://conda.anaconda.org/conda-forge/linux-64/freetype-2.13.3-ha770c72_1.conda#9ccd736d31e0c6e41f54e704e5312811
+https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.84.1-h4833e2c_1.conda#418de18c9b79a3d8583d90d27e0937c2
+https://conda.anaconda.org/conda-forge/noarch/joblib-1.2.0-pyhd8ed1ab_0.tar.bz2#7583652522d71ad78ba536bba06940eb
+https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-20_linux64_openblas.conda#2b7bb4f7562c8cf334fc2e20c2d28abc
+https://conda.anaconda.org/conda-forge/linux-64/libflac-1.4.3-h59595ed_0.conda#ee48bf17cc83a00f59ca1494d5646869
+https://conda.anaconda.org/conda-forge/linux-64/libgl-1.7.0-ha4b6fd6_2.conda#928b8be80851f5d8ffb016f9c81dae7a
+https://conda.anaconda.org/conda-forge/linux-64/libllvm20-20.1.4-he9d0ab4_0.conda#96c33bbd084ef2b2463503fb7f1482ae
+https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.9.2-h65c71a3_0.conda#d045b1d878031eb497cab44e6392b1df
+https://conda.anaconda.org/conda-forge/linux-64/openblas-0.3.25-pthreads_h7a3da1a_0.conda#87661673941b5e702275fdf0fc095ad0
+https://conda.anaconda.org/conda-forge/linux-64/openldap-2.6.9-he970967_0.conda#ca2de8bbdc871bce41dbf59e51324165
+https://conda.anaconda.org/conda-forge/noarch/pip-25.1.1-pyh8b19718_0.conda#32d0781ace05105cc99af55d36cbec7c
+https://conda.anaconda.org/conda-forge/noarch/pyproject-metadata-0.9.1-pyhd8ed1ab_0.conda#22ae7c6ea81e0c8661ef32168dda929b
+https://conda.anaconda.org/conda-forge/noarch/pytest-8.3.5-pyhd8ed1ab_0.conda#c3c9316209dec74a705a36797970c6be
+https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0.post0-pyhff2d567_1.conda#5ba79d7c71f03c678c8ead841f347d6e
+https://conda.anaconda.org/conda-forge/linux-64/sip-6.8.6-py310hf71b8c6_2.conda#a50d1007fecaff3f98b19034a8e0b2e7
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdamage-1.1.6-hb9d3cd8_0.conda#b5fcc7172d22516e1f965490e65e33a4
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxxf86vm-1.1.6-hb9d3cd8_0.conda#5efa5fa6243a622445fdfd72aee15efa
+https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.15.0-h7e30c49_1.conda#8f5b0b297b59e1ac160ad4beec99dbee
+https://conda.anaconda.org/conda-forge/linux-64/glib-2.84.1-h6287aef_1.conda#35012688d30e1b52bff2ba5d1f342a50
+https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-20_linux64_openblas.conda#36d486d72ab64ffea932329a1d3729a3
+https://conda.anaconda.org/conda-forge/linux-64/libclang-cpp20.1-20.1.4-default_h1df26ce_0.conda#96f8d5b2e94c9ba4fef19f1adf068a15
+https://conda.anaconda.org/conda-forge/linux-64/libclang13-20.1.4-default_he06ed0a_0.conda#2d933632c8004be47deb2be61bf013be
+https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-20_linux64_openblas.conda#6fabc51f5e647d09cc010c40061557e0
+https://conda.anaconda.org/conda-forge/linux-64/libpq-17.4-h27ae623_1.conda#37fba334855ef3b51549308e61ed7a3d
+https://conda.anaconda.org/conda-forge/linux-64/libsndfile-1.2.2-hc60ed4a_1.conda#ef1910918dd895516a769ed36b5b3a4e
+https://conda.anaconda.org/conda-forge/noarch/meson-python-0.16.0-pyh0c530f3_0.conda#e16f0dbf502da873be9f9adb0dc52547
+https://conda.anaconda.org/conda-forge/linux-64/pillow-11.1.0-py310h7e6dc6c_0.conda#14d300b9e1504748e70cc6499a7b4d25
+https://conda.anaconda.org/conda-forge/linux-64/pyqt5-sip-12.13.0-py310hf71b8c6_1.conda#0c8cbfbe70f4c8a47b040a14615e6f1f
+https://conda.anaconda.org/conda-forge/noarch/pytest-cov-6.1.1-pyhd8ed1ab_0.conda#1e35d8f975bc0e984a19819aa91c440a
+https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.6.1-pyhd8ed1ab_1.conda#59aad4fb37cabc0bacc73cf344612ddd
+https://conda.anaconda.org/conda-forge/linux-64/cairo-1.18.4-h3394656_0.conda#09262e66b19567aff4f592fb53b28760
+https://conda.anaconda.org/conda-forge/linux-64/gstreamer-1.24.11-hc37bda9_0.conda#056d86cacf2b48c79c6a562a2486eb8c
+https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.9.0-20_linux64_openblas.conda#05c5862c7dc25e65ba6c471d96429dae
+https://conda.anaconda.org/conda-forge/linux-64/numpy-1.22.0-py310h454958d_1.tar.bz2#607c66f0cce2986515a8fe9e136b2b57
+https://conda.anaconda.org/conda-forge/linux-64/pulseaudio-client-17.0-hac146a9_1.conda#66b1fa9608d8836e25f9919159adc9c6
+https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-20_linux64_openblas.conda#9932a1d4e9ecf2d35fb19475446e361e
+https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.24.11-h651a532_0.conda#d8d8894f8ced2c9be76dc9ad1ae531ce
+https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-11.1.0-h3beb420_0.conda#95e3bb97f9cdc251c0c68640e9c10ed3
+https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.5.0-py310h23f4a51_0.tar.bz2#9911225650b298776c8e8c083b5cacf1
+https://conda.anaconda.org/conda-forge/linux-64/pandas-1.4.0-py310hb5077e9_0.tar.bz2#43e920bc9856daa7d8d18fcbfb244c4e
+https://conda.anaconda.org/conda-forge/linux-64/polars-0.20.30-py310h031f9ce_0.conda#0743f5db9f978b6df92d412935ff8371
+https://conda.anaconda.org/conda-forge/linux-64/scipy-1.8.0-py310hea5193d_1.tar.bz2#664d80ddeb51241629b3ada5ea926e4d
+https://conda.anaconda.org/conda-forge/linux-64/blas-2.120-openblas.conda#c8f6916a81a340650078171b1d852574
+https://conda.anaconda.org/conda-forge/linux-64/pyamg-4.2.1-py310h7c3ba0c_0.tar.bz2#89f5a48e1f23b5cf3163a6094903d181
+https://conda.anaconda.org/conda-forge/linux-64/qt-main-5.15.15-h993ce98_3.conda#aa49f5308f39277477d47cd6687eb8f3
+https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.15.10-py310hb3b5edb_1.conda#c370972fc4557cb54d265c9c1f71bd20
+https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.5.0-py310hff52083_0.tar.bz2#1b2f3b135d5d9c594b5e0e6150c03b7b
diff --git a/build_tools/azure/pymin_conda_forge_openblas_ubuntu_2204_environment.yml b/build_tools/azure/pymin_conda_forge_openblas_ubuntu_2204_environment.yml
index 855909a2c262a..267c149fd1c35 100644
--- a/build_tools/azure/pymin_conda_forge_openblas_ubuntu_2204_environment.yml
+++ b/build_tools/azure/pymin_conda_forge_openblas_ubuntu_2204_environment.yml
@@ -4,17 +4,16 @@
 channels:
   - conda-forge
 dependencies:
-  - python=3.9
+  - python=3.10
   - numpy
   - blas[build=openblas]
   - scipy
   - cython
   - joblib
   - threadpoolctl
-  - matplotlib
   - pandas
   - pyamg
-  - pytest<8
+  - pytest
   - pytest-xdist
   - pillow
   - pip
diff --git a/build_tools/azure/pymin_conda_forge_openblas_ubuntu_2204_linux-64_conda.lock b/build_tools/azure/pymin_conda_forge_openblas_ubuntu_2204_linux-64_conda.lock
index c7a155bece187..b7899b98ba3fa 100644
--- a/build_tools/azure/pymin_conda_forge_openblas_ubuntu_2204_linux-64_conda.lock
+++ b/build_tools/azure/pymin_conda_forge_openblas_ubuntu_2204_linux-64_conda.lock
@@ -1,205 +1,116 @@
 # Generated by conda-lock.
 # platform: linux-64
-# input_hash: a64ed7d3cc839a12cb1faa238a89d4aec55abc43d335791f0e8422f5722ff662
+# input_hash: 26bb2530999c20f24bbab0f7b6e3545ad84d059a25027cb624997210afc23693
 @EXPLICIT
 https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81
-https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2024.2.2-hbcca054_0.conda#2f4327a1cbe7f022401b236e915a5fef
-https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2#0c96522c6bdaed4b1566d11387caaf45
-https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2#34893075a5c9e55cdafac56607368fc6
-https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2#4d59c254e01d9cde7957100457e2d5fb
-https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_1.conda#6185f640c43843e5ad6fd1c5372c3f80
-https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.40-h55db66e_0.conda#10569984e7db886e4f1abc2b47ad79a1
-https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.2.0-h95c4c6d_6.conda#3cfab3e709f77e9f1b3d380eb622494a
-https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.9-4_cp39.conda#bfe4b3259a8ac6cdf0037752904da6a7
-https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda#161081fc7cec0bfda0d86d7cb595f8d8
-https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-0.tar.bz2#f766549260d6815b0c52253f1fb1bb29
-https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2#fee5683a3f04bd15cbd8318b096a27ab
-https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_kmp_llvm.tar.bz2#562b26ba2e19059551a811e72ab7f793
-https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.2.0-hc881cc4_6.conda#df88796bd09a0d2ed292e59101478ad8
-https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.11-hd590300_1.conda#0bb492cca54017ea314b809b1ee3a176
-https://conda.anaconda.org/conda-forge/linux-64/attr-2.5.1-h166bdaf_1.tar.bz2#d9c69a24ad678ffce24c6543a0176b00
-https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hd590300_5.conda#69b8b6202a07720f448be700e300ccf4
-https://conda.anaconda.org/conda-forge/linux-64/gettext-tools-0.22.5-h59595ed_2.conda#985f2f453fb72408d6b6f1be0f324033
-https://conda.anaconda.org/conda-forge/linux-64/graphite2-1.3.13-h59595ed_1003.conda#f87c7b7c2cb45f323ffbce941c78ab7c
-https://conda.anaconda.org/conda-forge/linux-64/icu-73.2-h59595ed_0.conda#cc47e1facc155f91abd89b11e48e72ff
-https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2#30186d27e2c9fa62b45fb1476b7200e3
-https://conda.anaconda.org/conda-forge/linux-64/lame-3.100-h166bdaf_1003.tar.bz2#a8832b479f93521a9e7b5b743803be51
-https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h27087fc_0.tar.bz2#76bbff344f0134279f225174e9064c8f
-https://conda.anaconda.org/conda-forge/linux-64/libasprintf-0.22.5-h661eb56_2.conda#dd197c968bf9760bba0031888d431ede
-https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.1.0-hd590300_1.conda#aec6c91c7371c26392a06708a73c70e5
-https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.20-hd590300_0.conda#8e88f9389f1165d7c0936fe40d9a9a79
-https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.6.2-h59595ed_0.conda#e7ba12deb7020dd080c6c70e7b6f6a3d
-https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3
-https://conda.anaconda.org/conda-forge/linux-64/libgettextpo-0.22.5-h59595ed_2.conda#172bcc51059416e7ce99e7b528cede83
-https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-13.2.0-h43f5ff8_6.conda#e54a5ddc67e673f9105cf2a2e9c070b0
-https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.17-hd590300_2.conda#d66573916ffcf376178462f1b61c941e
-https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.0.0-hd590300_1.conda#ea25936bb4080d843790b586850f82b8
+https://conda.anaconda.org/conda-forge/noarch/python_abi-3.10-7_cp310.conda#44e871cba2b162368476a84b8d040b6c
+https://conda.anaconda.org/conda-forge/noarch/tzdata-2025b-h78e105d_0.conda#4222072737ccff51314b5ece9c7d6f5a
+https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2025.4.26-hbd8a1cb_0.conda#95db94f75ba080a22eb623590993167b
+https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.43-h712a8e2_4.conda#01f8d123c96816249efd255a31ad7712
+https://conda.anaconda.org/conda-forge/linux-64/libgomp-14.2.0-h767d61c_2.conda#06d02030237f4d5b3d9a7e7d348fe3c6
+https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2#73aaf86a425cc6e73fcf236a5a46396d
+https://conda.anaconda.org/conda-forge/linux-64/libgcc-14.2.0-h767d61c_2.conda#ef504d1acbd74b7cc6849ef8af47dd03
+https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.23-h86f0d12_0.conda#27fe770decaf469a53f3e3a6d593067f
+https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.0-h5888daf_0.conda#db0bfbe7dd197b68ad5f30333bae6ce0
+https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.6-h2dba641_1.conda#ede4673863426c0883c0063d853bbd85
+https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-14.2.0-h69a702a_2.conda#a2222a6ada71fb478682efe483ce0f92
+https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-14.2.0-hf1ad2bd_2.conda#556a4fdfac7287d349b8f09aba899693
+https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.1.0-hb9d3cd8_0.conda#9fa334557db9f63da6c9285fd2a48638
+https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.8.1-hb9d3cd8_1.conda#a76fd702c93cd2dfd89eff30a5fd45a8
+https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-14.2.0-h8f9b012_2.conda#a78c856b6dc6bf4ea8daeb9beaaa3fb0
+https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.5.0-h851e524_0.conda#63f790534398730f59e1b899c3644d4a
+https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-hb9d3cd8_2.conda#edb0dca6bc32e4f4789199455a1dbeb8
+https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h2d0b736_3.conda#47e340acb35de30501a76c7c799c41d7
+https://conda.anaconda.org/conda-forge/linux-64/openssl-3.5.0-h7b32b05_1.conda#de356753cfdbffcde5bb1e86e3aa6cd0
+https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-hb9d3cd8_1002.conda#b3c17d95b5a10c6e64a21fa17573e70e
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.12-hb9d3cd8_0.conda#f6ebe2cb3f82ba6c057dde5d9debe4f7
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdmcp-1.1.5-hb9d3cd8_0.conda#8035c64cb77ed555e3f150b7b3972480
+https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h4bc722e_7.conda#62ee74e96c5ebb0af99386de58cf9553
+https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h0aef613_1.conda#9344155d33912347b37f0ae6c410a835
+https://conda.anaconda.org/conda-forge/linux-64/libgfortran-14.2.0-h69a702a_2.conda#fb54c4ea68b460c278d26eea89cfbcc3
 https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hd590300_0.conda#30fd6e37fe21f86f4bd26d6ee73eeec7
-https://conda.anaconda.org/conda-forge/linux-64/libogg-1.3.4-h7f98852_1.tar.bz2#6e8cc2173440d77708196c5b93771680
-https://conda.anaconda.org/conda-forge/linux-64/libopus-1.3.1-h7f98852_1.tar.bz2#15345e56d527b330e1cacbdf58676e8f
+https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.47-h943b412_0.conda#55199e2ae2c3651f6f9b2a447b47bdc9
+https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.49.1-hee588c1_2.conda#962d6ac93c30b1dfc54c9cccafd1003e
+https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-14.2.0-h4852527_2.conda#c75da67f045c2627f59e6fcb5f4e3a9b
 https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda#40b61aab5c7ba9ff276c41cfffe6b80b
-https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.4.0-hd590300_0.conda#b26e8aa824079e1be0294e7152ca4559
+https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.17.0-h8a09558_0.conda#92ed62436b625154323d40d5f2f11dd7
 https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda#5aa797f8787fe7a17d1b0821485b5adc
-https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-hd590300_5.conda#f36c115f1ee199da648e0597ec2047ad
-https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.9.4-hcb278e6_0.conda#318b08df404f9c9be5712aaa5a6f0bb0
-https://conda.anaconda.org/conda-forge/linux-64/mpg123-1.32.6-h59595ed_0.conda#9160cdeb523a1b20cf8d2a0bf821f45d
-https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.4.20240210-h59595ed_0.conda#97da8860a0da5413c7c98a3b3838a645
-https://conda.anaconda.org/conda-forge/linux-64/ninja-1.12.0-h00ab1b0_0.conda#b048701d52e7cbb5f59ddd4d3b17bbf5
-https://conda.anaconda.org/conda-forge/linux-64/nspr-4.35-h27087fc_0.conda#da0ec11a6454ae19bff5b02ed881a2b1
-https://conda.anaconda.org/conda-forge/linux-64/openssl-3.2.1-hd590300_1.conda#9d731343cff6ee2e5a25c4a091bf8e2a
-https://conda.anaconda.org/conda-forge/linux-64/pixman-0.43.2-h59595ed_0.conda#71004cbf7924e19c02746ccde9fd7123
-https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-h36c2ea0_1001.tar.bz2#22dad4df6e8630e8dff2428f6f6a7036
-https://conda.anaconda.org/conda-forge/linux-64/xorg-kbproto-1.0.7-h7f98852_1002.tar.bz2#4b230e8381279d76131116660f5a241a
-https://conda.anaconda.org/conda-forge/linux-64/xorg-libice-1.1.1-hd590300_0.conda#b462a33c0be1421532f28bfe8f4a7514
-https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.11-hd590300_0.conda#2c80dc38fface310c9bd81b17037fee5
-https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdmcp-1.1.3-h7f98852_0.tar.bz2#be93aabceefa2fac576e971aef407908
-https://conda.anaconda.org/conda-forge/linux-64/xorg-renderproto-0.11.1-h7f98852_1002.tar.bz2#06feff3d2634e3097ce2fe681474b534
-https://conda.anaconda.org/conda-forge/linux-64/xorg-xextproto-7.3.0-h0b41bf4_1003.conda#bce9f945da8ad2ae9b1d7165a64d0f87
-https://conda.anaconda.org/conda-forge/linux-64/xorg-xf86vidmodeproto-2.3.1-h7f98852_1002.tar.bz2#3ceea9668625c18f19530de98b15d5b0
-https://conda.anaconda.org/conda-forge/linux-64/xorg-xproto-7.0.31-h7f98852_1007.tar.bz2#b4a4381d54784606820704f7b5f05a15
-https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0
-https://conda.anaconda.org/conda-forge/linux-64/expat-2.6.2-h59595ed_0.conda#53fb86322bdb89496d7579fe3f02fd61
-https://conda.anaconda.org/conda-forge/linux-64/libasprintf-devel-0.22.5-h661eb56_2.conda#02e41ab5834dcdcc8590cf29d9526f50
-https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.1.0-hd590300_1.conda#f07002e225d7a60a694d42a7bf5ff53f
-https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.1.0-hd590300_1.conda#5fc11c6020d421960607d821310fcd4d
-https://conda.anaconda.org/conda-forge/linux-64/libcap-2.69-h0f662aa_0.conda#25cb5999faa414e5ccb2c1388f62d3d5
-https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2.tar.bz2#4d331e44109e3f0e19b4cb8f9b82f3e1
-https://conda.anaconda.org/conda-forge/linux-64/libevent-2.1.12-hf998b51_1.conda#a1cfcc585f0c42bf8d5546bb1dfb668d
-https://conda.anaconda.org/conda-forge/linux-64/libgettextpo-devel-0.22.5-h59595ed_2.conda#b63d9b6da3653179a278077f0de20014
-https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-13.2.0-h69a702a_6.conda#3666a850342f8f3be88f9a93d948d027
-https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.43-h2797004_0.conda#009981dd9cfcaa4dbfa25ffaed86bcae
-https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.45.3-h2797004_0.conda#b3316cbe90249da4f8e84cd66e1cc55b
-https://conda.anaconda.org/conda-forge/linux-64/libvorbis-1.3.7-h9c3ff4c_0.tar.bz2#309dec04b70a3cc0f1e84a4013683bc0
-https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.15-h0b41bf4_0.conda#33277193f5b92bad9fdd230eb700929c
-https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.12.6-h232c23b_2.conda#9a3a42df8a95f65334dfc7b80da1195d
-https://conda.anaconda.org/conda-forge/linux-64/mysql-common-8.3.0-hf1915f5_4.conda#784a4df6676c581ca624fbe460703a6d
-https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.43-hcad00b1_0.conda#8292dea9e022d9610a11fce5e0896ed8
-https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4
+https://conda.anaconda.org/conda-forge/linux-64/ninja-1.12.1-hff21bea_1.conda#2322531904f27501ee19847b87ba7c64
+https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8c095d6_2.conda#283b96675859b20a825f8fa30f311446
 https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda#d453b98d9c83e71da0741bb0ff4d76bc
-https://conda.anaconda.org/conda-forge/linux-64/xorg-libsm-1.2.4-h7391055_0.conda#93ee23f12bc2e684548181256edd2cf6
-https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.13-hd590300_5.conda#68c34ec6149623be41a1933ab996a209
-https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.5-hfc55251_0.conda#04b88013080254850d6c01ed54810589
-https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.1.0-hd590300_1.conda#39f910d205726805a958da408ca194ba
-https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-h267a509_2.conda#9ae35c3d96db2c94ce0cef86efdfa2cb
-https://conda.anaconda.org/conda-forge/linux-64/gettext-0.22.5-h59595ed_2.conda#219ba82e95d7614cf7140d2a4afc0926
-https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.2-h659d440_0.conda#cd95826dbd331ed1be26bdf401432844
-https://conda.anaconda.org/conda-forge/linux-64/libglib-2.80.0-hf2295e7_6.conda#9342e7c44c38bea649490f72d92c382d
-https://conda.anaconda.org/conda-forge/linux-64/libhiredis-1.0.2-h2cc385e_0.tar.bz2#b34907d3a81a3cd8095ee83d174c074a
-https://conda.anaconda.org/conda-forge/linux-64/libllvm15-15.0.7-hb3ce162_4.conda#8a35df3cbc0c8b12cc8af9473ae75eef
-https://conda.anaconda.org/conda-forge/linux-64/libllvm18-18.1.3-h2448989_0.conda#927b6d6e80b2c0d4405a58b61ca248a3
-https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.27-pthreads_h413a1c8_0.conda#a356024784da6dfd4683dc5ecf45b155
-https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.6.0-h1dd3fc0_3.conda#66f03896ffbe1a110ffda05c7a856504
-https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-18.1.3-h4dfa4b3_0.conda#d39965123dffcad4d750989be65bcb7c
-https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-8.3.0-hca2cd23_4.conda#1b50eebe2a738a3146c154d2eceaa8b6
-https://conda.anaconda.org/conda-forge/linux-64/nss-3.98-h1d7d5a4_0.conda#54b56c2fdf973656b748e0378900ec13
-https://conda.anaconda.org/conda-forge/linux-64/python-3.9.19-h0755675_0_cpython.conda#d9ee3647fbd9e8595b8df759b2bbefb8
-https://conda.anaconda.org/conda-forge/linux-64/xcb-util-0.4.0-hd590300_1.conda#9bfac7ccd94d54fd21a0501296d60424
-https://conda.anaconda.org/conda-forge/linux-64/xcb-util-keysyms-0.4.0-h8ee46fc_1.conda#632413adcd8bc16b515cab87a2932913
-https://conda.anaconda.org/conda-forge/linux-64/xcb-util-renderutil-0.3.9-hd590300_1.conda#e995b155d938b6779da6ace6c6b13816
-https://conda.anaconda.org/conda-forge/linux-64/xcb-util-wm-0.4.1-h8ee46fc_1.conda#90108a432fb5c6150ccfee3f03388656
-https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.9-h8ee46fc_0.conda#077b6e8ad6a3ddb741fce2496dd01bec
-https://conda.anaconda.org/conda-forge/noarch/alabaster-0.7.16-pyhd8ed1ab_0.conda#def531a3ac77b7fb8c21d17bb5d0badb
-https://conda.anaconda.org/conda-forge/linux-64/brotli-1.1.0-hd590300_1.conda#f27a24d46e3ea7b70a1f98e50c62508f
-https://conda.anaconda.org/conda-forge/linux-64/brotli-python-1.1.0-py39h3d6467e_1.conda#c48418c8b35f1d59ae9ae1174812b40a
-https://conda.anaconda.org/conda-forge/linux-64/ccache-4.9.1-h1fcd64f_0.conda#3620f564bcf28c3524951b6f64f5c5ac
-https://conda.anaconda.org/conda-forge/noarch/certifi-2024.2.2-pyhd8ed1ab_0.conda#0876280e409658fc6f9e75d035960333
-https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.3.2-pyhd8ed1ab_0.conda#7f4a9e3fcff3f6356ae99244a014da6a
-https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99
-https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhd8ed1ab_0.conda#5cd86562580f274031ede6aa6aa24441
-https://conda.anaconda.org/conda-forge/linux-64/cython-3.0.10-py39h3d6467e_0.conda#76b5d215fb735a6dc43010ffbe78040e
-https://conda.anaconda.org/conda-forge/linux-64/dbus-1.13.6-h5008d03_3.tar.bz2#ecfff944ba3960ecb334b9a2663d708d
-https://conda.anaconda.org/conda-forge/noarch/docutils-0.21.2-pyhd8ed1ab_0.conda#e8cd5d629f65bdf0f3bb312cde14659e
-https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.0-pyhd8ed1ab_2.conda#8d652ea2ee8eaee02ed8dc820bc794aa
-https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_0.conda#15dda3cdbf330abfe9f555d22f66db46
-https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.14.2-h14ed4e7_0.conda#0f69b688f52ff6da70bccb7ff7001d1d
-https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.80.0-hde27a5a_6.conda#a9d23c02485c5cf055f9ac90eb9c9c63
-https://conda.anaconda.org/conda-forge/noarch/idna-3.7-pyhd8ed1ab_0.conda#c0cc1420498b17414d8617d0b9f506ca
+https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.7-hb8e6e7a_2.conda#6432cb5d4ac0046c3ac0a8a0f95842f9
+https://conda.anaconda.org/conda-forge/linux-64/libfreetype6-2.13.3-h48d6fc4_1.conda#3c255be50a506c50765a93a6644f32fe
+https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-14.2.0-h69a702a_2.conda#4056c857af1a99ee50589a941059ec55
+https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.29-pthreads_h94d23a6_0.conda#0a4d0252248ef9a0f88f2ba8b8a08e12
+https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.7.0-hd9ff511_4.conda#6c1028898cf3a2032d9af46689e1b81a
+https://conda.anaconda.org/conda-forge/linux-64/python-3.10.17-hd6af730_0_cpython.conda#7bb89638dae9ce1b8e051d0b721e83c2
+https://conda.anaconda.org/conda-forge/noarch/alabaster-1.0.0-pyhd8ed1ab_1.conda#1fd9696649f65fd6611fcdb4ffec738a
+https://conda.anaconda.org/conda-forge/linux-64/brotli-python-1.1.0-py310hf71b8c6_2.conda#bf502c169c71e3c6ac0d6175addfacc2
+https://conda.anaconda.org/conda-forge/noarch/certifi-2025.4.26-pyhd8ed1ab_0.conda#c33eeaaa33f45031be34cda513df39b6
+https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.4.2-pyhd8ed1ab_0.conda#40fe4284b8b5835a9073a645139f35af
+https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_1.conda#962b9857ee8e7018c22f2776ffa0b2d7
+https://conda.anaconda.org/conda-forge/linux-64/cython-3.0.12-py310had8cdd9_0.conda#b630fe36f0b621d23e74872dc4fd2bd7
+https://conda.anaconda.org/conda-forge/noarch/docutils-0.21.2-pyhd8ed1ab_1.conda#24c1ca34138ee57de72a943237cde4cc
+https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.2-pyhd8ed1ab_1.conda#a16662747cdeb9abbac74d0057cc976e
+https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_1.conda#a71efeae2c160f6789900ba2631a2c90
+https://conda.anaconda.org/conda-forge/noarch/hpack-4.1.0-pyhd8ed1ab_0.conda#0a802cb9888dd14eeefc611f05c40b6e
+https://conda.anaconda.org/conda-forge/noarch/hyperframe-6.1.0-pyhd8ed1ab_0.conda#8e6923fc12f1fe8f8c4e5c9f343256ac
+https://conda.anaconda.org/conda-forge/noarch/idna-3.10-pyhd8ed1ab_1.conda#39a4f67be3286c86d696df570b1201b7
 https://conda.anaconda.org/conda-forge/noarch/imagesize-1.4.1-pyhd8ed1ab_0.tar.bz2#7de5386c8fea29e76b303f37dde4c352
-https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5
-https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.5-py39h7633fee_1.conda#c9f74d717e5a2847a9f8b779c54130f2
-https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.16-hb7c19ff_0.conda#51bb7010fc86f70eee639b4bb7a894f5
-https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-22_linux64_openblas.conda#1a2a0cd3153464fee6646f3dd6dad9b8
-https://conda.anaconda.org/conda-forge/linux-64/libclang-cpp15-15.0.7-default_h127d8a8_5.conda#d0a9633b53cdc319b8a1a532ae7822b8
-https://conda.anaconda.org/conda-forge/linux-64/libclang13-18.1.3-default_h5d6823c_0.conda#5fff487759736b275dc3e4a263cac666
-https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h4637d8d_4.conda#d4529f4dff3057982a7617c7ac58fde3
-https://conda.anaconda.org/conda-forge/linux-64/libflac-1.4.3-h59595ed_0.conda#ee48bf17cc83a00f59ca1494d5646869
-https://conda.anaconda.org/conda-forge/linux-64/libgpg-error-1.49-h4f305b6_0.conda#dfcfd72c7a430d3616763ecfbefe4ca9
-https://conda.anaconda.org/conda-forge/linux-64/libpq-16.2-h33b98f1_1.conda#9e49ec2a61d02623b379dc332eb6889d
-https://conda.anaconda.org/conda-forge/linux-64/markupsafe-2.1.5-py39hd1e30aa_0.conda#9a9a22eb1f83c44953319ee3b027769f
-https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19
-https://conda.anaconda.org/conda-forge/linux-64/openblas-0.3.27-pthreads_h7a3da1a_0.conda#4b422ebe8fc6a5320d0c1c22e5a46032
-https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.2-h488ebb8_0.conda#7f2e286780f072ed750df46dc2631138
-https://conda.anaconda.org/conda-forge/noarch/packaging-24.0-pyhd8ed1ab_0.conda#248f521b64ce055e7feae3105e7abeb8
-https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_0.conda#d3483c8fc2dc2cc3f5cf43e26d60cabf
-https://conda.anaconda.org/conda-forge/noarch/ply-3.11-pyhd8ed1ab_2.conda#18c6deb6f9602e32446398203c8f0e91
-https://conda.anaconda.org/conda-forge/noarch/pygments-2.17.2-pyhd8ed1ab_0.conda#140a7f159396547e9799aa98f9f0742e
-https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.2-pyhd8ed1ab_0.conda#b9a4dacf97241704529131a0dfc0494f
-https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha2e5f31_6.tar.bz2#2a7de29fb590ca14b5243c4c812c8025
-https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2024.1-pyhd8ed1ab_0.conda#98206ea9954216ee7540f0c773f2104d
-https://conda.anaconda.org/conda-forge/noarch/pytz-2024.1-pyhd8ed1ab_0.conda#3eeeeb9e4827ace8c0c1419c85d590ad
-https://conda.anaconda.org/conda-forge/noarch/setuptools-69.5.1-pyhd8ed1ab_0.conda#7462280d81f639363e6e63c81276bd9e
-https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2
+https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_1.conda#6837f3eff7dcea42ecd714ce1ac2b108
+https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.17-h717163a_0.conda#000e85703f0fd9594c81710dd5066471
+https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-31_h59b9bed_openblas.conda#728dbebd0f7a20337218beacffd37916
+https://conda.anaconda.org/conda-forge/linux-64/libfreetype-2.13.3-ha770c72_1.conda#51f5be229d83ecd401fb369ab96ae669
+https://conda.anaconda.org/conda-forge/linux-64/libhiredis-1.0.2-h2cc385e_0.tar.bz2#b34907d3a81a3cd8095ee83d174c074a
+https://conda.anaconda.org/conda-forge/linux-64/markupsafe-3.0.2-py310h89163eb_1.conda#8ce3f0332fd6de0d737e2911d329523f
+https://conda.anaconda.org/conda-forge/noarch/meson-1.8.0-pyh29332c3_0.conda#8e25221b702272394b86b0f4d7217f77
+https://conda.anaconda.org/conda-forge/linux-64/openblas-0.3.29-pthreads_h6ec200e_0.conda#7e4d48870b3258bea920d51b7f495a81
+https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.3-h5fbd93e_0.conda#9e5816bc95d285c115a3ebc2f8563564
+https://conda.anaconda.org/conda-forge/noarch/packaging-25.0-pyh29332c3_1.conda#58335b26c38bf4a20f399384c33cbcf9
+https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_1.conda#e9dcbce5f45f9ee500e728ae58b605b6
+https://conda.anaconda.org/conda-forge/noarch/pycparser-2.22-pyh29332c3_1.conda#12c566707c80111f9799308d9e265aef
+https://conda.anaconda.org/conda-forge/noarch/pygments-2.19.1-pyhd8ed1ab_0.conda#232fb4577b6687b2d503ef8e254270c9
+https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha55dd90_7.conda#461219d1a5bd61342293efa2c0c90eac
+https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2025.2-pyhd8ed1ab_0.conda#88476ae6ebd24f39261e0854ac244f33
+https://conda.anaconda.org/conda-forge/noarch/pytz-2025.2-pyhd8ed1ab_0.conda#bc8e3267d44011051f2eb14d22fb0960
+https://conda.anaconda.org/conda-forge/noarch/setuptools-80.1.0-pyhff2d567_0.conda#f6f72d0837c79eaec77661be43e8a691
+https://conda.anaconda.org/conda-forge/noarch/six-1.17.0-pyhd8ed1ab_0.conda#a451d576819089b0d672f18768be0f65
 https://conda.anaconda.org/conda-forge/noarch/snowballstemmer-2.2.0-pyhd8ed1ab_0.tar.bz2#4d22a9315e78c6827f806065957d566e
-https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-jsmath-1.0.1-pyhd8ed1ab_0.conda#da1d979339e2714c30a8e806a33ec087
-https://conda.anaconda.org/conda-forge/noarch/tabulate-0.9.0-pyhd8ed1ab_1.tar.bz2#4759805cce2d914c38472f70bf4d8bcb
-https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.4.0-pyhc1e730c_0.conda#b296278eef667c673bf51de6535bad88
-https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095
-https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96
-https://conda.anaconda.org/conda-forge/linux-64/tornado-6.4-py39hd1e30aa_0.conda#1e865e9188204cdfb1fd2531780add88
-https://conda.anaconda.org/conda-forge/linux-64/unicodedata2-15.1.0-py39hd1e30aa_0.conda#1da984bbb6e765743e13388ba7b7b2c8
-https://conda.anaconda.org/conda-forge/noarch/wheel-0.43.0-pyhd8ed1ab_1.conda#0b5293a157c2b5cd513dd1b03d8d3aae
-https://conda.anaconda.org/conda-forge/linux-64/xcb-util-image-0.4.0-h8ee46fc_1.conda#9d7bcddf49cbf727730af10e71022c73
-https://conda.anaconda.org/conda-forge/linux-64/xkeyboard-config-2.41-hd590300_0.conda#81f740407b45e3f9047b3174fa94eb9e
-https://conda.anaconda.org/conda-forge/linux-64/xorg-libxext-1.3.4-h0b41bf4_2.conda#82b6df12252e6f32402b96dacc656fec
-https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.11-hd590300_0.conda#ed67c36f215b310412b2af935bf3e530
-https://conda.anaconda.org/conda-forge/noarch/zipp-3.17.0-pyhd8ed1ab_0.conda#2e4d6bc0b14e10f895fc6791a7d9b26a
-https://conda.anaconda.org/conda-forge/noarch/babel-2.14.0-pyhd8ed1ab_0.conda#9669586875baeced8fc30c0826c3270e
-https://conda.anaconda.org/conda-forge/linux-64/cairo-1.18.0-h3faef2a_0.conda#f907bb958910dc404647326ca80c263e
-https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.51.0-py39hd1e30aa_0.conda#79f5dd8778873faa54e8f7b2729fe8a6
-https://conda.anaconda.org/conda-forge/linux-64/glib-2.80.0-hf2295e7_6.conda#a1e026a82a562b443845db5614ca568a
-https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-7.1.0-pyha770c72_0.conda#0896606848b2dc5cebdf111b6543aa04
-https://conda.anaconda.org/conda-forge/noarch/importlib_resources-6.4.0-pyhd8ed1ab_0.conda#c5d3907ad8bd7bf557521a1833cf7e6d
-https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.3-pyhd8ed1ab_0.conda#e7d8df6509ba635247ff9aea31134262
-https://conda.anaconda.org/conda-forge/noarch/joblib-1.4.0-pyhd8ed1ab_0.conda#e0ed1bf13ce3a440e022157bf4764465
-https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-22_linux64_openblas.conda#4b31699e0ec5de64d5896e580389c9a1
-https://conda.anaconda.org/conda-forge/linux-64/libgcrypt-1.10.3-hd590300_0.conda#32d16ad533c59bb0a3c5ffaf16110829
-https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-22_linux64_openblas.conda#b083767b6c877e24ee597d93b87ab838
-https://conda.anaconda.org/conda-forge/linux-64/libsndfile-1.2.2-hc60ed4a_1.conda#ef1910918dd895516a769ed36b5b3a4e
-https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.7.0-h662e7e4_0.conda#b32c0da42b1f24a98577bb3d7fc0b995
-https://conda.anaconda.org/conda-forge/noarch/meson-1.4.0-pyhd8ed1ab_0.conda#52a0660cfa40b45bf254ecc3374cb2e0
-https://conda.anaconda.org/conda-forge/linux-64/pillow-10.3.0-py39h90c7501_0.conda#1e3b6af9592be71ce19f0a6aae05d97b
-https://conda.anaconda.org/conda-forge/noarch/pip-24.0-pyhd8ed1ab_0.conda#f586ac1e56c8638b64f9c8122a7b8a67
-https://conda.anaconda.org/conda-forge/noarch/pyproject-metadata-0.8.0-pyhd8ed1ab_0.conda#573fe09d7bd0cd4bcc210d8369b5ca47
-https://conda.anaconda.org/conda-forge/noarch/pytest-7.4.4-pyhd8ed1ab_0.conda#a9d145de8c5f064b5fa68fb34725d9f4
-https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0-pyhd8ed1ab_0.conda#2cf4264fffb9e6eff6031c5b6884d61c
-https://conda.anaconda.org/conda-forge/linux-64/sip-6.7.12-py39h3d6467e_0.conda#e667a3ab0df62c54e60e1843d2e6defb
-https://conda.anaconda.org/conda-forge/noarch/urllib3-2.2.1-pyhd8ed1ab_0.conda#08807a87fa7af10754d46f63b368e016
-https://conda.anaconda.org/conda-forge/linux-64/gstreamer-1.24.1-h98fc4e7_1.conda#b04b5cdf3ba01430db27979250bc5a1d
-https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-8.4.0-h3d44ed6_0.conda#27f46291a6aaa3c2a4f798ebd35a7ddb
-https://conda.anaconda.org/conda-forge/noarch/importlib-resources-6.4.0-pyhd8ed1ab_0.conda#dcbadab7a68738a028e195ab68ab2d2e
-https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.9.0-22_linux64_openblas.conda#1fd156abd41a4992835952f6f4d951d0
-https://conda.anaconda.org/conda-forge/linux-64/libsystemd0-255-h3516f8a_1.conda#3366af27f0b593544a6cd453c7932ac5
-https://conda.anaconda.org/conda-forge/noarch/meson-python-0.16.0-pyh0c530f3_0.conda#e16f0dbf502da873be9f9adb0dc52547
-https://conda.anaconda.org/conda-forge/linux-64/numpy-1.26.4-py39h474f0d3_0.conda#aa265f5697237aa13cc10f53fa8acc4f
-https://conda.anaconda.org/conda-forge/linux-64/pyqt5-sip-12.12.2-py39h3d6467e_5.conda#93aff412f3e49fdb43361c0215cbd72d
-https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.5.0-pyhd8ed1ab_0.conda#d5f595da2daead898ca958ac62f0307b
-https://conda.anaconda.org/conda-forge/noarch/requests-2.31.0-pyhd8ed1ab_0.conda#a30144e4156cdbb236f99ebb49828f8b
-https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-22_linux64_openblas.conda#63ddb593595c9cf5eb08d3de54d66df8
-https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.2.1-py39h7633fee_0.conda#bdc188e59857d6efab332714e0d01d93
-https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.24.1-hfa15dee_1.conda#a6dd2bbc684913e2bef0a54ce56fcbfb
-https://conda.anaconda.org/conda-forge/linux-64/pandas-2.2.2-py39hddac248_0.conda#259c4e76e6bda8888aefc098ae1ba749
-https://conda.anaconda.org/conda-forge/linux-64/pulseaudio-client-17.0-hb77b528_0.conda#07f45f1be1c25345faddb8db0de8039b
-https://conda.anaconda.org/conda-forge/linux-64/scipy-1.13.0-py39h474f0d3_0.conda#46ae0ecba9726ab4fa44c78fefa522cf
-https://conda.anaconda.org/conda-forge/linux-64/blas-2.122-openblas.conda#5065468105542a8b23ea47bd8b6fa55f
-https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.8.4-py39he9076e7_0.conda#1919384a8420e7bb25f6c3a582e0857c
-https://conda.anaconda.org/conda-forge/linux-64/pyamg-5.1.0-py39hda80f44_0.conda#f225666c47726329201b604060f1436c
-https://conda.anaconda.org/conda-forge/linux-64/qt-main-5.15.8-hc9dc06e_21.conda#b325046180590c868ce0dbf267b82eb8
-https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.15.9-py39h52134e7_5.conda#e1f148e57d071b09187719df86f513c1
-https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.8.4-py39hf3d152e_0.conda#c66d2da2669fddc657b679bccab95775
-https://conda.anaconda.org/conda-forge/noarch/numpydoc-1.7.0-pyhd8ed1ab_0.conda#1ad3afced398492586ca1bef70328be4
-https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-applehelp-1.0.8-pyhd8ed1ab_0.conda#611a35a27914fac3aa37611a6fe40bb5
-https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-devhelp-1.0.6-pyhd8ed1ab_0.conda#d7e4954df0d3aea2eacc7835ad12671d
-https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-htmlhelp-2.0.5-pyhd8ed1ab_0.conda#7e1e7437273682ada2ed5e9e9714b140
-https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-qthelp-1.0.7-pyhd8ed1ab_0.conda#26acae54b06f178681bfb551760f5dd1
-https://conda.anaconda.org/conda-forge/noarch/sphinx-7.3.7-pyhd8ed1ab_0.conda#7b1465205e28d75d2c0e1a868ee00a67
-https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-serializinghtml-1.1.10-pyhd8ed1ab_0.conda#e507335cb4ca9cff4c3d0fa9cdab255e +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-jsmath-1.0.1-pyhd8ed1ab_1.conda#fa839b5ff59e192f411ccc7dae6588bb +https://conda.anaconda.org/conda-forge/noarch/tabulate-0.9.0-pyhd8ed1ab_2.conda#959484a66b4b76befcddc4fa97c95567 +https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.6.0-pyhecae5ae_0.conda#9d64911b31d57ca443e9f1e36b04385f +https://conda.anaconda.org/conda-forge/noarch/tomli-2.2.1-pyhd8ed1ab_1.conda#ac944244f1fed2eb49bae07193ae8215 +https://conda.anaconda.org/conda-forge/noarch/wheel-0.45.1-pyhd8ed1ab_1.conda#75cb7132eb58d97896e173ef12ac9986 +https://conda.anaconda.org/conda-forge/noarch/babel-2.17.0-pyhd8ed1ab_0.conda#0a01c169f0ab0f91b26e77a3301fbfe4 +https://conda.anaconda.org/conda-forge/linux-64/ccache-4.11.3-h80c52d3_0.conda#eb517c6a2b960c3ccb6f1db1005f063a +https://conda.anaconda.org/conda-forge/linux-64/cffi-1.17.1-py310h8deb56e_0.conda#1fc24a3196ad5ede2a68148be61894f4 +https://conda.anaconda.org/conda-forge/linux-64/freetype-2.13.3-ha770c72_1.conda#9ccd736d31e0c6e41f54e704e5312811 +https://conda.anaconda.org/conda-forge/noarch/h2-4.2.0-pyhd8ed1ab_0.conda#b4754fb1bdcb70c8fd54f918301582c6 +https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.6-pyhd8ed1ab_0.conda#446bd6c8cb26050d528881df495ce646 +https://conda.anaconda.org/conda-forge/noarch/joblib-1.5.0-pyhd8ed1ab_0.conda#3d7257f0a61c9aa4ffa3e324a887416b +https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-31_he106b2a_openblas.conda#abb32c727da370c481a1c206f5159ce9 +https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-31_h7ac8fdf_openblas.conda#452b98eafe050ecff932f0ec832dd03f +https://conda.anaconda.org/conda-forge/noarch/pip-25.1.1-pyh8b19718_0.conda#32d0781ace05105cc99af55d36cbec7c +https://conda.anaconda.org/conda-forge/noarch/pyproject-metadata-0.9.1-pyhd8ed1ab_0.conda#22ae7c6ea81e0c8661ef32168dda929b +https://conda.anaconda.org/conda-forge/noarch/pytest-8.3.5-pyhd8ed1ab_0.conda#c3c9316209dec74a705a36797970c6be +https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0.post0-pyhff2d567_1.conda#5ba79d7c71f03c678c8ead841f347d6e +https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.9.0-31_he2f377e_openblas.conda#7e5fff7d0db69be3a266f7e79a3bb0e2 +https://conda.anaconda.org/conda-forge/noarch/meson-python-0.18.0-pyh70fd9c4_0.conda#576c04b9d9f8e45285fb4d9452c26133 +https://conda.anaconda.org/conda-forge/linux-64/numpy-2.2.5-py310hefbff90_0.conda#5526bc875ec897f0d335e38da832b6ee +https://conda.anaconda.org/conda-forge/linux-64/pillow-11.1.0-py310h7e6dc6c_0.conda#14d300b9e1504748e70cc6499a7b4d25 +https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.6.1-pyhd8ed1ab_1.conda#59aad4fb37cabc0bacc73cf344612ddd +https://conda.anaconda.org/conda-forge/linux-64/zstandard-0.23.0-py310ha75aee5_2.conda#f9254b5b0193982416b91edcb4b2676f +https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-31_h1ea3ea9_openblas.conda#ba652ee0576396d4765e567f043c57f9 +https://conda.anaconda.org/conda-forge/linux-64/pandas-2.2.3-py310h5eaa309_3.conda#07697a584fab513ce895c4511f7a2403 +https://conda.anaconda.org/conda-forge/linux-64/scipy-1.15.2-py310h1d65ade_0.conda#8c29cd33b64b2eb78597fa28b5595c8d +https://conda.anaconda.org/conda-forge/noarch/urllib3-2.4.0-pyhd8ed1ab_0.conda#c1e349028e0052c4eea844e94f773065 +https://conda.anaconda.org/conda-forge/linux-64/blas-2.131-openblas.conda#38b2ec894c69bb4be0e66d2ef7fc60bf 
+https://conda.anaconda.org/conda-forge/linux-64/pyamg-5.2.1-py310ha2bacc8_1.conda#817d32861729e14f474249f1036291c4 +https://conda.anaconda.org/conda-forge/noarch/requests-2.32.3-pyhd8ed1ab_1.conda#a9b9368f3701a417eac9edbcae7cb737 +https://conda.anaconda.org/conda-forge/noarch/numpydoc-1.8.0-pyhd8ed1ab_1.conda#5af206d64d18d6c8dfb3122b4d9e643b +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-applehelp-2.0.0-pyhd8ed1ab_1.conda#16e3f039c0aa6446513e94ab18a8784b +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-devhelp-2.0.0-pyhd8ed1ab_1.conda#910f28a05c178feba832f842155cbfff +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-htmlhelp-2.1.0-pyhd8ed1ab_1.conda#e9fb3fe8a5b758b4aff187d434f94f03 +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-qthelp-2.0.0-pyhd8ed1ab_1.conda#00534ebcc0375929b45c3039b5ba7636 +https://conda.anaconda.org/conda-forge/noarch/sphinx-8.1.3-pyhd8ed1ab_1.conda#1a3281a0dc355c02b5506d87db2d78ac +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-serializinghtml-1.1.10-pyhd8ed1ab_1.conda#3bc61f7161d28137797e038263c04c54 diff --git a/build_tools/azure/pypy3_linux-64_conda.lock b/build_tools/azure/pypy3_linux-64_conda.lock deleted file mode 100644 index 23710cfe35cb8..0000000000000 --- a/build_tools/azure/pypy3_linux-64_conda.lock +++ /dev/null @@ -1,103 +0,0 @@ -# Generated by conda-lock. -# platform: linux-64 -# input_hash: c4b15c5bfeffe4d558e4ece0c996e6cc04c00369326c72d19780ffc0209bd591 -@EXPLICIT -https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 -https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2024.2.2-hbcca054_0.conda#2f4327a1cbe7f022401b236e915a5fef -https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.2.0-h7e041cc_5.conda#f6f6600d18a4047b54f803cf708b868a -https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.9-4_pypy39_pp73.conda#c1b2f29111681a4036ed21eaa3f44620 -https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda#161081fc7cec0bfda0d86d7cb595f8d8 -https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_kmp_llvm.tar.bz2#562b26ba2e19059551a811e72ab7f793 -https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.2.0-h807b86a_5.conda#d4ff227c46917d3b4565302a2bbb276b -https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hd590300_5.conda#69b8b6202a07720f448be700e300ccf4 -https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h27087fc_0.tar.bz2#76bbff344f0134279f225174e9064c8f -https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.1.0-hd590300_1.conda#aec6c91c7371c26392a06708a73c70e5 -https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.20-hd590300_0.conda#8e88f9389f1165d7c0936fe40d9a9a79 -https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.6.2-h59595ed_0.conda#e7ba12deb7020dd080c6c70e7b6f6a3d -https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3 -https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-13.2.0-ha4646dd_5.conda#7a6bd7a12a4bd359e2afe6c0fa1acace -https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.0.0-hd590300_1.conda#ea25936bb4080d843790b586850f82b8 -https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.3.2-hd590300_1.conda#049b7df8bae5e184d1de42cdf64855f8 -https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-hd590300_5.conda#f36c115f1ee199da648e0597ec2047ad 
-https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.4.20240210-h59595ed_0.conda#97da8860a0da5413c7c98a3b3838a645 -https://conda.anaconda.org/conda-forge/linux-64/ninja-1.11.1-h924138e_0.conda#73a4953a2d9c115bdc10ff30a52f675f -https://conda.anaconda.org/conda-forge/linux-64/openssl-3.2.1-hd590300_1.conda#9d731343cff6ee2e5a25c4a091bf8e2a -https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-h36c2ea0_1001.tar.bz2#22dad4df6e8630e8dff2428f6f6a7036 -https://conda.anaconda.org/conda-forge/linux-64/xorg-kbproto-1.0.7-h7f98852_1002.tar.bz2#4b230e8381279d76131116660f5a241a -https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.11-hd590300_0.conda#2c80dc38fface310c9bd81b17037fee5 -https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdmcp-1.1.3-h7f98852_0.tar.bz2#be93aabceefa2fac576e971aef407908 -https://conda.anaconda.org/conda-forge/linux-64/xorg-xextproto-7.3.0-h0b41bf4_1003.conda#bce9f945da8ad2ae9b1d7165a64d0f87 -https://conda.anaconda.org/conda-forge/linux-64/xorg-xproto-7.0.31-h7f98852_1007.tar.bz2#b4a4381d54784606820704f7b5f05a15 -https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0 -https://conda.anaconda.org/conda-forge/linux-64/expat-2.6.2-h59595ed_0.conda#53fb86322bdb89496d7579fe3f02fd61 -https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.1.0-hd590300_1.conda#f07002e225d7a60a694d42a7bf5ff53f -https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.1.0-hd590300_1.conda#5fc11c6020d421960607d821310fcd4d -https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-13.2.0-h69a702a_5.conda#e73e9cfd1191783392131e6238bdb3e9 -https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.43-h2797004_0.conda#009981dd9cfcaa4dbfa25ffaed86bcae -https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.45.2-h2797004_0.conda#866983a220e27a80cb75e85cb30466a1 -https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.15-h0b41bf4_0.conda#33277193f5b92bad9fdd230eb700929c -https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4 -https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda#d453b98d9c83e71da0741bb0ff4d76bc -https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.13-hd590300_5.conda#68c34ec6149623be41a1933ab996a209 -https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.5-hfc55251_0.conda#04b88013080254850d6c01ed54810589 -https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.1.0-hd590300_1.conda#39f910d205726805a958da408ca194ba -https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-h267a509_2.conda#9ae35c3d96db2c94ce0cef86efdfa2cb -https://conda.anaconda.org/conda-forge/linux-64/gdbm-1.18-h0a1914f_2.tar.bz2#b77bc399b07a19c00fe12fdc95ee0297 -https://conda.anaconda.org/conda-forge/linux-64/libhiredis-1.0.2-h2cc385e_0.tar.bz2#b34907d3a81a3cd8095ee83d174c074a -https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.27-pthreads_h413a1c8_0.conda#a356024784da6dfd4683dc5ecf45b155 -https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.6.0-h1dd3fc0_3.conda#66f03896ffbe1a110ffda05c7a856504 -https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-18.1.3-h4dfa4b3_0.conda#d39965123dffcad4d750989be65bcb7c -https://conda.anaconda.org/conda-forge/linux-64/sqlite-3.45.2-h2c6b66d_0.conda#1423efca06ed343c1da0fc429bae0779 -https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.9-h8ee46fc_0.conda#077b6e8ad6a3ddb741fce2496dd01bec 
-https://conda.anaconda.org/conda-forge/linux-64/brotli-1.1.0-hd590300_1.conda#f27a24d46e3ea7b70a1f98e50c62508f -https://conda.anaconda.org/conda-forge/linux-64/ccache-4.9.1-h1fcd64f_0.conda#3620f564bcf28c3524951b6f64f5c5ac -https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.16-hb7c19ff_0.conda#51bb7010fc86f70eee639b4bb7a894f5 -https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-22_linux64_openblas.conda#1a2a0cd3153464fee6646f3dd6dad9b8 -https://conda.anaconda.org/conda-forge/linux-64/openblas-0.3.27-pthreads_h7a3da1a_0.conda#4b422ebe8fc6a5320d0c1c22e5a46032 -https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.2-h488ebb8_0.conda#7f2e286780f072ed750df46dc2631138 -https://conda.anaconda.org/conda-forge/linux-64/pypy3.9-7.3.15-h9557127_1.conda#0862f2ce457660f1060225d96d468237 -https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-22_linux64_openblas.conda#4b31699e0ec5de64d5896e580389c9a1 -https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-22_linux64_openblas.conda#b083767b6c877e24ee597d93b87ab838 -https://conda.anaconda.org/conda-forge/linux-64/python-3.9.18-1_73_pypy.conda#6e0143cd3dd940d3004cd857e37ccd81 -https://conda.anaconda.org/conda-forge/noarch/certifi-2024.2.2-pyhd8ed1ab_0.conda#0876280e409658fc6f9e75d035960333 -https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 -https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhd8ed1ab_0.conda#5cd86562580f274031ede6aa6aa24441 -https://conda.anaconda.org/conda-forge/linux-64/cython-3.0.10-py39hc10206b_0.conda#60c2d58b33a21c32f469e3f6a9eb7e4b -https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.0-pyhd8ed1ab_2.conda#8d652ea2ee8eaee02ed8dc820bc794aa -https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_0.conda#15dda3cdbf330abfe9f555d22f66db46 -https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5 -https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.5-py39ha90811c_1.conda#25edffabcb0760fc1821597c4ce920db -https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.9.0-22_linux64_openblas.conda#1fd156abd41a4992835952f6f4d951d0 -https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19 -https://conda.anaconda.org/conda-forge/linux-64/numpy-1.26.4-py39h6dedee3_0.conda#557d64563e84ff21b14f586c7f662b7f -https://conda.anaconda.org/conda-forge/noarch/packaging-24.0-pyhd8ed1ab_0.conda#248f521b64ce055e7feae3105e7abeb8 -https://conda.anaconda.org/conda-forge/linux-64/pillow-10.3.0-py39h90a76f3_0.conda#799e6519cfffe2784db27b1db2ef33f3 -https://conda.anaconda.org/conda-forge/noarch/pluggy-1.4.0-pyhd8ed1ab_0.conda#139e9feb65187e916162917bb2484976 -https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.2-pyhd8ed1ab_0.conda#b9a4dacf97241704529131a0dfc0494f -https://conda.anaconda.org/conda-forge/noarch/pypy-7.3.15-1_pypy39.conda#a418a6c16bd6f7ed56b92194214791a0 -https://conda.anaconda.org/conda-forge/noarch/setuptools-69.5.1-pyhd8ed1ab_0.conda#7462280d81f639363e6e63c81276bd9e -https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 -https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.4.0-pyhc1e730c_0.conda#b296278eef667c673bf51de6535bad88 -https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 
-https://conda.anaconda.org/conda-forge/linux-64/tornado-6.4-py39hf860d4a_0.conda#e7fded713fb466e1e0670afce1761b47 -https://conda.anaconda.org/conda-forge/linux-64/unicodedata2-15.1.0-py39hf860d4a_0.conda#f699157518d28d00c87542b4ec1273be -https://conda.anaconda.org/conda-forge/noarch/wheel-0.43.0-pyhd8ed1ab_1.conda#0b5293a157c2b5cd513dd1b03d8d3aae -https://conda.anaconda.org/conda-forge/noarch/zipp-3.17.0-pyhd8ed1ab_0.conda#2e4d6bc0b14e10f895fc6791a7d9b26a -https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-22_linux64_openblas.conda#63ddb593595c9cf5eb08d3de54d66df8 -https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.2.1-py39ha90811c_0.conda#07ed14c8326da42356514bcbc0b04802 -https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.51.0-py39hf860d4a_0.conda#63421b4dd7222fad555e34ec9af015a1 -https://conda.anaconda.org/conda-forge/noarch/importlib_resources-6.4.0-pyhd8ed1ab_0.conda#c5d3907ad8bd7bf557521a1833cf7e6d -https://conda.anaconda.org/conda-forge/noarch/joblib-1.4.0-pyhd8ed1ab_0.conda#e0ed1bf13ce3a440e022157bf4764465 -https://conda.anaconda.org/conda-forge/noarch/meson-1.4.0-pyhd8ed1ab_0.conda#52a0660cfa40b45bf254ecc3374cb2e0 -https://conda.anaconda.org/conda-forge/noarch/pip-24.0-pyhd8ed1ab_0.conda#f586ac1e56c8638b64f9c8122a7b8a67 -https://conda.anaconda.org/conda-forge/noarch/pyproject-metadata-0.7.1-pyhd8ed1ab_0.conda#dcb27826ffc94d5f04e241322239983b -https://conda.anaconda.org/conda-forge/noarch/pytest-7.4.4-pyhd8ed1ab_0.conda#a9d145de8c5f064b5fa68fb34725d9f4 -https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0-pyhd8ed1ab_0.conda#2cf4264fffb9e6eff6031c5b6884d61c -https://conda.anaconda.org/conda-forge/linux-64/scipy-1.12.0-py39h6dedee3_2.conda#6c5d74bac41838f4377dfd45085e1fec -https://conda.anaconda.org/conda-forge/linux-64/blas-2.122-openblas.conda#5065468105542a8b23ea47bd8b6fa55f -https://conda.anaconda.org/conda-forge/noarch/importlib-resources-6.4.0-pyhd8ed1ab_0.conda#dcbadab7a68738a028e195ab68ab2d2e -https://conda.anaconda.org/conda-forge/noarch/meson-python-0.15.0-pyh0c530f3_0.conda#3bc64565ca78ce3bb80248d09926d8f9 -https://conda.anaconda.org/conda-forge/linux-64/pyamg-5.1.0-py39h5fd064f_0.conda#04676d2a49da3cb608af77e04b796ce1 -https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.5.0-pyhd8ed1ab_0.conda#d5f595da2daead898ca958ac62f0307b -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.8.4-py39h4e7d633_0.conda#58272019e595dde98d0844ae3ebf0cfe -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.8.4-py39h4162558_0.conda#b0f7702a174422ff1db58190495fd766 diff --git a/build_tools/azure/pytest-pyodide.js b/build_tools/azure/pytest-pyodide.js deleted file mode 100644 index c195940ce3b5b..0000000000000 --- a/build_tools/azure/pytest-pyodide.js +++ /dev/null @@ -1,53 +0,0 @@ -const { opendir } = require('node:fs/promises'); -const { loadPyodide } = require("pyodide"); - -async function main() { - let exit_code = 0; - try { - global.pyodide = await loadPyodide(); - let pyodide = global.pyodide; - const FS = pyodide.FS; - const NODEFS = FS.filesystems.NODEFS; - - let mountDir = "/mnt"; - pyodide.FS.mkdir(mountDir); - pyodide.FS.mount(pyodide.FS.filesystems.NODEFS, { root: "." 
}, mountDir); - - await pyodide.loadPackage(["micropip"]); - await pyodide.runPythonAsync(` - import glob - import micropip - - wheels = glob.glob('/mnt/dist/*.whl') - wheels = [f'emfs://{wheel}' for wheel in wheels] - print(f'installing wheels: {wheels}') - await micropip.install(wheels); - - pkg_list = micropip.list() - print(pkg_list) - `); - - // Pyodide is built without OpenMP, need to set environment variable to - // skip related test - await pyodide.runPythonAsync(` - import os - os.environ['SKLEARN_SKIP_OPENMP_TEST'] = 'true' - `); - - await pyodide.runPythonAsync("import micropip; micropip.install('pytest')"); - let pytest = pyodide.pyimport("pytest"); - let args = process.argv.slice(2); - console.log('pytest args:', args); - exit_code = pytest.main(pyodide.toPy(args)); - } catch (e) { - console.error(e); - // Arbitrary exit code here. I have seen this code reached instead of a - // Pyodide fatal error sometimes - exit_code = 66; - - } finally { - process.exit(exit_code); - } -} - -main(); diff --git a/build_tools/azure/python_nogil_lock.txt b/build_tools/azure/python_nogil_lock.txt deleted file mode 100644 index 03cd4f2e0c346..0000000000000 --- a/build_tools/azure/python_nogil_lock.txt +++ /dev/null @@ -1,72 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile --output-file=/scikit-learn/build_tools/azure/python_nogil_lock.txt /scikit-learn/build_tools/azure/python_nogil_requirements.txt -# ---index-url https://d1yxz45j0ypngg.cloudfront.net/ ---extra-index-url https://pypi.org/simple - -contourpy==1.1.1 - # via matplotlib -cycler==0.12.1 - # via matplotlib -cython==3.0.10 - # via -r /scikit-learn/build_tools/azure/python_nogil_requirements.txt -exceptiongroup==1.2.0 - # via pytest -execnet==2.0.2 - # via pytest-xdist -fonttools==4.50.0 - # via matplotlib -iniconfig==2.0.0 - # via pytest -joblib==1.3.2 - # via -r /scikit-learn/build_tools/azure/python_nogil_requirements.txt -kiwisolver==1.4.4 - # via matplotlib -matplotlib==3.6.2 - # via -r /scikit-learn/build_tools/azure/python_nogil_requirements.txt -meson==1.4.0 - # via meson-python -meson-python==0.15.0 - # via -r /scikit-learn/build_tools/azure/python_nogil_requirements.txt -ninja==1.11.1.1 - # via -r /scikit-learn/build_tools/azure/python_nogil_requirements.txt -numpy==1.24.0 - # via - # -r /scikit-learn/build_tools/azure/python_nogil_requirements.txt - # contourpy - # matplotlib - # scipy -packaging==24.0 - # via - # matplotlib - # pyproject-metadata - # pytest -pillow==9.5.0 - # via matplotlib -pluggy==1.4.0 - # via pytest -pyparsing==3.1.2 - # via matplotlib -pyproject-metadata==0.7.1 - # via meson-python -pytest==7.4.4 - # via - # -r /scikit-learn/build_tools/azure/python_nogil_requirements.txt - # pytest-xdist -pytest-xdist==3.5.0 - # via -r /scikit-learn/build_tools/azure/python_nogil_requirements.txt -python-dateutil==2.9.0.post0 - # via matplotlib -scipy==1.9.3 - # via -r /scikit-learn/build_tools/azure/python_nogil_requirements.txt -six==1.16.0 - # via python-dateutil -threadpoolctl==3.4.0 - # via -r /scikit-learn/build_tools/azure/python_nogil_requirements.txt -tomli==2.0.1 - # via - # meson-python - # pytest diff --git a/build_tools/azure/python_nogil_requirements.txt b/build_tools/azure/python_nogil_requirements.txt deleted file mode 100644 index 2cebad9a03b25..0000000000000 --- a/build_tools/azure/python_nogil_requirements.txt +++ /dev/null @@ -1,20 +0,0 @@ -# To generate python_nogil_lock.txt, use the following command: -# docker run -v 
$PWD:/scikit-learn -it nogil/python bash -c 'pip install pip-tools; pip-compile --upgrade /scikit-learn/build_tools/azure/python_nogil_requirements.txt -o /scikit-learn/build_tools/azure/python_nogil_lock.txt'
-#
-# The reason behind it is that you need python-nogil to generate the pip lock
-# file. Using pip-compile --index and --extra-index will not work, for example
-# the latest cython will be picked up from PyPI, rather than the one from the
-# python-nogil index
-matplotlib
-numpy
-scipy
-cython
-joblib
-threadpoolctl
-# TODO: somehow pytest 8 does not seem to work with meson editable
-# install. Exit code is 5, i.e. no test collected
-# This would be fixed by https://github.com/mesonbuild/meson-python/pull/569
-pytest<8
-pytest-xdist
-meson-python
-ninja
diff --git a/build_tools/azure/test_docs.sh b/build_tools/azure/test_docs.sh
index 61e855425786b..f3f824d5806b0 100755
--- a/build_tools/azure/test_docs.sh
+++ b/build_tools/azure/test_docs.sh
@@ -1,11 +1,21 @@
 #!/bin/bash
-set -e
+set -ex

-if [[ "$DISTRIB" =~ ^conda.* ]]; then
-    source activate $VIRTUALENV
-elif [[ "$DISTRIB" == "ubuntu" || "$DISTRIB" == "pip-nogil" ]]; then
-    source $VIRTUALENV/bin/activate
-fi
+source build_tools/shared.sh
+activate_environment

-make test-doc
+scipy_doctest_installed=$(python -c 'import scipy_doctest' && echo "True" || echo "False")
+if [[ "$scipy_doctest_installed" == "True" ]]; then
+    doc_rst_files=$(find $PWD/doc -name '*.rst' | sort)
+    # Changing dir, as we do in build_tools/azure/test_script.sh, avoids an
+    # error when importing sklearn. Not sure why this happens; my wild guess
+    # is that it has something to do with the bespoke way we set up conda,
+    # putting conda in the PATH and using "source activate" rather than
+    # "source /etc/profile.d/conda.sh" + "conda activate".
+    cd $TEST_DIR
+    # With scipy-doctest, --doctest-modules only runs doctests (contrary to
+    # vanilla pytest, where it runs doctests on top of the normal tests)
+    python -m pytest --doctest-modules --pyargs sklearn
+    python -m pytest --doctest-modules $doc_rst_files
+fi
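Note on the test_docs.sh change above: with the scipy-doctest pytest plugin installed, --doctest-modules collects only the doctests embedded in docstrings. A minimal, self-contained illustration of the kind of example this mode executes (the module and function are hypothetical, not part of this PR):

# doctest_demo.py -- hypothetical module to illustrate doctest collection
def inverse(x):
    """Return the multiplicative inverse of x.

    >>> inverse(4)
    0.25
    """
    return 1 / x

Running "python -m pytest --doctest-modules doctest_demo.py" executes the ">>>" example and fails the run if the printed output differs from "0.25".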
diff --git a/build_tools/azure/test_script.sh b/build_tools/azure/test_script.sh
index faf48e27efefb..eb4414283be2b 100755
--- a/build_tools/azure/test_script.sh
+++ b/build_tools/azure/test_script.sh
@@ -11,7 +11,10 @@ if [[ "$BUILD_REASON" == "Schedule" ]]; then
     # Enable global random seed randomization to discover seed-sensitive tests
     # only on nightly builds.
     # https://scikit-learn.org/stable/computing/parallelism.html#environment-variables
-    export SKLEARN_TESTS_GLOBAL_RANDOM_SEED="any"
+    export SKLEARN_TESTS_GLOBAL_RANDOM_SEED=$(($RANDOM % 100))
+    echo "To reproduce this test run, set the following environment variable:"
+    echo "    SKLEARN_TESTS_GLOBAL_RANDOM_SEED=$SKLEARN_TESTS_GLOBAL_RANDOM_SEED"
+    echo "See: https://scikit-learn.org/dev/computing/parallelism.html#sklearn-tests-global-random-seed"

     # Enable global dtype fixture for all nightly builds to discover
     # numerical-sensitive tests.
@@ -27,7 +30,7 @@ if [[ "$COMMIT_MESSAGE" =~ \[float32\] ]]; then
 fi

 mkdir -p $TEST_DIR
-cp setup.cfg $TEST_DIR
+cp pyproject.toml $TEST_DIR
 cd $TEST_DIR

 python -c "import joblib; print(f'Number of cores (physical): \
@@ -36,7 +39,7 @@ python -c "import sklearn; sklearn.show_versions()"

 show_installed_libraries

-TEST_CMD="python -m pytest --showlocals --durations=20 --junitxml=$JUNITXML"
+TEST_CMD="python -m pytest --showlocals --durations=20 --junitxml=$JUNITXML -o junit_family=legacy"

 if [[ "$COVERAGE" == "true" ]]; then
     # Note: --cov-report= is used to disable the long text output report in the
@@ -45,6 +48,12 @@
     # report that otherwise hides the test failures and forces long scrolls in
     # the CI logs.
     export COVERAGE_PROCESS_START="$BUILD_SOURCESDIRECTORY/.coveragerc"
+
+    # Use sys.monitoring to make coverage faster for Python >= 3.12
+    HAS_SYSMON=$(python -c 'import sys; print(sys.version_info >= (3, 12))')
+    if [[ "$HAS_SYSMON" == "True" ]]; then
+        export COVERAGE_CORE=sysmon
+    fi
     TEST_CMD="$TEST_CMD --cov-config='$COVERAGE_PROCESS_START' --cov sklearn --cov-report="
 fi

@@ -60,15 +69,22 @@ if [[ -n "$SELECTED_TESTS" ]]; then
     export SKLEARN_TESTS_GLOBAL_RANDOM_SEED="all"
 fi

-TEST_CMD="$TEST_CMD --pyargs sklearn"
-if [[ "$DISTRIB" == "conda-pypy3" ]]; then
-    # Run only common tests for PyPy. Running the full test suite uses too
-    # much memory and causes the test to time out sometimes. See
-    # https://github.com/scikit-learn/scikit-learn/issues/27662 for more
-    # details.
-    TEST_CMD="$TEST_CMD.tests.test_common"
+if which lscpu ; then
+    lscpu
+else
+    echo "Could not inspect CPU architecture."
+fi
+
+if [[ "$DISTRIB" == "conda-free-threaded" ]]; then
+    # Make sure that the GIL is disabled even when importing extensions that
+    # have not declared free-threaded compatibility. This can be removed once
+    # the numpy, scipy and scikit-learn extensions have all declared
+    # free-threaded compatibility.
+    export PYTHON_GIL=0
 fi

+TEST_CMD="$TEST_CMD --pyargs sklearn"
+
 set -x
 eval "$TEST_CMD"
 set +x
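Note on the SKLEARN_TESTS_GLOBAL_RANDOM_SEED change above: nightly builds now pick one concrete seed in [0, 99] and echo it, so a failing run can be reproduced locally by exporting the same value. A minimal sketch of a pytest fixture driven by such a variable (the parsing rules and the sample test are illustrative, not scikit-learn's actual conftest implementation):

import os

import pytest


def _seeds_from_env(value):
    # Illustrative parsing only: "42" -> [42], "40-42" -> [40, 41, 42],
    # "all" -> every seed in [0, 99].
    if value == "all":
        return list(range(100))
    if "-" in value:
        low, high = value.split("-")
        return list(range(int(low), int(high) + 1))
    return [int(value)]


@pytest.fixture(
    params=_seeds_from_env(os.environ.get("SKLEARN_TESTS_GLOBAL_RANDOM_SEED", "42"))
)
def global_random_seed(request):
    # Seed-sensitive tests run once per selected seed.
    return request.param


def test_standard_normal_mean(global_random_seed):
    import numpy as np

    rng = np.random.RandomState(global_random_seed)
    # The mean of 100_000 standard normal draws should be close to 0 for any
    # seed; a seed-sensitive bug would make only some seeds fail.
    assert abs(rng.standard_normal(100_000).mean()) < 0.05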
diff --git a/build_tools/azure/test_script_pyodide.sh b/build_tools/azure/test_script_pyodide.sh
deleted file mode 100644
index d1aa207f864a2..0000000000000
--- a/build_tools/azure/test_script_pyodide.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/bash
-
-set -e
-
-# We are using a pytest js wrapper script to run tests inside Pyodide. Maybe
-# one day we can use a Pyodide venv instead but at the time of writing
-# (2023-09-27) there is an issue with scipy.linalg in a Pyodide venv, see
-# https://github.com/pyodide/pyodide/issues/3865 for more details.
-node build_tools/azure/pytest-pyodide.js --pyargs sklearn --durations 20 --showlocals
diff --git a/build_tools/azure/ubuntu_atlas_lock.txt b/build_tools/azure/ubuntu_atlas_lock.txt
index d1674c678b254..bb4ee75928009 100644
--- a/build_tools/azure/ubuntu_atlas_lock.txt
+++ b/build_tools/azure/ubuntu_atlas_lock.txt
@@ -6,30 +6,30 @@
 #
 cython==3.0.10
     # via -r build_tools/azure/ubuntu_atlas_requirements.txt
-exceptiongroup==1.2.1
+exceptiongroup==1.2.2
     # via pytest
 execnet==2.1.1
     # via pytest-xdist
-iniconfig==2.0.0
+iniconfig==2.1.0
     # via pytest
 joblib==1.2.0
     # via -r build_tools/azure/ubuntu_atlas_requirements.txt
-meson==1.4.0
+meson==1.8.0
     # via meson-python
-meson-python==0.16.0
+meson-python==0.18.0
     # via -r build_tools/azure/ubuntu_atlas_requirements.txt
-ninja==1.11.1.1
+ninja==1.11.1.4
     # via -r build_tools/azure/ubuntu_atlas_requirements.txt
-packaging==24.0
+packaging==25.0
     # via
     #   meson-python
     #   pyproject-metadata
     #   pytest
 pluggy==1.5.0
     # via pytest
-pyproject-metadata==0.8.0
+pyproject-metadata==0.9.1
     # via meson-python
-pytest==7.4.4
+pytest==8.3.5
     # via
     #   -r build_tools/azure/ubuntu_atlas_requirements.txt
     #   pytest-xdist
@@ -37,7 +37,7 @@ pytest-xdist==3.6.1
     # via -r build_tools/azure/ubuntu_atlas_requirements.txt
 threadpoolctl==3.1.0
     # via -r build_tools/azure/ubuntu_atlas_requirements.txt
-tomli==2.0.1
+tomli==2.2.1
     # via
     #   meson-python
     #   pytest
diff --git a/build_tools/azure/ubuntu_atlas_requirements.txt b/build_tools/azure/ubuntu_atlas_requirements.txt
index 805d84d4d0aac..dfb0cfebc54d1 100644
--- a/build_tools/azure/ubuntu_atlas_requirements.txt
+++ b/build_tools/azure/ubuntu_atlas_requirements.txt
@@ -4,7 +4,7 @@ cython==3.0.10  # min
 joblib==1.2.0  # min
 threadpoolctl==3.1.0  # min
-pytest<8
+pytest
 pytest-xdist
 ninja
 meson-python
diff --git a/build_tools/azure/upload_codecov.sh b/build_tools/azure/upload_codecov.sh
index 0e87b2dafc8b4..4c3db8fe8bbd6 100755
--- a/build_tools/azure/upload_codecov.sh
+++ b/build_tools/azure/upload_codecov.sh
@@ -9,8 +9,8 @@ fi

 # When we update the codecov uploader version, we need to update the checksums.
 # The checksum for each codecov binary is available at
-# https://uploader.codecov.io e.g. for linux
-# https://uploader.codecov.io/v0.7.1/linux/codecov.SHA256SUM.
+# https://cli.codecov.io e.g. for linux
+# https://cli.codecov.io/v10.2.1/linux/codecov.SHA256SUM.

 # Instead of hardcoding a specific version and signature in this script, it
 # would be possible to use the "latest" symlink URL but then we need to
@@ -20,9 +20,8 @@ fi
 # However this approach would yield a larger number of downloads from
 # codecov.io and keybase.io, therefore increasing the risk of running into
 # network failures.
-CODECOV_UPLOADER_VERSION=0.7.1
-CODECOV_BASE_URL="https://uploader.codecov.io/v$CODECOV_UPLOADER_VERSION"
-
+CODECOV_CLI_VERSION=10.2.1
+CODECOV_BASE_URL="https://cli.codecov.io/v$CODECOV_CLI_VERSION"

 # Check that the git repo is located at the expected location:
-d "$BUILD_REPOSITORY_LOCALPATH/.git" ]]; then @@ -39,19 +38,22 @@ fi if [[ $OSTYPE == *"linux"* ]]; then curl -Os "$CODECOV_BASE_URL/linux/codecov" - SHA256SUM="b9282b8b43eef83f722646d8992c4dd36563046afe0806722184e7e9923a6d7b codecov" + SHA256SUM="39dd112393680356daf701c07f375303aef5de62f06fc80b466b5c3571336014 codecov" echo "$SHA256SUM" | shasum -a256 -c chmod +x codecov - ./codecov -t ${CODECOV_TOKEN} -R $BUILD_REPOSITORY_LOCALPATH -f coverage.xml -Z --verbose + ./codecov upload-coverage -t ${CODECOV_TOKEN} -f coverage.xml -Z + ./codecov do-upload --disable-search --report-type test_results --file $JUNIT_FILE elif [[ $OSTYPE == *"darwin"* ]]; then curl -Os "$CODECOV_BASE_URL/macos/codecov" - SHA256SUM="e4ce34c144d3195eccb7f8b9ca8de092d2a4be114d927ca942500f3a6326225c codecov" + SHA256SUM="01183f6367c7baff4947cce389eaa511b7a6d938e37ae579b08a86b51f769fd9 codecov" echo "$SHA256SUM" | shasum -a256 -c chmod +x codecov - ./codecov -t ${CODECOV_TOKEN} -R $BUILD_REPOSITORY_LOCALPATH -f coverage.xml -Z --verbose + ./codecov upload-coverage -t ${CODECOV_TOKEN} -f coverage.xml -Z + ./codecov do-upload --disable-search --report-type test_results --file $JUNIT_FILE else curl -Os "$CODECOV_BASE_URL/windows/codecov.exe" - SHA256SUM="f5de88026f061ff08b88a5895f9c11855523924ceb8174e027403dd20fa5e4d6 codecov.exe" + SHA256SUM="e54e9520428701a510ef451001db56b56fb17f9b0484a266f184b73dd27b77e7 codecov.exe" echo "$SHA256SUM" | sha256sum -c - ./codecov.exe -t ${CODECOV_TOKEN} -R $BUILD_REPOSITORY_LOCALPATH -f coverage.xml -Z --verbose + ./codecov.exe upload-coverage -t ${CODECOV_TOKEN} -f coverage.xml -Z + ./codecov.exe do-upload --disable-search --report-type test_results --file $JUNIT_FILE fi diff --git a/build_tools/azure/windows.yml b/build_tools/azure/windows.yml index 1727da4138f07..b3fcf130f9350 100644 --- a/build_tools/azure/windows.yml +++ b/build_tools/azure/windows.yml @@ -83,3 +83,4 @@ jobs: retryCountOnTaskFailure: 5 env: CODECOV_TOKEN: $(CODECOV_TOKEN) + JUNIT_FILE: $(TEST_DIR)/$(JUNITXML) diff --git a/build_tools/check-meson-openmp-dependencies.py b/build_tools/check-meson-openmp-dependencies.py new file mode 100644 index 0000000000000..43a7426494160 --- /dev/null +++ b/build_tools/check-meson-openmp-dependencies.py @@ -0,0 +1,172 @@ +""" +Check that OpenMP dependencies are correctly defined in meson.build files. + +This is based on trying to make sure the the following two things match: +- the Cython files using OpenMP (based on a git grep regex) +- the Cython extension modules that are built with OpenMP compiler flags (based + on meson introspect json output) +""" + +import json +import re +import subprocess +from pathlib import Path + + +def has_source_openmp_flags(target_source): + return any("openmp" in arg for arg in target_source["parameters"]) + + +def has_openmp_flags(target): + """Return whether target sources use OpenMP flags. + + Make sure that both compiler and linker source use OpenMP. + Look at `get_meson_info` docstring to see what `target` looks like. 
+ """ + target_sources = target["target_sources"] + + target_use_openmp_flags = any( + has_source_openmp_flags(target_source) for target_source in target_sources + ) + + if not target_use_openmp_flags: + return False + + # When the target use OpenMP we expect a compiler + linker source and we + # want to make sure that both the compiler and the linker use OpenMP + assert len(target_sources) == 2 + compiler_source, linker_source = target_sources + assert "compiler" in compiler_source + assert "linker" in linker_source + + compiler_use_openmp_flags = any( + "openmp" in arg for arg in compiler_source["parameters"] + ) + linker_use_openmp_flags = any( + "openmp" in arg for arg in linker_source["parameters"] + ) + + assert compiler_use_openmp_flags == linker_use_openmp_flags + return compiler_use_openmp_flags + + +def get_canonical_name_meson(target, build_path): + """Return a name based on generated shared library. + + The goal is to return a name that can be easily matched with the output + from `git_grep_info`. + + Look at `get_meson_info` docstring to see what `target` looks like. + """ + # Expect a list with one element with the name of the shared library + assert len(target["filename"]) == 1 + shared_library_path = Path(target["filename"][0]) + shared_library_relative_path = shared_library_path.relative_to( + build_path.absolute() + ) + # Needed on Windows to match git grep output + rel_path = shared_library_relative_path.as_posix() + # OS-specific naming of the shared library .cpython- on POSIX and + # something like .cp312- on Windows + pattern = r"\.(cpython|cp\d+)-.+" + return re.sub(pattern, "", str(rel_path)) + + +def get_canonical_name_git_grep(filename): + """Return name based on filename. + + The goal is to return a name that can easily be matched with the output + from `get_meson_info`. + """ + return re.sub(r"\.pyx(\.tp)?", "", filename) + + +def get_meson_info(): + """Return names of extension that use OpenMP based on meson introspect output. + + The meson introspect json info is a list of targets where a target is a dict + that looks like this (parts not used in this script are not shown for simplicity): + { + 'name': '_k_means_elkan.cpython-312-x86_64-linux-gnu', + 'filename': [ + '/sklearn/cluster/_k_means_elkan.cpython-312-x86_64-linux-gnu.so' + ], + 'target_sources': [ + { + 'compiler': ['ccache', 'cc'], + 'parameters': [ + '-Wall', + '-std=c11', + '-fopenmp', + ... + ], + ... + }, + { + 'linker': ['cc'], + 'parameters': [ + '-shared', + '-fPIC', + '-fopenmp', + ... 
+          ]
+        }
+      ]
+    }
+    """
+    build_path = Path("build/introspect")
+    subprocess.check_call(["meson", "setup", build_path, "--reconfigure"])
+
+    json_out = subprocess.check_output(
+        ["meson", "introspect", build_path, "--targets"], text=True
+    )
+    target_list = json.loads(json_out)
+    meson_targets = [target for target in target_list if has_openmp_flags(target)]
+
+    return [get_canonical_name_meson(each, build_path) for each in meson_targets]
+
+
+def get_git_grep_info():
+    """Return names of extensions that use OpenMP based on a git grep regex."""
+    git_grep_filenames = subprocess.check_output(
+        ["git", "grep", "-lP", "cython.*parallel|_openmp_helpers"], text=True
+    ).splitlines()
+    git_grep_filenames = [f for f in git_grep_filenames if ".pyx" in f]
+
+    return [get_canonical_name_git_grep(each) for each in git_grep_filenames]
+
+
+def main():
+    from_meson = set(get_meson_info())
+    from_git_grep = set(get_git_grep_info())
+
+    only_in_git_grep = from_git_grep - from_meson
+    only_in_meson = from_meson - from_git_grep
+
+    msg = ""
+    if only_in_git_grep:
+        only_in_git_grep_msg = "\n".join(
+            [f"  {each}" for each in sorted(only_in_git_grep)]
+        )
+        msg += (
+            "Some Cython files use OpenMP,"
+            " but their meson.build is missing the openmp_dep dependency:\n"
+            f"{only_in_git_grep_msg}\n\n"
+        )
+
+    if only_in_meson:
+        only_in_meson_msg = "\n".join([f"  {each}" for each in sorted(only_in_meson)])
+        msg += (
+            "Some Cython files do not use OpenMP,"
+            " you should remove openmp_dep from their meson.build:\n"
+            f"{only_in_meson_msg}\n\n"
+        )
+
+    if from_meson != from_git_grep:
+        raise ValueError(
+            f"Some issues have been found in Meson OpenMP dependencies:\n\n{msg}"
+        )
+
+
+if __name__ == "__main__":
+    main()
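Note on check-meson-openmp-dependencies.py above: the script compares two sets of canonical extension names, one derived from meson target filenames and one from git grep hits. A small standalone sketch, reusing the script's own regexes on made-up file names, of how both sides reduce to the same canonical name:

import re

# Hypothetical example paths, shaped like the script's real inputs
meson_filename = "sklearn/cluster/_k_means_elkan.cpython-312-x86_64-linux-gnu.so"
git_grep_filename = "sklearn/cluster/_k_means_elkan.pyx"

# Same substitutions as get_canonical_name_meson / get_canonical_name_git_grep
canonical_from_meson = re.sub(r"\.(cpython|cp\d+)-.+", "", meson_filename)
canonical_from_git_grep = re.sub(r"\.pyx(\.tp)?", "", git_grep_filename)

assert (
    canonical_from_meson
    == canonical_from_git_grep
    == "sklearn/cluster/_k_means_elkan"
)
print(canonical_from_meson)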
diff --git a/build_tools/circle/build_doc.sh b/build_tools/circle/build_doc.sh
index 35fee3ae50b65..e85f3ab15e617 100755
--- a/build_tools/circle/build_doc.sh
+++ b/build_tools/circle/build_doc.sh
@@ -1,5 +1,6 @@
 #!/usr/bin/env bash
 set -e
+set -x

 # Decide what kind of documentation build to run, and run it.
 #
@@ -30,11 +31,18 @@
 then
     CIRCLE_BRANCH=$GITHUB_HEAD_REF
     CI_PULL_REQUEST=true
+    CI_TARGET_BRANCH=$GITHUB_BASE_REF
 else
     CIRCLE_BRANCH=$GITHUB_REF_NAME
 fi
 fi

+if [[ -n "$CI_PULL_REQUEST" && -z "$CI_TARGET_BRANCH" ]]
+then
+    # Get the target branch name when using CircleCI
+    CI_TARGET_BRANCH=$(curl -s "https://api.github.com/repos/scikit-learn/scikit-learn/pulls/$CIRCLE_PR_NUMBER" | jq -r .base.ref)
+fi
+
 get_build_type() {
     if [ -z "$CIRCLE_SHA1" ]
     then
@@ -159,54 +167,70 @@ if [[ `type -t deactivate` ]]; then
     deactivate
 fi

-MAMBAFORGE_PATH=$HOME/mambaforge
-# Install dependencies with mamba
-wget -q https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-x86_64.sh \
-    -O mambaforge.sh
-chmod +x mambaforge.sh && ./mambaforge.sh -b -p $MAMBAFORGE_PATH
-export PATH="/usr/lib/ccache:$MAMBAFORGE_PATH/bin:$PATH"
+# Install Miniforge
+MINIFORGE_URL="https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh"
+curl -L --retry 10 $MINIFORGE_URL -o miniconda.sh
+MINIFORGE_PATH=$HOME/miniforge3
+bash ./miniconda.sh -b -p $MINIFORGE_PATH
+source $MINIFORGE_PATH/etc/profile.d/conda.sh
+conda activate

-ccache -M 512M
-export CCACHE_COMPRESS=1
-# pin conda-lock to latest released version (needs manual update from time to time)
-mamba install "$(get_dep conda-lock min)" -y
+create_conda_environment_from_lock_file $CONDA_ENV_NAME $LOCK_FILE
+conda activate $CONDA_ENV_NAME

-conda-lock install --log-level DEBUG --name $CONDA_ENV_NAME $LOCK_FILE
-source activate $CONDA_ENV_NAME
+# Sets up ccache when using the system compiler
+export PATH="/usr/lib/ccache:$PATH"
+# Sets up ccache when using conda-forge compilers (needs to be after conda
+# activate which sets CC and CXX)
+export CC="ccache $CC"
+export CXX="ccache $CXX"
+ccache -M 512M
+export CCACHE_COMPRESS=1
+# Zeroing statistics so that ccache statistics are shown only for this build
+ccache -z

 show_installed_libraries

-# Set parallelism to 3 to overlap IO bound tasks with CPU bound tasks on CI
-# workers with 2 cores when building the compiled extensions of scikit-learn.
-export SKLEARN_BUILD_PARALLEL=3
-pip install -e . --no-build-isolation
+# Specify the ninja -j argument explicitly because ninja does not handle
+# cgroups v2; we use -j3 since we have 2 cores on CircleCI, see
+# https://github.com/scikit-learn/scikit-learn/pull/30333
+pip install -e . --no-build-isolation --config-settings=compile-args="-j 3"
 echo "ccache build summary:"
 ccache -s

 export OMP_NUM_THREADS=1

+if [[ "$CIRCLE_BRANCH" == "main" || "$CI_TARGET_BRANCH" == "main" ]]
+then
+    towncrier build --yes
+fi
+
 if [[ "$CIRCLE_BRANCH" =~ ^main$ && -z "$CI_PULL_REQUEST" ]]
 then
     # List available documentation versions if on main
-    python build_tools/circle/list_versions.py > doc/versions.rst
+    python build_tools/circle/list_versions.py --json doc/js/versions.json --rst doc/versions.rst
 fi

 # The pipefail is requested to propagate the exit code
 set -o pipefail && cd doc && make $make_args 2>&1 | tee ~/log.txt

-# Insert the version warning for deployment
-find _build/html/stable -name "*.html" | xargs sed -i '/<\/body>/ i \
-\ '
-
 cd -
 set +o pipefail

 affected_doc_paths() {
+    scikit_learn_version=$(python -c 'import re; import sklearn; print(re.sub(r"(\d+\.\d+).+", r"\1", sklearn.__version__))')
     files=$(git diff --name-only origin/main...$CIRCLE_SHA1)
-    echo "$files" | grep ^doc/.*\.rst | sed 's/^doc\/\(.*\)\.rst$/\1.html/'
+    # use sed to replace files ending in .rst or .rst.template by .html
+    echo "$files" | grep -vP 'upcoming_changes/.*/\d+.*\.rst' | grep ^doc/.*\.rst | \
+        sed 's/^doc\/\(.*\)\.rst$/\1.html/; s/^doc\/\(.*\)\.rst\.template$/\1.html/'
+    # replace towncrier fragment files by a link to the changelog. uniq is used
+    # because in some edge cases multiple fragments can be added and we want a
+    # single link to the changelog.
+    echo "$files" | grep -P 'upcoming_changes/.*/\d+.*\.rst' | sed "s@.*@whats_new/v${scikit_learn_version}.html@" | uniq
+
     echo "$files" | grep ^examples/.*.py | sed 's/^\(.*\)\.py$/auto_\1.html/'
     sklearn_files=$(echo "$files" | grep '^sklearn/')
     if [ -n "$sklearn_files" ]
@@ -244,7 +268,7 @@ then
     (
     echo '
    ' echo "$affected" | sed 's|.*|
  • & [dev, stable]
  • |' - echo '

General: Home | API Reference | Examples

' + echo '

General: Home | API Reference | Examples

' echo 'Sphinx Warnings in affected files
    ' echo "$warnings" | sed 's/\/home\/circleci\/project\//
  • /g' echo '
' diff --git a/build_tools/circle/doc_environment.yml b/build_tools/circle/doc_environment.yml index 4df22341635a3..bc36e178de058 100644 --- a/build_tools/circle/doc_environment.yml +++ b/build_tools/circle/doc_environment.yml @@ -4,7 +4,7 @@ channels: - conda-forge dependencies: - - python=3.9 + - python=3.10 - numpy - blas - scipy @@ -14,7 +14,7 @@ dependencies: - matplotlib - pandas - pyamg - - pytest<8 + - pytest - pytest-xdist - pillow - pip @@ -33,7 +33,12 @@ dependencies: - polars - pooch - sphinxext-opengraph + - sphinx-remove-toctrees + - sphinx-design + - pydata-sphinx-theme + - towncrier - pip - pip: - jupyterlite-sphinx - jupyterlite-pyodide-kernel + - sphinxcontrib-sass diff --git a/build_tools/circle/doc_linux-64_conda.lock b/build_tools/circle/doc_linux-64_conda.lock index baccc168b059d..76f56da3a9681 100644 --- a/build_tools/circle/doc_linux-64_conda.lock +++ b/build_tools/circle/doc_linux-64_conda.lock @@ -1,322 +1,332 @@ # Generated by conda-lock. # platform: linux-64 -# input_hash: b57888763997b08b2f240b5ff1ed6afcf88685f3d8c791ea8eba4d80483c43d0 +# input_hash: 93cb6f7aa17dce662512650f1419e87eae56ed49163348847bf965697cd268bb @EXPLICIT https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 -https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2024.2.2-hbcca054_0.conda#2f4327a1cbe7f022401b236e915a5fef https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2#0c96522c6bdaed4b1566d11387caaf45 https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2#34893075a5c9e55cdafac56607368fc6 https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2#4d59c254e01d9cde7957100457e2d5fb -https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_1.conda#6185f640c43843e5ad6fd1c5372c3f80 -https://conda.anaconda.org/conda-forge/noarch/kernel-headers_linux-64-2.6.32-he073ed8_17.conda#d731b543793afc0433c4fd593e693fce -https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.40-h55db66e_0.conda#10569984e7db886e4f1abc2b47ad79a1 -https://conda.anaconda.org/conda-forge/noarch/libgcc-devel_linux-64-12.3.0-h2af2641_106.conda#b97e137a252f112b8d5fadb313bd8ec9 -https://conda.anaconda.org/conda-forge/noarch/libstdcxx-devel_linux-64-12.3.0-h2af2641_106.conda#647bd9d44ad216d410329e659c898d8f -https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.2.0-h95c4c6d_6.conda#3cfab3e709f77e9f1b3d380eb622494a -https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.9-4_cp39.conda#bfe4b3259a8ac6cdf0037752904da6a7 -https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda#161081fc7cec0bfda0d86d7cb595f8d8 +https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_3.conda#49023d73832ef61042f6a237cb2687e7 +https://conda.anaconda.org/conda-forge/noarch/kernel-headers_linux-64-3.10.0-he073ed8_18.conda#ad8527bf134a90e1c9ed35fa0b64318c +https://conda.anaconda.org/conda-forge/noarch/python_abi-3.10-7_cp310.conda#44e871cba2b162368476a84b8d040b6c +https://conda.anaconda.org/conda-forge/noarch/tzdata-2025b-h78e105d_0.conda#4222072737ccff51314b5ece9c7d6f5a +https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2025.4.26-hbd8a1cb_0.conda#95db94f75ba080a22eb623590993167b https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-0.tar.bz2#f766549260d6815b0c52253f1fb1bb29 
-https://conda.anaconda.org/conda-forge/linux-64/libgomp-13.2.0-hc881cc4_6.conda#aae89d3736661c36a5591788aebd0817 -https://conda.anaconda.org/conda-forge/noarch/sysroot_linux-64-2.12-he073ed8_17.conda#595db67e32b276298ff3d94d07d47fbf -https://conda.anaconda.org/conda-forge/linux-64/binutils_impl_linux-64-2.40-ha885e6a_0.conda#800a4c872b5bc06fa83888d112fe6c4f +https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.43-h712a8e2_4.conda#01f8d123c96816249efd255a31ad7712 +https://conda.anaconda.org/conda-forge/noarch/libgcc-devel_linux-64-13.3.0-hc03c837_102.conda#4c1d6961a6a54f602ae510d9bf31fa60 +https://conda.anaconda.org/conda-forge/linux-64/libglvnd-1.7.0-ha4b6fd6_2.conda#434ca7e50e40f4918ab701e3facd59a0 +https://conda.anaconda.org/conda-forge/linux-64/libgomp-14.2.0-h767d61c_2.conda#06d02030237f4d5b3d9a7e7d348fe3c6 +https://conda.anaconda.org/conda-forge/noarch/libstdcxx-devel_linux-64-13.3.0-hc03c837_102.conda#aa38de2738c5f4a72a880e3d31ffe8b4 +https://conda.anaconda.org/conda-forge/noarch/sysroot_linux-64-2.17-h0157908_18.conda#460eba7851277ec1fd80a1a24080787a +https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2#73aaf86a425cc6e73fcf236a5a46396d +https://conda.anaconda.org/conda-forge/linux-64/binutils_impl_linux-64-2.43-h4bf12b8_4.conda#ef67db625ad0d2dce398837102f875ed https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2#fee5683a3f04bd15cbd8318b096a27ab -https://conda.anaconda.org/conda-forge/linux-64/binutils-2.40-h4852527_0.conda#a05c7712be80622934f7011e0a1d43fc -https://conda.anaconda.org/conda-forge/linux-64/binutils_linux-64-2.40-hdade7a5_3.conda#2d9a60578bc28469d9aeef9aea5520c3 -https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_kmp_llvm.tar.bz2#562b26ba2e19059551a811e72ab7f793 -https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.2.0-hc881cc4_6.conda#df88796bd09a0d2ed292e59101478ad8 -https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.11-hd590300_1.conda#0bb492cca54017ea314b809b1ee3a176 -https://conda.anaconda.org/conda-forge/linux-64/aom-3.8.2-h59595ed_0.conda#625e1fed28a5139aed71b3a76117ef84 -https://conda.anaconda.org/conda-forge/linux-64/attr-2.5.1-h166bdaf_1.tar.bz2#d9c69a24ad678ffce24c6543a0176b00 -https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hd590300_5.conda#69b8b6202a07720f448be700e300ccf4 -https://conda.anaconda.org/conda-forge/linux-64/charls-2.4.2-h59595ed_0.conda#4336bd67920dd504cd8c6761d6a99645 +https://conda.anaconda.org/conda-forge/linux-64/libegl-1.7.0-ha4b6fd6_2.conda#c151d5eb730e9b7480e6d48c0fc44048 +https://conda.anaconda.org/conda-forge/linux-64/libopengl-1.7.0-ha4b6fd6_2.conda#7df50d44d4a14d6c31a2c54f2cd92157 +https://conda.anaconda.org/conda-forge/linux-64/binutils-2.43-h4852527_4.conda#29782348a527eda3ecfc673109d28e93 +https://conda.anaconda.org/conda-forge/linux-64/binutils_linux-64-2.43-h4852527_4.conda#c87e146f5b685672d4aa6b527c6d3b5e +https://conda.anaconda.org/conda-forge/linux-64/libgcc-14.2.0-h767d61c_2.conda#ef504d1acbd74b7cc6849ef8af47dd03 +https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.14-hb9d3cd8_0.conda#76df83c2a9035c54df5d04ff81bcc02d +https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.1.0-hb9d3cd8_2.conda#41b599ed2b02abcfdd84302bff174b23 +https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.23-h86f0d12_0.conda#27fe770decaf469a53f3e3a6d593067f +https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.0-h5888daf_0.conda#db0bfbe7dd197b68ad5f30333bae6ce0 
+https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.6-h2dba641_1.conda#ede4673863426c0883c0063d853bbd85 +https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-14.2.0-h69a702a_2.conda#a2222a6ada71fb478682efe483ce0f92 +https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-14.2.0-hf1ad2bd_2.conda#556a4fdfac7287d349b8f09aba899693 +https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.18-h4ce23a2_1.conda#e796ff8ddc598affdf7c173d6145f087 +https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.1.0-hb9d3cd8_0.conda#9fa334557db9f63da6c9285fd2a48638 +https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.8.1-hb9d3cd8_1.conda#a76fd702c93cd2dfd89eff30a5fd45a8 +https://conda.anaconda.org/conda-forge/linux-64/libntlm-1.8-hb9d3cd8_0.conda#7c7927b404672409d9917d49bff5f2d6 +https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-14.2.0-h8f9b012_2.conda#a78c856b6dc6bf4ea8daeb9beaaa3fb0 +https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.5.0-h851e524_0.conda#63f790534398730f59e1b899c3644d4a +https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-hb9d3cd8_2.conda#edb0dca6bc32e4f4789199455a1dbeb8 +https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h2d0b736_3.conda#47e340acb35de30501a76c7c799c41d7 +https://conda.anaconda.org/conda-forge/linux-64/openssl-3.5.0-h7b32b05_1.conda#de356753cfdbffcde5bb1e86e3aa6cd0 +https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-hb9d3cd8_1002.conda#b3c17d95b5a10c6e64a21fa17573e70e +https://conda.anaconda.org/conda-forge/linux-64/xorg-libice-1.1.2-hb9d3cd8_0.conda#fb901ff28063514abb6046c9ec2c4a45 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.12-hb9d3cd8_0.conda#f6ebe2cb3f82ba6c057dde5d9debe4f7 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdmcp-1.1.5-hb9d3cd8_0.conda#8035c64cb77ed555e3f150b7b3972480 +https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h4bc722e_7.conda#62ee74e96c5ebb0af99386de58cf9553 https://conda.anaconda.org/conda-forge/linux-64/dav1d-1.2.1-hd590300_0.conda#418c6ca5929a611cbd69204907a83995 -https://conda.anaconda.org/conda-forge/linux-64/gettext-tools-0.22.5-h59595ed_2.conda#985f2f453fb72408d6b6f1be0f324033 +https://conda.anaconda.org/conda-forge/linux-64/double-conversion-3.3.1-h5888daf_0.conda#bfd56492d8346d669010eccafe0ba058 +https://conda.anaconda.org/conda-forge/linux-64/expat-2.7.0-h5888daf_0.conda#d6845ae4dea52a2f90178bf1829a21f8 https://conda.anaconda.org/conda-forge/linux-64/giflib-5.2.2-hd590300_0.conda#3bf7b9fd5a7136126e0234db4b87c8b6 -https://conda.anaconda.org/conda-forge/linux-64/graphite2-1.3.13-h59595ed_1003.conda#f87c7b7c2cb45f323ffbce941c78ab7c -https://conda.anaconda.org/conda-forge/linux-64/icu-73.2-h59595ed_0.conda#cc47e1facc155f91abd89b11e48e72ff https://conda.anaconda.org/conda-forge/linux-64/jxrlib-1.1-hd590300_3.conda#5aeabe88534ea4169d4c49998f293d6c https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2#30186d27e2c9fa62b45fb1476b7200e3 -https://conda.anaconda.org/conda-forge/linux-64/lame-3.100-h166bdaf_1003.tar.bz2#a8832b479f93521a9e7b5b743803be51 -https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h27087fc_0.tar.bz2#76bbff344f0134279f225174e9064c8f -https://conda.anaconda.org/conda-forge/linux-64/libaec-1.1.3-h59595ed_0.conda#5e97e271911b8b2001a8b71860c32faa -https://conda.anaconda.org/conda-forge/linux-64/libasprintf-0.22.5-h661eb56_2.conda#dd197c968bf9760bba0031888d431ede 
-https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.1.0-hd590300_1.conda#aec6c91c7371c26392a06708a73c70e5
-https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.20-hd590300_0.conda#8e88f9389f1165d7c0936fe40d9a9a79
-https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.6.2-h59595ed_0.conda#e7ba12deb7020dd080c6c70e7b6f6a3d
-https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3
-https://conda.anaconda.org/conda-forge/linux-64/libgettextpo-0.22.5-h59595ed_2.conda#172bcc51059416e7ce99e7b528cede83
-https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-13.2.0-h43f5ff8_6.conda#e54a5ddc67e673f9105cf2a2e9c070b0
-https://conda.anaconda.org/conda-forge/linux-64/libhwy-1.1.0-h00ab1b0_0.conda#88928158ccfe797eac29ef5e03f7d23d
-https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.17-hd590300_2.conda#d66573916ffcf376178462f1b61c941e
-https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.0.0-hd590300_1.conda#ea25936bb4080d843790b586850f82b8
+https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h0aef613_1.conda#9344155d33912347b37f0ae6c410a835
+https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.1.0-hb9d3cd8_2.conda#9566f0bd264fbd463002e759b8a82401
+https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.1.0-hb9d3cd8_2.conda#06f70867945ea6a84d35836af780f1de
+https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20250104-pl5321h7949ede_0.conda#c277e0a4d549b03ac1e9d6cbbe3d017b
+https://conda.anaconda.org/conda-forge/linux-64/libgfortran-14.2.0-h69a702a_2.conda#fb54c4ea68b460c278d26eea89cfbcc3
+https://conda.anaconda.org/conda-forge/linux-64/libhwy-1.2.0-hf40a0c7_0.conda#2f433d593a66044c3f163cb25f0a09de
 https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hd590300_0.conda#30fd6e37fe21f86f4bd26d6ee73eeec7
-https://conda.anaconda.org/conda-forge/linux-64/libogg-1.3.4-h7f98852_1.tar.bz2#6e8cc2173440d77708196c5b93771680
-https://conda.anaconda.org/conda-forge/linux-64/libopus-1.3.1-h7f98852_1.tar.bz2#15345e56d527b330e1cacbdf58676e8f
-https://conda.anaconda.org/conda-forge/linux-64/libsanitizer-12.3.0-h2af2641_6.conda#1cf0b420341bb1a7b7f34f6e0f4bbf2b
+https://conda.anaconda.org/conda-forge/linux-64/libpciaccess-0.18-hd590300_0.conda#48f4330bfcd959c3cfb704d424903c82
+https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.47-h943b412_0.conda#55199e2ae2c3651f6f9b2a447b47bdc9
+https://conda.anaconda.org/conda-forge/linux-64/libsanitizer-13.3.0-he8ea267_2.conda#2b6cdf7bb95d3d10ef4e38ce0bc95dba
+https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.49.1-hee588c1_2.conda#962d6ac93c30b1dfc54c9cccafd1003e
+https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-14.2.0-h4852527_2.conda#c75da67f045c2627f59e6fcb5f4e3a9b
 https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda#40b61aab5c7ba9ff276c41cfffe6b80b
-https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.4.0-hd590300_0.conda#b26e8aa824079e1be0294e7152ca4559
+https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.17.0-h8a09558_0.conda#92ed62436b625154323d40d5f2f11dd7
 https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda#5aa797f8787fe7a17d1b0821485b5adc
-https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-hd590300_5.conda#f36c115f1ee199da648e0597ec2047ad
-https://conda.anaconda.org/conda-forge/linux-64/libzopfli-1.0.3-h9c3ff4c_0.tar.bz2#c66fe2d123249af7651ebde8984c51c2
-https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.9.4-hcb278e6_0.conda#318b08df404f9c9be5712aaa5a6f0bb0
-https://conda.anaconda.org/conda-forge/linux-64/mpg123-1.32.6-h59595ed_0.conda#9160cdeb523a1b20cf8d2a0bf821f45d
-https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.4.20240210-h59595ed_0.conda#97da8860a0da5413c7c98a3b3838a645
-https://conda.anaconda.org/conda-forge/linux-64/ninja-1.12.0-h00ab1b0_0.conda#b048701d52e7cbb5f59ddd4d3b17bbf5
-https://conda.anaconda.org/conda-forge/linux-64/nspr-4.35-h27087fc_0.conda#da0ec11a6454ae19bff5b02ed881a2b1
-https://conda.anaconda.org/conda-forge/linux-64/openssl-3.2.1-hd590300_1.conda#9d731343cff6ee2e5a25c4a091bf8e2a
-https://conda.anaconda.org/conda-forge/linux-64/pixman-0.43.2-h59595ed_0.conda#71004cbf7924e19c02746ccde9fd7123
-https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-h36c2ea0_1001.tar.bz2#22dad4df6e8630e8dff2428f6f6a7036
+https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.10.0-h5888daf_1.conda#9de5350a85c4a20c685259b889aa6393
+https://conda.anaconda.org/conda-forge/linux-64/mysql-common-9.2.0-h266115a_0.conda#db22a0962c953e81a2a679ecb1fc6027
+https://conda.anaconda.org/conda-forge/linux-64/ninja-1.12.1-hff21bea_1.conda#2322531904f27501ee19847b87ba7c64
+https://conda.anaconda.org/conda-forge/linux-64/pixman-0.46.0-h29eaf8c_0.conda#d2f1c87d4416d1e7344cf92b1aaee1c4
 https://conda.anaconda.org/conda-forge/linux-64/rav1e-0.6.6-he8a937b_2.conda#77d9955b4abddb811cb8ab1aa7d743e4
-https://conda.anaconda.org/conda-forge/linux-64/snappy-1.2.0-hdb0a2a9_1.conda#843bbb8ace1d64ac50d64639ff38b014
-https://conda.anaconda.org/conda-forge/linux-64/svt-av1-2.0.0-h59595ed_0.conda#207e01ffa0eb2d2efb83fb6f46365a21
-https://conda.anaconda.org/conda-forge/linux-64/xorg-kbproto-1.0.7-h7f98852_1002.tar.bz2#4b230e8381279d76131116660f5a241a
-https://conda.anaconda.org/conda-forge/linux-64/xorg-libice-1.1.1-hd590300_0.conda#b462a33c0be1421532f28bfe8f4a7514
-https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.11-hd590300_0.conda#2c80dc38fface310c9bd81b17037fee5
-https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdmcp-1.1.3-h7f98852_0.tar.bz2#be93aabceefa2fac576e971aef407908
-https://conda.anaconda.org/conda-forge/linux-64/xorg-renderproto-0.11.1-h7f98852_1002.tar.bz2#06feff3d2634e3097ce2fe681474b534
-https://conda.anaconda.org/conda-forge/linux-64/xorg-xextproto-7.3.0-h0b41bf4_1003.conda#bce9f945da8ad2ae9b1d7165a64d0f87
-https://conda.anaconda.org/conda-forge/linux-64/xorg-xf86vidmodeproto-2.3.1-h7f98852_1002.tar.bz2#3ceea9668625c18f19530de98b15d5b0
-https://conda.anaconda.org/conda-forge/linux-64/xorg-xproto-7.0.31-h7f98852_1007.tar.bz2#b4a4381d54784606820704f7b5f05a15
-https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0
-https://conda.anaconda.org/conda-forge/linux-64/zfp-1.0.1-h59595ed_0.conda#fd486bffbf0d6841cf1456a8f2e3a995
-https://conda.anaconda.org/conda-forge/linux-64/zlib-ng-2.0.7-h0b41bf4_0.conda#49e8329110001f04923fe7e864990b0c
-https://conda.anaconda.org/conda-forge/linux-64/expat-2.6.2-h59595ed_0.conda#53fb86322bdb89496d7579fe3f02fd61
-https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-12.3.0-h1562d66_6.conda#5e4e8358a4ab43498e0ac3b6776d1c94
-https://conda.anaconda.org/conda-forge/linux-64/libasprintf-devel-0.22.5-h661eb56_2.conda#02e41ab5834dcdcc8590cf29d9526f50
-https://conda.anaconda.org/conda-forge/linux-64/libavif16-1.0.4-hd9d6309_2.conda#a8c65cba5f77abc1f2e85ab9a0e614aa
-https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.1.0-hd590300_1.conda#f07002e225d7a60a694d42a7bf5ff53f
-https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.1.0-hd590300_1.conda#5fc11c6020d421960607d821310fcd4d
-https://conda.anaconda.org/conda-forge/linux-64/libcap-2.69-h0f662aa_0.conda#25cb5999faa414e5ccb2c1388f62d3d5
-https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2.tar.bz2#4d331e44109e3f0e19b4cb8f9b82f3e1
-https://conda.anaconda.org/conda-forge/linux-64/libevent-2.1.12-hf998b51_1.conda#a1cfcc585f0c42bf8d5546bb1dfb668d
-https://conda.anaconda.org/conda-forge/linux-64/libgettextpo-devel-0.22.5-h59595ed_2.conda#b63d9b6da3653179a278077f0de20014
-https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-13.2.0-h69a702a_6.conda#3666a850342f8f3be88f9a93d948d027
-https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.43-h2797004_0.conda#009981dd9cfcaa4dbfa25ffaed86bcae
-https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.45.3-h2797004_0.conda#b3316cbe90249da4f8e84cd66e1cc55b
-https://conda.anaconda.org/conda-forge/linux-64/libvorbis-1.3.7-h9c3ff4c_0.tar.bz2#309dec04b70a3cc0f1e84a4013683bc0
-https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.15-h0b41bf4_0.conda#33277193f5b92bad9fdd230eb700929c
-https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.12.6-h232c23b_2.conda#9a3a42df8a95f65334dfc7b80da1195d
-https://conda.anaconda.org/conda-forge/linux-64/mysql-common-8.3.0-hf1915f5_4.conda#784a4df6676c581ca624fbe460703a6d
-https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.43-hcad00b1_0.conda#8292dea9e022d9610a11fce5e0896ed8
-https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4
+https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8c095d6_2.conda#283b96675859b20a825f8fa30f311446
+https://conda.anaconda.org/conda-forge/linux-64/snappy-1.2.1-h8bd8927_1.conda#3b3e64af585eadfb52bb90b553db5edf
+https://conda.anaconda.org/conda-forge/linux-64/svt-av1-3.0.2-h5888daf_0.conda#0096882bd623e6cc09e8bf920fc8fb47
 https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda#d453b98d9c83e71da0741bb0ff4d76bc
-https://conda.anaconda.org/conda-forge/linux-64/xorg-libsm-1.2.4-h7391055_0.conda#93ee23f12bc2e684548181256edd2cf6
-https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.13-hd590300_5.conda#68c34ec6149623be41a1933ab996a209
-https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.5-hfc55251_0.conda#04b88013080254850d6c01ed54810589
-https://conda.anaconda.org/conda-forge/linux-64/blosc-1.21.5-hc2324a3_1.conda#11d76bee958b1989bd1ac6ee7372ea6d
-https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.1.0-hd590300_1.conda#39f910d205726805a958da408ca194ba
-https://conda.anaconda.org/conda-forge/linux-64/c-blosc2-2.14.4-hb4ffafa_1.conda#84eb54e92644c328e087e1c725773317
-https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-h267a509_2.conda#9ae35c3d96db2c94ce0cef86efdfa2cb
-https://conda.anaconda.org/conda-forge/linux-64/gcc-12.3.0-h915e2ae_6.conda#ec683e084ea08ef94528f15d30fa1e03
-https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-12.3.0-h6477408_3.conda#7a53f84c45bdf4656ba27b9e9ed68b3d
-https://conda.anaconda.org/conda-forge/linux-64/gettext-0.22.5-h59595ed_2.conda#219ba82e95d7614cf7140d2a4afc0926
-https://conda.anaconda.org/conda-forge/linux-64/gfortran_impl_linux-64-12.3.0-h6d6b2fb_6.conda#d6c441226a4bd0af4c024e8c0f4a47cf
-https://conda.anaconda.org/conda-forge/linux-64/gxx_impl_linux-64-12.3.0-h1562d66_6.conda#5ad72ddd14e13d589dea2afe6e626619
-https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.2-h659d440_0.conda#cd95826dbd331ed1be26bdf401432844
-https://conda.anaconda.org/conda-forge/linux-64/libglib-2.80.0-hf2295e7_6.conda#9342e7c44c38bea649490f72d92c382d
-https://conda.anaconda.org/conda-forge/linux-64/libjxl-0.10.2-hcae5a98_0.conda#901db891e1e21afd8524cd636a8c8e3b
-https://conda.anaconda.org/conda-forge/linux-64/libllvm15-15.0.7-hb3ce162_4.conda#8a35df3cbc0c8b12cc8af9473ae75eef
-https://conda.anaconda.org/conda-forge/linux-64/libllvm18-18.1.3-h2448989_0.conda#927b6d6e80b2c0d4405a58b61ca248a3
-https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.27-pthreads_h413a1c8_0.conda#a356024784da6dfd4683dc5ecf45b155
-https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.6.0-h1dd3fc0_3.conda#66f03896ffbe1a110ffda05c7a856504
-https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-18.1.3-h4dfa4b3_0.conda#d39965123dffcad4d750989be65bcb7c
-https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-8.3.0-hca2cd23_4.conda#1b50eebe2a738a3146c154d2eceaa8b6
-https://conda.anaconda.org/conda-forge/linux-64/nss-3.98-h1d7d5a4_0.conda#54b56c2fdf973656b748e0378900ec13
-https://conda.anaconda.org/conda-forge/linux-64/python-3.9.19-h0755675_0_cpython.conda#d9ee3647fbd9e8595b8df759b2bbefb8
-https://conda.anaconda.org/conda-forge/linux-64/xcb-util-0.4.0-hd590300_1.conda#9bfac7ccd94d54fd21a0501296d60424
-https://conda.anaconda.org/conda-forge/linux-64/xcb-util-keysyms-0.4.0-h8ee46fc_1.conda#632413adcd8bc16b515cab87a2932913
-https://conda.anaconda.org/conda-forge/linux-64/xcb-util-renderutil-0.3.9-hd590300_1.conda#e995b155d938b6779da6ace6c6b13816
-https://conda.anaconda.org/conda-forge/linux-64/xcb-util-wm-0.4.1-h8ee46fc_1.conda#90108a432fb5c6150ccfee3f03388656
-https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.9-h8ee46fc_0.conda#077b6e8ad6a3ddb741fce2496dd01bec
-https://conda.anaconda.org/conda-forge/noarch/alabaster-0.7.16-pyhd8ed1ab_0.conda#def531a3ac77b7fb8c21d17bb5d0badb
-https://conda.anaconda.org/conda-forge/linux-64/brotli-1.1.0-hd590300_1.conda#f27a24d46e3ea7b70a1f98e50c62508f
-https://conda.anaconda.org/conda-forge/linux-64/brotli-python-1.1.0-py39h3d6467e_1.conda#c48418c8b35f1d59ae9ae1174812b40a
-https://conda.anaconda.org/conda-forge/linux-64/c-compiler-1.7.0-hd590300_0.conda#fad1d0a651bf929c6c16fbf1f6ccfa7c
-https://conda.anaconda.org/conda-forge/noarch/certifi-2024.2.2-pyhd8ed1ab_0.conda#0876280e409658fc6f9e75d035960333
-https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.3.2-pyhd8ed1ab_0.conda#7f4a9e3fcff3f6356ae99244a014da6a
-https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99
-https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhd8ed1ab_0.conda#5cd86562580f274031ede6aa6aa24441
-https://conda.anaconda.org/conda-forge/linux-64/cython-3.0.10-py39h3d6467e_0.conda#76b5d215fb735a6dc43010ffbe78040e
-https://conda.anaconda.org/conda-forge/linux-64/dbus-1.13.6-h5008d03_3.tar.bz2#ecfff944ba3960ecb334b9a2663d708d
-https://conda.anaconda.org/conda-forge/noarch/docutils-0.21.2-pyhd8ed1ab_0.conda#e8cd5d629f65bdf0f3bb312cde14659e
-https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.0-pyhd8ed1ab_2.conda#8d652ea2ee8eaee02ed8dc820bc794aa
-https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_0.conda#15dda3cdbf330abfe9f555d22f66db46
-https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.14.2-h14ed4e7_0.conda#0f69b688f52ff6da70bccb7ff7001d1d
-https://conda.anaconda.org/conda-forge/linux-64/gfortran-12.3.0-h915e2ae_6.conda#84b517f4f53e56256dbd65133aae04ac
-https://conda.anaconda.org/conda-forge/linux-64/gfortran_linux-64-12.3.0-h617cb40_3.conda#3a9e5b8a6f651ff14e74d896d8f04ab6
-https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.80.0-hde27a5a_6.conda#a9d23c02485c5cf055f9ac90eb9c9c63
-https://conda.anaconda.org/conda-forge/linux-64/gxx-12.3.0-h915e2ae_6.conda#0d977804df65082e17c860600ca2894b
-https://conda.anaconda.org/conda-forge/linux-64/gxx_linux-64-12.3.0-h4a1b8e8_3.conda#9ec22c7c544f4a4f6d660f0a3b0fd15c
-https://conda.anaconda.org/conda-forge/noarch/idna-3.7-pyhd8ed1ab_0.conda#c0cc1420498b17414d8617d0b9f506ca
+https://conda.anaconda.org/conda-forge/linux-64/wayland-1.23.1-h3e06ad9_1.conda#a37843723437ba75f42c9270ffe800b1
+https://conda.anaconda.org/conda-forge/linux-64/zfp-1.0.1-h5888daf_2.conda#e0409515c467b87176b070bff5d9442e
+https://conda.anaconda.org/conda-forge/linux-64/zlib-ng-2.2.4-h7955e40_0.conda#c8a816dbf59eb8ba6346a8f10014b302
+https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.7-hb8e6e7a_2.conda#6432cb5d4ac0046c3ac0a8a0f95842f9
+https://conda.anaconda.org/conda-forge/linux-64/aom-3.9.1-hac33072_0.conda#346722a0be40f6edc53f12640d301338
+https://conda.anaconda.org/conda-forge/linux-64/blosc-1.21.6-he440d0b_1.conda#2c2fae981fd2afd00812c92ac47d023d
+https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.1.0-hb9d3cd8_2.conda#c63b5e52939e795ba8d26e35d767a843
+https://conda.anaconda.org/conda-forge/linux-64/c-blosc2-2.15.2-h3122c55_1.conda#2bc8d76acd818d7e79229f5157d5c156
+https://conda.anaconda.org/conda-forge/linux-64/charls-2.4.2-h59595ed_0.conda#4336bd67920dd504cd8c6761d6a99645
+https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-13.3.0-h1e990d8_2.conda#f46cf0acdcb6019397d37df1e407ab91
+https://conda.anaconda.org/conda-forge/linux-64/graphite2-1.3.13-h59595ed_1003.conda#f87c7b7c2cb45f323ffbce941c78ab7c
+https://conda.anaconda.org/conda-forge/linux-64/icu-75.1-he02047a_0.conda#8b189310083baabfb622af68fd9d3ae3
+https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.3-h659f571_0.conda#3f43953b7d3fb3aaa1d0d0723d91e368
+https://conda.anaconda.org/conda-forge/linux-64/libaec-1.1.3-h59595ed_0.conda#5e97e271911b8b2001a8b71860c32faa
+https://conda.anaconda.org/conda-forge/linux-64/libdrm-2.4.124-hb9d3cd8_0.conda#8bc89311041d7fcb510238cf0848ccae
+https://conda.anaconda.org/conda-forge/linux-64/libfreetype6-2.13.3-h48d6fc4_1.conda#3c255be50a506c50765a93a6644f32fe
+https://conda.anaconda.org/conda-forge/linux-64/libjxl-0.11.1-h7b0646d_1.conda#959fc2b6c0df7883e070b3fe525219a5
+https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.29-pthreads_h94d23a6_0.conda#0a4d0252248ef9a0f88f2ba8b8a08e12
+https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.7.0-hd9ff511_4.conda#6c1028898cf3a2032d9af46689e1b81a
+https://conda.anaconda.org/conda-forge/linux-64/libzopfli-1.0.3-h9c3ff4c_0.tar.bz2#c66fe2d123249af7651ebde8984c51c2
+https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-9.2.0-he0572af_0.conda#93340b072c393d23c4700a1d40565dca
+https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.44-hc749103_2.conda#31614c73d7b103ef76faa4d83d261d34
+https://conda.anaconda.org/conda-forge/linux-64/python-3.10.17-hd6af730_0_cpython.conda#7bb89638dae9ce1b8e051d0b721e83c2
+https://conda.anaconda.org/conda-forge/linux-64/qhull-2020.2-h434a139_5.conda#353823361b1d27eb3960efb076dfcaf6
+https://conda.anaconda.org/conda-forge/linux-64/xcb-util-0.4.1-hb711507_2.conda#8637c3e5821654d0edf97e2b0404b443
+https://conda.anaconda.org/conda-forge/linux-64/xcb-util-keysyms-0.4.1-hb711507_0.conda#ad748ccca349aec3e91743e08b5e2b50
+https://conda.anaconda.org/conda-forge/linux-64/xcb-util-renderutil-0.3.10-hb711507_0.conda#0e0cbe0564d03a99afd5fd7b362feecd
+https://conda.anaconda.org/conda-forge/linux-64/xcb-util-wm-0.4.2-hb711507_0.conda#608e0ef8256b81d04456e8d211eee3e8
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libsm-1.2.6-he73a12e_0.conda#1c74ff8c35dcadf952a16f752ca5aa49
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.12-h4f16b4b_0.conda#db038ce880f100acc74dba10302b5630
+https://conda.anaconda.org/conda-forge/noarch/alabaster-1.0.0-pyhd8ed1ab_1.conda#1fd9696649f65fd6611fcdb4ffec738a
+https://conda.anaconda.org/conda-forge/linux-64/brotli-1.1.0-hb9d3cd8_2.conda#98514fe74548d768907ce7a13f680e8f
+https://conda.anaconda.org/conda-forge/linux-64/brotli-python-1.1.0-py310hf71b8c6_2.conda#bf502c169c71e3c6ac0d6175addfacc2
+https://conda.anaconda.org/conda-forge/noarch/certifi-2025.4.26-pyhd8ed1ab_0.conda#c33eeaaa33f45031be34cda513df39b6
+https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.4.2-pyhd8ed1ab_0.conda#40fe4284b8b5835a9073a645139f35af
+https://conda.anaconda.org/conda-forge/noarch/click-8.1.8-pyh707e725_0.conda#f22f4d4970e09d68a10b922cbb0408d3
+https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_1.conda#962b9857ee8e7018c22f2776ffa0b2d7
+https://conda.anaconda.org/conda-forge/noarch/cpython-3.10.17-py310hd8ed1ab_0.conda#e2b81369f0473107784f8b7da8e6a8e9
+https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhd8ed1ab_1.conda#44600c4667a319d67dbe0681fc0bc833
+https://conda.anaconda.org/conda-forge/linux-64/cyrus-sasl-2.1.27-h54b06d7_7.conda#dce22f70b4e5a407ce88f2be046f4ceb
+https://conda.anaconda.org/conda-forge/linux-64/cython-3.0.12-py310had8cdd9_0.conda#b630fe36f0b621d23e74872dc4fd2bd7
+https://conda.anaconda.org/conda-forge/noarch/docutils-0.21.2-pyhd8ed1ab_1.conda#24c1ca34138ee57de72a943237cde4cc
+https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.2-pyhd8ed1ab_1.conda#a16662747cdeb9abbac74d0057cc976e
+https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_1.conda#a71efeae2c160f6789900ba2631a2c90
+https://conda.anaconda.org/conda-forge/linux-64/gcc-13.3.0-h9576a4e_2.conda#d92e51bf4b6bdbfe45e5884fb0755afe
+https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-13.3.0-hc28eda2_10.conda#d151142bbafe5e68ec7fc065c5e6f80c
+https://conda.anaconda.org/conda-forge/linux-64/gfortran_impl_linux-64-13.3.0-h84c1745_2.conda#4e21ed177b76537067736f20f54fee0a
+https://conda.anaconda.org/conda-forge/linux-64/gxx_impl_linux-64-13.3.0-hae580e1_2.conda#b55f02540605c322a47719029f8404cc
+https://conda.anaconda.org/conda-forge/noarch/hpack-4.1.0-pyhd8ed1ab_0.conda#0a802cb9888dd14eeefc611f05c40b6e
+https://conda.anaconda.org/conda-forge/noarch/hyperframe-6.1.0-pyhd8ed1ab_0.conda#8e6923fc12f1fe8f8c4e5c9f343256ac
+https://conda.anaconda.org/conda-forge/noarch/idna-3.10-pyhd8ed1ab_1.conda#39a4f67be3286c86d696df570b1201b7
 https://conda.anaconda.org/conda-forge/noarch/imagesize-1.4.1-pyhd8ed1ab_0.tar.bz2#7de5386c8fea29e76b303f37dde4c352
-https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5
-https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.5-py39h7633fee_1.conda#c9f74d717e5a2847a9f8b779c54130f2
-https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.16-hb7c19ff_0.conda#51bb7010fc86f70eee639b4bb7a894f5
-https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-22_linux64_openblas.conda#1a2a0cd3153464fee6646f3dd6dad9b8
-https://conda.anaconda.org/conda-forge/linux-64/libclang-cpp15-15.0.7-default_h127d8a8_5.conda#d0a9633b53cdc319b8a1a532ae7822b8
-https://conda.anaconda.org/conda-forge/linux-64/libclang13-18.1.3-default_h5d6823c_0.conda#5fff487759736b275dc3e4a263cac666
+https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_1.conda#6837f3eff7dcea42ecd714ce1ac2b108
+https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.7-py310h3788b33_0.conda#4186d9b4d004b0fe0de6aa62496fb48a
+https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.17-h717163a_0.conda#000e85703f0fd9594c81710dd5066471
+https://conda.anaconda.org/conda-forge/linux-64/libavif16-1.2.1-hbb36593_2.conda#971387a27e61235b97cacb440a37e991
+https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-31_h59b9bed_openblas.conda#728dbebd0f7a20337218beacffd37916
 https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h4637d8d_4.conda#d4529f4dff3057982a7617c7ac58fde3
-https://conda.anaconda.org/conda-forge/linux-64/libflac-1.4.3-h59595ed_0.conda#ee48bf17cc83a00f59ca1494d5646869
-https://conda.anaconda.org/conda-forge/linux-64/libgpg-error-1.49-h4f305b6_0.conda#dfcfd72c7a430d3616763ecfbefe4ca9
-https://conda.anaconda.org/conda-forge/linux-64/libpq-16.2-h33b98f1_1.conda#9e49ec2a61d02623b379dc332eb6889d
-https://conda.anaconda.org/conda-forge/linux-64/markupsafe-2.1.5-py39hd1e30aa_0.conda#9a9a22eb1f83c44953319ee3b027769f
+https://conda.anaconda.org/conda-forge/linux-64/libfreetype-2.13.3-ha770c72_1.conda#51f5be229d83ecd401fb369ab96ae669
+https://conda.anaconda.org/conda-forge/linux-64/libglib-2.84.1-h2ff4ddf_0.conda#0305434da649d4fb48a425e588b79ea6
+https://conda.anaconda.org/conda-forge/linux-64/libglx-1.7.0-ha4b6fd6_2.conda#c8013e438185f33b13814c5c488acd5c
+https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.13.7-h4bc477f_1.conda#ad1f1f8238834cd3c88ceeaee8da444a
+https://conda.anaconda.org/conda-forge/linux-64/markupsafe-3.0.2-py310h89163eb_1.conda#8ce3f0332fd6de0d737e2911d329523f
+https://conda.anaconda.org/conda-forge/noarch/meson-1.8.0-pyh29332c3_0.conda#8e25221b702272394b86b0f4d7217f77
 https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19
-https://conda.anaconda.org/conda-forge/noarch/networkx-3.2.1-pyhd8ed1ab_0.conda#425fce3b531bed6ec3c74fab3e5f0a1c
-https://conda.anaconda.org/conda-forge/linux-64/openblas-0.3.27-pthreads_h7a3da1a_0.conda#4b422ebe8fc6a5320d0c1c22e5a46032
-https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.2-h488ebb8_0.conda#7f2e286780f072ed750df46dc2631138
-https://conda.anaconda.org/conda-forge/noarch/packaging-24.0-pyhd8ed1ab_0.conda#248f521b64ce055e7feae3105e7abeb8
-https://conda.anaconda.org/conda-forge/noarch/platformdirs-4.2.1-pyhd8ed1ab_0.conda#d478a8a3044cdff1aa6e62f9269cefe0
-https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_0.conda#d3483c8fc2dc2cc3f5cf43e26d60cabf
-https://conda.anaconda.org/conda-forge/noarch/ply-3.11-pyhd8ed1ab_2.conda#18c6deb6f9602e32446398203c8f0e91
-https://conda.anaconda.org/conda-forge/linux-64/psutil-5.9.8-py39hd1e30aa_0.conda#ec86403fde8793ac1c36f8afa3d15902
-https://conda.anaconda.org/conda-forge/noarch/pygments-2.17.2-pyhd8ed1ab_0.conda#140a7f159396547e9799aa98f9f0742e
-https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.2-pyhd8ed1ab_0.conda#b9a4dacf97241704529131a0dfc0494f
-https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha2e5f31_6.tar.bz2#2a7de29fb590ca14b5243c4c812c8025
-https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2024.1-pyhd8ed1ab_0.conda#98206ea9954216ee7540f0c773f2104d
-https://conda.anaconda.org/conda-forge/noarch/pytz-2024.1-pyhd8ed1ab_0.conda#3eeeeb9e4827ace8c0c1419c85d590ad
-https://conda.anaconda.org/conda-forge/noarch/setuptools-69.5.1-pyhd8ed1ab_0.conda#7462280d81f639363e6e63c81276bd9e
-https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2
+https://conda.anaconda.org/conda-forge/noarch/narwhals-1.38.0-pyhe01879c_0.conda#6d3bd92df4504d07c0ab7cfb81d7e4b1
+https://conda.anaconda.org/conda-forge/noarch/networkx-3.4.2-pyh267e887_2.conda#fd40bf7f7f4bc4b647dc8512053d9873
+https://conda.anaconda.org/conda-forge/linux-64/openblas-0.3.29-pthreads_h6ec200e_0.conda#7e4d48870b3258bea920d51b7f495a81
+https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.3-h5fbd93e_0.conda#9e5816bc95d285c115a3ebc2f8563564
+https://conda.anaconda.org/conda-forge/noarch/packaging-25.0-pyh29332c3_1.conda#58335b26c38bf4a20f399384c33cbcf9
+https://conda.anaconda.org/conda-forge/noarch/platformdirs-4.3.7-pyh29332c3_0.conda#e57da6fe54bb3a5556cf36d199ff07d8
+https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_1.conda#e9dcbce5f45f9ee500e728ae58b605b6
+https://conda.anaconda.org/conda-forge/linux-64/psutil-7.0.0-py310ha75aee5_0.conda#da7d592394ff9084a23f62a1186451a2
+https://conda.anaconda.org/conda-forge/noarch/pycparser-2.22-pyh29332c3_1.conda#12c566707c80111f9799308d9e265aef
+https://conda.anaconda.org/conda-forge/noarch/pygments-2.19.1-pyhd8ed1ab_0.conda#232fb4577b6687b2d503ef8e254270c9
+https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.2.3-pyhd8ed1ab_1.conda#513d3c262ee49b54a8fec85c5bc99764
+https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha55dd90_7.conda#461219d1a5bd61342293efa2c0c90eac
+https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2025.2-pyhd8ed1ab_0.conda#88476ae6ebd24f39261e0854ac244f33
+https://conda.anaconda.org/conda-forge/noarch/pytz-2025.2-pyhd8ed1ab_0.conda#bc8e3267d44011051f2eb14d22fb0960
+https://conda.anaconda.org/conda-forge/noarch/setuptools-80.1.0-pyhff2d567_0.conda#f6f72d0837c79eaec77661be43e8a691
+https://conda.anaconda.org/conda-forge/noarch/six-1.17.0-pyhd8ed1ab_0.conda#a451d576819089b0d672f18768be0f65
 https://conda.anaconda.org/conda-forge/noarch/snowballstemmer-2.2.0-pyhd8ed1ab_0.tar.bz2#4d22a9315e78c6827f806065957d566e
-https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-jsmath-1.0.1-pyhd8ed1ab_0.conda#da1d979339e2714c30a8e806a33ec087
-https://conda.anaconda.org/conda-forge/noarch/tabulate-0.9.0-pyhd8ed1ab_1.tar.bz2#4759805cce2d914c38472f70bf4d8bcb
-https://conda.anaconda.org/conda-forge/noarch/tenacity-8.2.3-pyhd8ed1ab_0.conda#1482e77f87c6a702a7e05ef22c9b197b
-https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.4.0-pyhc1e730c_0.conda#b296278eef667c673bf51de6535bad88
-https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095
-https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96
-https://conda.anaconda.org/conda-forge/linux-64/tornado-6.4-py39hd1e30aa_0.conda#1e865e9188204cdfb1fd2531780add88
-https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.11.0-pyha770c72_0.conda#6ef2fc37559256cf682d8b3375e89b80
-https://conda.anaconda.org/conda-forge/linux-64/unicodedata2-15.1.0-py39hd1e30aa_0.conda#1da984bbb6e765743e13388ba7b7b2c8
-https://conda.anaconda.org/conda-forge/noarch/wheel-0.43.0-pyhd8ed1ab_1.conda#0b5293a157c2b5cd513dd1b03d8d3aae
-https://conda.anaconda.org/conda-forge/linux-64/xcb-util-image-0.4.0-h8ee46fc_1.conda#9d7bcddf49cbf727730af10e71022c73
-https://conda.anaconda.org/conda-forge/linux-64/xkeyboard-config-2.41-hd590300_0.conda#81f740407b45e3f9047b3174fa94eb9e
-https://conda.anaconda.org/conda-forge/linux-64/xorg-libxext-1.3.4-h0b41bf4_2.conda#82b6df12252e6f32402b96dacc656fec
-https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.11-hd590300_0.conda#ed67c36f215b310412b2af935bf3e530
-https://conda.anaconda.org/conda-forge/noarch/zipp-3.17.0-pyhd8ed1ab_0.conda#2e4d6bc0b14e10f895fc6791a7d9b26a
-https://conda.anaconda.org/conda-forge/noarch/babel-2.14.0-pyhd8ed1ab_0.conda#9669586875baeced8fc30c0826c3270e
+https://conda.anaconda.org/conda-forge/noarch/soupsieve-2.7-pyhd8ed1ab_0.conda#fb32097c717486aa34b38a9db57eb49e
+https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-jsmath-1.0.1-pyhd8ed1ab_1.conda#fa839b5ff59e192f411ccc7dae6588bb
+https://conda.anaconda.org/conda-forge/noarch/tabulate-0.9.0-pyhd8ed1ab_2.conda#959484a66b4b76befcddc4fa97c95567
+https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.6.0-pyhecae5ae_0.conda#9d64911b31d57ca443e9f1e36b04385f
+https://conda.anaconda.org/conda-forge/noarch/tomli-2.2.1-pyhd8ed1ab_1.conda#ac944244f1fed2eb49bae07193ae8215
+https://conda.anaconda.org/conda-forge/linux-64/tornado-6.4.2-py310ha75aee5_0.conda#166d59aab40b9c607b4cc21c03924e9d
+https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.13.2-pyh29332c3_0.conda#83fc6ae00127671e301c9f44254c31b8
+https://conda.anaconda.org/conda-forge/linux-64/unicodedata2-16.0.0-py310ha75aee5_0.conda#1d7a4b9202cdd10d56ecdd7f6c347190
+https://conda.anaconda.org/conda-forge/noarch/wheel-0.45.1-pyhd8ed1ab_1.conda#75cb7132eb58d97896e173ef12ac9986
+https://conda.anaconda.org/conda-forge/linux-64/xcb-util-image-0.4.0-hb711507_2.conda#a0901183f08b6c7107aab109733a3c91
+https://conda.anaconda.org/conda-forge/linux-64/xkeyboard-config-2.44-hb9d3cd8_0.conda#7c91bfc90672888259675ad2ad28af9c
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxext-1.3.6-hb9d3cd8_0.conda#febbab7d15033c913d53c7a2c102309d
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxfixes-6.0.1-hb9d3cd8_0.conda#4bdb303603e9821baf5fe5fdff1dc8f8
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.12-hb9d3cd8_0.conda#96d57aba173e878a2089d5638016dc5e
+https://conda.anaconda.org/conda-forge/noarch/zipp-3.21.0-pyhd8ed1ab_1.conda#0c3cc595284c5e8f0f9900a9b228a332
+https://conda.anaconda.org/conda-forge/noarch/accessible-pygments-0.0.5-pyhd8ed1ab_1.conda#74ac5069774cdbc53910ec4d631a3999
+https://conda.anaconda.org/conda-forge/noarch/babel-2.17.0-pyhd8ed1ab_0.conda#0a01c169f0ab0f91b26e77a3301fbfe4
 https://conda.anaconda.org/conda-forge/linux-64/brunsli-0.1-h9c3ff4c_0.tar.bz2#c1ac6229d0bfd14f8354ff9ad2a26cad
-https://conda.anaconda.org/conda-forge/linux-64/cairo-1.18.0-h3faef2a_0.conda#f907bb958910dc404647326ca80c263e
-https://conda.anaconda.org/conda-forge/linux-64/cxx-compiler-1.7.0-h00ab1b0_0.conda#b4537c98cb59f8725b0e1e65816b4a28
-https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.51.0-py39hd1e30aa_0.conda#79f5dd8778873faa54e8f7b2729fe8a6
-https://conda.anaconda.org/conda-forge/linux-64/fortran-compiler-1.7.0-heb67821_0.conda#7ef7c0f111dad1c8006504a0f1ccd820
-https://conda.anaconda.org/conda-forge/linux-64/glib-2.80.0-hf2295e7_6.conda#a1e026a82a562b443845db5614ca568a
-https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-7.1.0-pyha770c72_0.conda#0896606848b2dc5cebdf111b6543aa04
-https://conda.anaconda.org/conda-forge/noarch/importlib_resources-6.4.0-pyhd8ed1ab_0.conda#c5d3907ad8bd7bf557521a1833cf7e6d
-https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.3-pyhd8ed1ab_0.conda#e7d8df6509ba635247ff9aea31134262
-https://conda.anaconda.org/conda-forge/noarch/joblib-1.4.0-pyhd8ed1ab_0.conda#e0ed1bf13ce3a440e022157bf4764465
-https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-22_linux64_openblas.conda#4b31699e0ec5de64d5896e580389c9a1
-https://conda.anaconda.org/conda-forge/linux-64/libgcrypt-1.10.3-hd590300_0.conda#32d16ad533c59bb0a3c5ffaf16110829
-https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-22_linux64_openblas.conda#b083767b6c877e24ee597d93b87ab838
-https://conda.anaconda.org/conda-forge/linux-64/libsndfile-1.2.2-hc60ed4a_1.conda#ef1910918dd895516a769ed36b5b3a4e
-https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.7.0-h662e7e4_0.conda#b32c0da42b1f24a98577bb3d7fc0b995
-https://conda.anaconda.org/conda-forge/noarch/memory_profiler-0.61.0-pyhd8ed1ab_0.tar.bz2#8b45f9f2b2f7a98b0ec179c8991a4a9b
-https://conda.anaconda.org/conda-forge/noarch/meson-1.4.0-pyhd8ed1ab_0.conda#52a0660cfa40b45bf254ecc3374cb2e0
-https://conda.anaconda.org/conda-forge/linux-64/pillow-10.3.0-py39h90c7501_0.conda#1e3b6af9592be71ce19f0a6aae05d97b
-https://conda.anaconda.org/conda-forge/noarch/pip-24.0-pyhd8ed1ab_0.conda#f586ac1e56c8638b64f9c8122a7b8a67
-https://conda.anaconda.org/conda-forge/noarch/plotly-5.21.0-pyhd8ed1ab_0.conda#c8f5835e6c3a850d9a000d23056d780b
-https://conda.anaconda.org/conda-forge/noarch/pyproject-metadata-0.8.0-pyhd8ed1ab_0.conda#573fe09d7bd0cd4bcc210d8369b5ca47
-https://conda.anaconda.org/conda-forge/noarch/pytest-7.4.4-pyhd8ed1ab_0.conda#a9d145de8c5f064b5fa68fb34725d9f4
-https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0-pyhd8ed1ab_0.conda#2cf4264fffb9e6eff6031c5b6884d61c
-https://conda.anaconda.org/conda-forge/linux-64/sip-6.7.12-py39h3d6467e_0.conda#e667a3ab0df62c54e60e1843d2e6defb
-https://conda.anaconda.org/conda-forge/noarch/urllib3-2.2.1-pyhd8ed1ab_0.conda#08807a87fa7af10754d46f63b368e016
-https://conda.anaconda.org/conda-forge/linux-64/compilers-1.7.0-ha770c72_0.conda#81458b3aed8ab8711951ec3c0c04e097
-https://conda.anaconda.org/conda-forge/linux-64/gstreamer-1.24.1-h98fc4e7_1.conda#b04b5cdf3ba01430db27979250bc5a1d
-https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-8.4.0-h3d44ed6_0.conda#27f46291a6aaa3c2a4f798ebd35a7ddb
-https://conda.anaconda.org/conda-forge/noarch/importlib-resources-6.4.0-pyhd8ed1ab_0.conda#dcbadab7a68738a028e195ab68ab2d2e
-https://conda.anaconda.org/conda-forge/noarch/lazy_loader-0.4-pyhd8ed1ab_0.conda#a284ff318fbdb0dd83928275b4b6087c
-https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.9.0-22_linux64_openblas.conda#1fd156abd41a4992835952f6f4d951d0
-https://conda.anaconda.org/conda-forge/linux-64/libsystemd0-255-h3516f8a_1.conda#3366af27f0b593544a6cd453c7932ac5
-https://conda.anaconda.org/conda-forge/noarch/meson-python-0.16.0-pyh0c530f3_0.conda#e16f0dbf502da873be9f9adb0dc52547
-https://conda.anaconda.org/conda-forge/linux-64/numpy-1.26.4-py39h474f0d3_0.conda#aa265f5697237aa13cc10f53fa8acc4f
-https://conda.anaconda.org/conda-forge/linux-64/pyqt5-sip-12.12.2-py39h3d6467e_5.conda#93aff412f3e49fdb43361c0215cbd72d
-https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.5.0-pyhd8ed1ab_0.conda#d5f595da2daead898ca958ac62f0307b
-https://conda.anaconda.org/conda-forge/noarch/requests-2.31.0-pyhd8ed1ab_0.conda#a30144e4156cdbb236f99ebb49828f8b
-https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-22_linux64_openblas.conda#63ddb593595c9cf5eb08d3de54d66df8
-https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.2.1-py39h7633fee_0.conda#bdc188e59857d6efab332714e0d01d93
-https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.24.1-hfa15dee_1.conda#a6dd2bbc684913e2bef0a54ce56fcbfb
-https://conda.anaconda.org/conda-forge/linux-64/imagecodecs-2024.1.1-py39ha98d97a_6.conda#9ada409e8a8202f848abfed8e4e3f6be
-https://conda.anaconda.org/conda-forge/noarch/imageio-2.34.1-pyh4b66e23_0.conda#bcf6a6f4c6889ca083e8d33afbafb8d5
-https://conda.anaconda.org/conda-forge/linux-64/pandas-2.2.2-py39hddac248_0.conda#259c4e76e6bda8888aefc098ae1ba749
-https://conda.anaconda.org/conda-forge/noarch/patsy-0.5.6-pyhd8ed1ab_0.conda#a5b55d1cb110cdcedc748b5c3e16e687
-https://conda.anaconda.org/conda-forge/linux-64/polars-0.20.23-py39ha963410_0.conda#4871f09d653e979d598d2d4cd5fa868d
-https://conda.anaconda.org/conda-forge/noarch/pooch-1.8.1-pyhd8ed1ab_0.conda#d15917f33140f8d2ac9ca44db7ec8a25
-https://conda.anaconda.org/conda-forge/linux-64/pulseaudio-client-17.0-hb77b528_0.conda#07f45f1be1c25345faddb8db0de8039b
-https://conda.anaconda.org/conda-forge/linux-64/pywavelets-1.4.1-py39h44dd56e_1.conda#d037c20e3da2e85f03ebd20ad480c359
-https://conda.anaconda.org/conda-forge/linux-64/scipy-1.13.0-py39h474f0d3_0.conda#46ae0ecba9726ab4fa44c78fefa522cf
-https://conda.anaconda.org/conda-forge/linux-64/blas-2.122-openblas.conda#5065468105542a8b23ea47bd8b6fa55f
-https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.8.4-py39he9076e7_0.conda#1919384a8420e7bb25f6c3a582e0857c
-https://conda.anaconda.org/conda-forge/linux-64/pyamg-5.1.0-py39hda80f44_0.conda#f225666c47726329201b604060f1436c
-https://conda.anaconda.org/conda-forge/linux-64/qt-main-5.15.8-hc9dc06e_21.conda#b325046180590c868ce0dbf267b82eb8
-https://conda.anaconda.org/conda-forge/linux-64/statsmodels-0.14.1-py39h44dd56e_0.conda#dc565186b972bd87e49b9c35390ddd8c
-https://conda.anaconda.org/conda-forge/noarch/tifffile-2024.4.18-pyhd8ed1ab_0.conda#9640ec921dce12e87e589ac634c7bd8a
-https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.15.9-py39h52134e7_5.conda#e1f148e57d071b09187719df86f513c1
-https://conda.anaconda.org/conda-forge/linux-64/scikit-image-0.22.0-py39hddac248_2.conda#8d502a4d2cbe5a45ff35ca8af8cbec0a
-https://conda.anaconda.org/conda-forge/noarch/seaborn-base-0.13.2-pyhd8ed1ab_0.conda#0918a9201e824211cdf444dbf8d55752
-https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.8.4-py39hf3d152e_0.conda#c66d2da2669fddc657b679bccab95775
-https://conda.anaconda.org/conda-forge/noarch/seaborn-0.13.2-hd8ed1ab_0.conda#fd31ebf5867914de597f9961c478e482
-https://conda.anaconda.org/conda-forge/noarch/numpydoc-1.7.0-pyhd8ed1ab_0.conda#1ad3afced398492586ca1bef70328be4
-https://conda.anaconda.org/conda-forge/noarch/sphinx-copybutton-0.5.2-pyhd8ed1ab_0.conda#ac832cc43adc79118cf6e23f1f9b8995
-https://conda.anaconda.org/conda-forge/noarch/sphinx-gallery-0.15.0-pyhd8ed1ab_0.conda#1a49ca9515ef9a96edff2eea06143dc6
+https://conda.anaconda.org/conda-forge/linux-64/c-compiler-1.9.0-h2b85faf_0.conda#3cb814f83f1f71ac1985013697f80cc1
+https://conda.anaconda.org/conda-forge/linux-64/cffi-1.17.1-py310h8deb56e_0.conda#1fc24a3196ad5ede2a68148be61894f4
+https://conda.anaconda.org/conda-forge/linux-64/dbus-1.13.6-h5008d03_3.tar.bz2#ecfff944ba3960ecb334b9a2663d708d
+https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.57.0-py310h89163eb_0.conda#34378af82141b3c1725dcdf898b28fc6
+https://conda.anaconda.org/conda-forge/linux-64/freetype-2.13.3-ha770c72_1.conda#9ccd736d31e0c6e41f54e704e5312811
+https://conda.anaconda.org/conda-forge/linux-64/gfortran-13.3.0-h9576a4e_2.conda#19e6d3c9cde10a0a9a170a684082588e
+https://conda.anaconda.org/conda-forge/linux-64/gfortran_linux-64-13.3.0-hb919d3a_10.conda#7ce070e3329cd10bf79dbed562a21bd4
+https://conda.anaconda.org/conda-forge/linux-64/gxx-13.3.0-h9576a4e_2.conda#07e8df00b7cd3084ad3ef598ce32a71c
+https://conda.anaconda.org/conda-forge/linux-64/gxx_linux-64-13.3.0-h6834431_10.conda#9a8ebde471cec5cc9c48f8682f434f92
+https://conda.anaconda.org/conda-forge/noarch/h2-4.2.0-pyhd8ed1ab_0.conda#b4754fb1bdcb70c8fd54f918301582c6
+https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-8.6.1-pyha770c72_0.conda#f4b39bf00c69f56ac01e020ebfac066c
+https://conda.anaconda.org/conda-forge/noarch/importlib_resources-6.5.2-pyhd8ed1ab_0.conda#c85c76dc67d75619a92f51dfbce06992
+https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.6-pyhd8ed1ab_0.conda#446bd6c8cb26050d528881df495ce646
+https://conda.anaconda.org/conda-forge/noarch/joblib-1.5.0-pyhd8ed1ab_0.conda#3d7257f0a61c9aa4ffa3e324a887416b
+https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-31_he106b2a_openblas.conda#abb32c727da370c481a1c206f5159ce9
+https://conda.anaconda.org/conda-forge/linux-64/libgl-1.7.0-ha4b6fd6_2.conda#928b8be80851f5d8ffb016f9c81dae7a
+https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-31_h7ac8fdf_openblas.conda#452b98eafe050ecff932f0ec832dd03f
+https://conda.anaconda.org/conda-forge/linux-64/libllvm20-20.1.4-he9d0ab4_0.conda#96c33bbd084ef2b2463503fb7f1482ae
+https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.9.2-h65c71a3_0.conda#d045b1d878031eb497cab44e6392b1df
+https://conda.anaconda.org/conda-forge/linux-64/libxslt-1.1.39-h76b75d6_0.conda#e71f31f8cfb0a91439f2086fc8aa0461
+https://conda.anaconda.org/conda-forge/noarch/memory_profiler-0.61.0-pyhd8ed1ab_1.conda#71abbefb6f3b95e1668cd5e0af3affb9
+https://conda.anaconda.org/conda-forge/linux-64/openldap-2.6.9-he970967_0.conda#ca2de8bbdc871bce41dbf59e51324165
+https://conda.anaconda.org/conda-forge/noarch/pip-25.1.1-pyh8b19718_0.conda#32d0781ace05105cc99af55d36cbec7c
+https://conda.anaconda.org/conda-forge/noarch/plotly-6.0.1-pyhd8ed1ab_0.conda#37ce02c899ff42ac5c554257b1a5906e
+https://conda.anaconda.org/conda-forge/noarch/pyproject-metadata-0.9.1-pyhd8ed1ab_0.conda#22ae7c6ea81e0c8661ef32168dda929b
+https://conda.anaconda.org/conda-forge/noarch/pytest-8.3.5-pyhd8ed1ab_0.conda#c3c9316209dec74a705a36797970c6be
+https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0.post0-pyhff2d567_1.conda#5ba79d7c71f03c678c8ead841f347d6e
+https://conda.anaconda.org/conda-forge/noarch/python-gil-3.10.17-hd8ed1ab_0.conda#c856adbd93a57004e21cd26564f4f724
+https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.13.2-h0e9735f_0.conda#568ed1300869dca0ba09fb750cda5dbb
+https://conda.anaconda.org/conda-forge/linux-64/xcb-util-cursor-0.1.5-hb9d3cd8_0.conda#eb44b3b6deb1cab08d72cb61686fe64c
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxcomposite-0.4.6-hb9d3cd8_2.conda#d3c295b50f092ab525ffe3c2aa4b7413
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxcursor-1.2.3-hb9d3cd8_0.conda#2ccd714aa2242315acaf0a67faea780b
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdamage-1.1.6-hb9d3cd8_0.conda#b5fcc7172d22516e1f965490e65e33a4
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxi-1.8.2-hb9d3cd8_0.conda#17dcc85db3c7886650b8908b183d6876
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrandr-1.5.4-hb9d3cd8_0.conda#2de7f99d6581a4a7adbff607b5c278ca
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxxf86vm-1.1.6-hb9d3cd8_0.conda#5efa5fa6243a622445fdfd72aee15efa
+https://conda.anaconda.org/conda-forge/noarch/_python_abi3_support-1.0-hd8ed1ab_2.conda#aaa2a381ccc56eac91d63b6c1240312f
+https://conda.anaconda.org/conda-forge/noarch/beautifulsoup4-4.13.4-pyha770c72_0.conda#9f07c4fc992adb2d6c30da7fab3959a7
+https://conda.anaconda.org/conda-forge/linux-64/cxx-compiler-1.9.0-h1a2810e_0.conda#1ce8b218d359d9ed0ab481f2a3f3c512
+https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.15.0-h7e30c49_1.conda#8f5b0b297b59e1ac160ad4beec99dbee
+https://conda.anaconda.org/conda-forge/linux-64/fortran-compiler-1.9.0-h36df796_0.conda#cc0cf942201f9d3b0e9654ea02e12486
+https://conda.anaconda.org/conda-forge/noarch/importlib-resources-6.5.2-pyhd8ed1ab_0.conda#e376ea42e9ae40f3278b0f79c9bf9826
+https://conda.anaconda.org/conda-forge/noarch/lazy-loader-0.4-pyhd8ed1ab_2.conda#d10d9393680734a8febc4b362a4c94f2
+https://conda.anaconda.org/conda-forge/linux-64/libclang-cpp20.1-20.1.4-default_h1df26ce_0.conda#96f8d5b2e94c9ba4fef19f1adf068a15
+https://conda.anaconda.org/conda-forge/linux-64/libclang13-20.1.4-default_he06ed0a_0.conda#2d933632c8004be47deb2be61bf013be
+https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.9.0-31_he2f377e_openblas.conda#7e5fff7d0db69be3a266f7e79a3bb0e2
+https://conda.anaconda.org/conda-forge/linux-64/libpq-17.4-h27ae623_1.conda#37fba334855ef3b51549308e61ed7a3d
+https://conda.anaconda.org/conda-forge/noarch/meson-python-0.18.0-pyh70fd9c4_0.conda#576c04b9d9f8e45285fb4d9452c26133
+https://conda.anaconda.org/conda-forge/linux-64/numpy-2.2.5-py310hefbff90_0.conda#5526bc875ec897f0d335e38da832b6ee
+https://conda.anaconda.org/conda-forge/linux-64/pillow-11.1.0-py310h7e6dc6c_0.conda#14d300b9e1504748e70cc6499a7b4d25
+https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.6.1-pyhd8ed1ab_1.conda#59aad4fb37cabc0bacc73cf344612ddd
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxtst-1.2.5-hb9d3cd8_3.conda#7bbe9a0cc0df0ac5f5a8ad6d6a11af2f
+https://conda.anaconda.org/conda-forge/linux-64/zstandard-0.23.0-py310ha75aee5_2.conda#f9254b5b0193982416b91edcb4b2676f
+https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-31_h1ea3ea9_openblas.conda#ba652ee0576396d4765e567f043c57f9
+https://conda.anaconda.org/conda-forge/linux-64/cairo-1.18.4-h3394656_0.conda#09262e66b19567aff4f592fb53b28760
+https://conda.anaconda.org/conda-forge/linux-64/compilers-1.9.0-ha770c72_0.conda#5859096e397aba423340d0bbbb11ec64
+https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.3.2-py310h3788b33_0.conda#b6420d29123c7c823de168f49ccdfe6a
+https://conda.anaconda.org/conda-forge/linux-64/imagecodecs-2024.12.30-py310h78a9a29_0.conda#e0c50079904122427bcf52e1afcd1cdb
+https://conda.anaconda.org/conda-forge/noarch/imageio-2.37.0-pyhfb79c49_0.conda#b5577bc2212219566578fd5af9993af6
+https://conda.anaconda.org/conda-forge/noarch/lazy_loader-0.4-pyhd8ed1ab_2.conda#bb0230917e2473c77d615104dbe8a49d
+https://conda.anaconda.org/conda-forge/linux-64/pandas-2.2.3-py310h5eaa309_3.conda#07697a584fab513ce895c4511f7a2403
+https://conda.anaconda.org/conda-forge/noarch/patsy-1.0.1-pyhd8ed1ab_1.conda#ee23fabfd0a8c6b8d6f3729b47b2859d
+https://conda.anaconda.org/conda-forge/linux-64/polars-1.27.1-py39h2a4a510_3.conda#fba08963eaa1f954480045d033d1221e
+https://conda.anaconda.org/conda-forge/linux-64/pywavelets-1.8.0-py310hf462985_0.conda#4c441eff2be2e65bd67765c5642051c5
+https://conda.anaconda.org/conda-forge/linux-64/scipy-1.15.2-py310h1d65ade_0.conda#8c29cd33b64b2eb78597fa28b5595c8d
+https://conda.anaconda.org/conda-forge/noarch/towncrier-24.8.0-pyhd8ed1ab_1.conda#820b6a1ddf590fba253f8204f7200d82
+https://conda.anaconda.org/conda-forge/noarch/urllib3-2.4.0-pyhd8ed1ab_0.conda#c1e349028e0052c4eea844e94f773065
+https://conda.anaconda.org/conda-forge/linux-64/blas-2.131-openblas.conda#38b2ec894c69bb4be0e66d2ef7fc60bf
+https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-11.1.0-h3beb420_0.conda#95e3bb97f9cdc251c0c68640e9c10ed3
+https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.10.1-py310h68603db_0.conda#29cf3f5959afb841eda926541f26b0fb
+https://conda.anaconda.org/conda-forge/linux-64/pyamg-5.2.1-py310ha2bacc8_1.conda#817d32861729e14f474249f1036291c4
+https://conda.anaconda.org/conda-forge/noarch/requests-2.32.3-pyhd8ed1ab_1.conda#a9b9368f3701a417eac9edbcae7cb737
+https://conda.anaconda.org/conda-forge/linux-64/statsmodels-0.14.4-py310hf462985_0.conda#636d3c500d8a851e377360e88ec95372
+https://conda.anaconda.org/conda-forge/noarch/tifffile-2025.3.30-pyhd8ed1ab_0.conda#14f46147fae19bb867f82a787c7059e9
+https://conda.anaconda.org/conda-forge/noarch/pooch-1.8.2-pyhd8ed1ab_1.conda#b3e783e8e8ed7577cf0b6dee37d1fbac
+https://conda.anaconda.org/conda-forge/linux-64/qt6-main-6.9.0-h6441bc3_1.conda#4029a8dcb1d97ea241dbe5abfda1fad6
+https://conda.anaconda.org/conda-forge/linux-64/scikit-image-0.25.2-py310h5eaa309_0.conda#4cc3a231679ecb3c0ba20ebf3c27d12e
+https://conda.anaconda.org/conda-forge/noarch/seaborn-base-0.13.2-pyhd8ed1ab_3.conda#fd96da444e81f9e6fcaac38590f3dd42
+https://conda.anaconda.org/conda-forge/linux-64/pyside6-6.9.0-py310hfd10a26_0.conda#1610ccfe262ee519716bb69bd4395572
+https://conda.anaconda.org/conda-forge/noarch/seaborn-0.13.2-hd8ed1ab_3.conda#62afb877ca2c2b4b6f9ecb37320085b6
+https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.10.1-py310hff52083_0.conda#45c1ad6a0351492b56d1b2bb5442cdfa
+https://conda.anaconda.org/conda-forge/noarch/numpydoc-1.8.0-pyhd8ed1ab_1.conda#5af206d64d18d6c8dfb3122b4d9e643b
+https://conda.anaconda.org/conda-forge/noarch/pydata-sphinx-theme-0.16.1-pyhd8ed1ab_0.conda#837aaf71ddf3b27acae0e7e9015eebc6
+https://conda.anaconda.org/conda-forge/noarch/sphinx-copybutton-0.5.2-pyhd8ed1ab_1.conda#bf22cb9c439572760316ce0748af3713
+https://conda.anaconda.org/conda-forge/noarch/sphinx-design-0.6.1-pyhd8ed1ab_2.conda#3e6c15d914b03f83fc96344f917e0838
+https://conda.anaconda.org/conda-forge/noarch/sphinx-gallery-0.19.0-pyhd8ed1ab_0.conda#3cfa26d23bd7987d84051879f202a855
 https://conda.anaconda.org/conda-forge/noarch/sphinx-prompt-1.4.0-pyhd8ed1ab_0.tar.bz2#88ee91e8679603f2a5bd036d52919cc2
-https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-applehelp-1.0.8-pyhd8ed1ab_0.conda#611a35a27914fac3aa37611a6fe40bb5
-https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-devhelp-1.0.6-pyhd8ed1ab_0.conda#d7e4954df0d3aea2eacc7835ad12671d
-https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-htmlhelp-2.0.5-pyhd8ed1ab_0.conda#7e1e7437273682ada2ed5e9e9714b140
-https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-qthelp-1.0.7-pyhd8ed1ab_0.conda#26acae54b06f178681bfb551760f5dd1
-https://conda.anaconda.org/conda-forge/noarch/sphinx-7.3.7-pyhd8ed1ab_0.conda#7b1465205e28d75d2c0e1a868ee00a67
-https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-serializinghtml-1.1.10-pyhd8ed1ab_0.conda#e507335cb4ca9cff4c3d0fa9cdab255e
-https://conda.anaconda.org/conda-forge/noarch/sphinxext-opengraph-0.9.1-pyhd8ed1ab_0.conda#286283e05a1eff606f55e7cd70f6d7f7
-# pip attrs @ https://files.pythonhosted.org/packages/e0/44/827b2a91a5816512fcaf3cc4ebc465ccd5d598c45cefa6703fcf4a79018f/attrs-23.2.0-py3-none-any.whl#sha256=99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1
-# pip cloudpickle @ https://files.pythonhosted.org/packages/96/43/dae06432d0c4b1dc9e9149ad37b4ca8384cf6eb7700cd9215b177b914f0a/cloudpickle-3.0.0-py3-none-any.whl#sha256=246ee7d0c295602a036e86369c77fecda4ab17b506496730f2f576d9016fd9c7
+https://conda.anaconda.org/conda-forge/noarch/sphinx-remove-toctrees-1.0.0.post1-pyhd8ed1ab_1.conda#b275c865b753413caaa8548b9d44c024
+https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-applehelp-2.0.0-pyhd8ed1ab_1.conda#16e3f039c0aa6446513e94ab18a8784b
+https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-devhelp-2.0.0-pyhd8ed1ab_1.conda#910f28a05c178feba832f842155cbfff
+https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-htmlhelp-2.1.0-pyhd8ed1ab_1.conda#e9fb3fe8a5b758b4aff187d434f94f03
+https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-qthelp-2.0.0-pyhd8ed1ab_1.conda#00534ebcc0375929b45c3039b5ba7636
+https://conda.anaconda.org/conda-forge/noarch/sphinx-8.1.3-pyhd8ed1ab_1.conda#1a3281a0dc355c02b5506d87db2d78ac
+https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-serializinghtml-1.1.10-pyhd8ed1ab_1.conda#3bc61f7161d28137797e038263c04c54
+https://conda.anaconda.org/conda-forge/noarch/sphinxext-opengraph-0.9.1-pyhd8ed1ab_1.conda#79f5d05ad914baf152fb7f75073fe36d
+# pip attrs @ https://files.pythonhosted.org/packages/77/06/bb80f5f86020c4551da315d78b3ab75e8228f89f0162f2c3a819e407941a/attrs-25.3.0-py3-none-any.whl#sha256=427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3
+# pip cloudpickle @ https://files.pythonhosted.org/packages/7e/e8/64c37fadfc2816a7701fa8a6ed8d87327c7d54eacfbfb6edab14a2f2be75/cloudpickle-3.1.1-py3-none-any.whl#sha256=c8c5a44295039331ee9dad40ba100a9c7297b6f988e50e87ccdf3765a668350e
 # pip defusedxml @ https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl#sha256=a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61
-# pip fastjsonschema @ https://files.pythonhosted.org/packages/9c/b9/79691036d4a8f9857e74d1728b23f34f583b81350a27492edda58d5604e1/fastjsonschema-2.19.1-py3-none-any.whl#sha256=3672b47bc94178c9f23dbb654bf47440155d4db9df5f7bc47643315f9c405cd0
+# pip fastjsonschema @ https://files.pythonhosted.org/packages/90/2b/0817a2b257fe88725c25589d89aec060581aabf668707a8d03b2e9e0cb2a/fastjsonschema-2.21.1-py3-none-any.whl#sha256=c9e5b7e908310918cf494a434eeb31384dd84a98b57a30bcb1f535015b554667
 # pip fqdn @ https://files.pythonhosted.org/packages/cf/58/8acf1b3e91c58313ce5cb67df61001fc9dcd21be4fadb76c1a2d540e09ed/fqdn-1.5.1-py3-none-any.whl#sha256=3a179af3761e4df6eb2e026ff9e1a3033d3587bf980a0b1b2e1e5d08d7358014
-# pip json5 @ https://files.pythonhosted.org/packages/8a/3c/4f8791ee53ab9eeb0b022205aa79387119a74cc9429582ce04098e6fc540/json5-0.9.25-py3-none-any.whl#sha256=34ed7d834b1341a86987ed52f3f76cd8ee184394906b6e22a1e0deb9ab294e8f
-# pip jsonpointer @ https://files.pythonhosted.org/packages/12/f6/0232cc0c617e195f06f810534d00b74d2f348fe71b2118009ad8ad31f878/jsonpointer-2.4-py2.py3-none-any.whl#sha256=15d51bba20eea3165644553647711d150376234112651b4f1811022aecad7d7a
+# pip json5 @ https://files.pythonhosted.org/packages/41/9f/3500910d5a98549e3098807493851eeef2b89cdd3032227558a104dfe926/json5-0.12.0-py3-none-any.whl#sha256=6d37aa6c08b0609f16e1ec5ff94697e2cbbfbad5ac112afa05794da9ab7810db
+# pip jsonpointer @ https://files.pythonhosted.org/packages/71/92/5e77f98553e9e75130c78900d000368476aed74276eb8ae8796f65f00918/jsonpointer-3.0.0-py2.py3-none-any.whl#sha256=13e088adc14fca8b6aa8177c044e12701e6ad4b28ff10e65f2267a90109c9942
 # pip jupyterlab-pygments @ https://files.pythonhosted.org/packages/b1/dd/ead9d8ea85bf202d90cc513b533f9c363121c7792674f78e0d8a854b63b4/jupyterlab_pygments-0.3.0-py3-none-any.whl#sha256=841a89020971da1d8693f1a99997aefc5dc424bb1b251fd6322462a1b8842780
-# pip mistune @ https://files.pythonhosted.org/packages/f0/74/c95adcdf032956d9ef6c89a9b8a5152bf73915f8c633f3e3d88d06bd699c/mistune-3.0.2-py3-none-any.whl#sha256=71481854c30fdbc938963d3605b72501f5c10a9320ecd412c121c163a1c7d205
+# pip libsass @ https://files.pythonhosted.org/packages/fd/5a/eb5b62641df0459a3291fc206cf5bd669c0feed7814dded8edef4ade8512/libsass-0.23.0-cp38-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.whl#sha256=4a218406d605f325d234e4678bd57126a66a88841cb95bee2caeafdc6f138306
+# pip mdurl @ https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl#sha256=84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8
 # pip overrides @ https://files.pythonhosted.org/packages/2c/ab/fc8290c6a4c722e5514d80f62b2dc4c4df1a68a41d1364e625c35990fcf3/overrides-7.7.0-py3-none-any.whl#sha256=c7ed9d062f78b8e4c1a7b70bd8796b35ead4d9f510227ef9c5dc7626c60d7e49
 # pip pandocfilters @ https://files.pythonhosted.org/packages/ef/af/4fbc8cab944db5d21b7e2a5b8e9211a03a79852b1157e2c102fcc61ac440/pandocfilters-1.5.1-py2.py3-none-any.whl#sha256=93be382804a9cdb0a7267585f157e5d1731bbe5545a85b268d6f5fe6232de2bc
-# pip pkginfo @ https://files.pythonhosted.org/packages/56/09/054aea9b7534a15ad38a363a2bd974c20646ab1582a387a95b8df1bfea1c/pkginfo-1.10.0-py3-none-any.whl#sha256=889a6da2ed7ffc58ab5b900d888ddce90bce912f2d2de1dc1c26f4cb9fe65097
-# pip prometheus-client @ https://files.pythonhosted.org/packages/c7/98/745b810d822103adca2df8decd4c0bbe839ba7ad3511af3f0d09692fc0f0/prometheus_client-0.20.0-py3-none-any.whl#sha256=cde524a85bce83ca359cc837f28b8c0db5cac7aa653a588fd7e84ba061c329e7
+# pip pkginfo @ https://files.pythonhosted.org/packages/fa/3d/f4f2ba829efb54b6cd2d91349c7463316a9cc55a43fc980447416c88540f/pkginfo-1.12.1.2-py3-none-any.whl#sha256=c783ac885519cab2c34927ccfa6bf64b5a704d7c69afaea583dd9b7afe969343
+# pip prometheus-client @ https://files.pythonhosted.org/packages/ff/c2/ab7d37426c179ceb9aeb109a85cda8948bb269b7561a0be870cc656eefe4/prometheus_client-0.21.1-py3-none-any.whl#sha256=594b45c410d6f4f8888940fe80b5cc2521b305a1fafe1c58609ef715a001f301
 # pip ptyprocess @ https://files.pythonhosted.org/packages/22/a6/858897256d0deac81a172289110f31629fc4cee19b6f01283303e18c8db3/ptyprocess-0.7.0-py2.py3-none-any.whl#sha256=4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35
-# pip pycparser @ https://files.pythonhosted.org/packages/13/a3/a812df4e2dd5696d1f351d58b8fe16a405b234ad2886a0dab9183fb78109/pycparser-2.22-py3-none-any.whl#sha256=c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc
-# pip python-json-logger @ https://files.pythonhosted.org/packages/35/a6/145655273568ee78a581e734cf35beb9e33a370b29c5d3c8fee3744de29f/python_json_logger-2.0.7-py3-none-any.whl#sha256=f380b826a991ebbe3de4d897aeec42760035ac760345e57b812938dc8b35e2bd
-# pip pyyaml @ https://files.pythonhosted.org/packages/7d/39/472f2554a0f1e825bd7c5afc11c817cd7a2f3657460f7159f691fbb37c51/PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c
+# pip python-json-logger @ https://files.pythonhosted.org/packages/08/20/0f2523b9e50a8052bc6a8b732dfc8568abbdc42010aef03a2d750bdab3b2/python_json_logger-3.3.0-py3-none-any.whl#sha256=dd980fae8cffb24c13caf6e158d3d61c0d6d22342f932cb6e9deedab3d35eec7
+# pip pyyaml @ https://files.pythonhosted.org/packages/6b/4e/1523cb902fd98355e2e9ea5e5eb237cbc5f3ad5f3075fa65087aa0ecb669/PyYAML-6.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=ec031d5d2feb36d1d1a24380e4db6d43695f3748343d99434e6f5f9156aaa2ed
 # pip rfc3986-validator @ https://files.pythonhosted.org/packages/9e/51/17023c0f8f1869d8806b979a2bffa3f861f26a3f1a66b094288323fba52f/rfc3986_validator-0.1.1-py2.py3-none-any.whl#sha256=2f235c432ef459970b4306369336b9d5dbdda31b510ca1e327636e01f528bfa9
-# pip rpds-py @ https://files.pythonhosted.org/packages/fd/ea/92231b62681961812e9fbd8ef9be7137856784406bf6a384976bb7b46472/rpds_py-0.18.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=ddc2f4dfd396c7bfa18e6ce371cba60e4cf9d2e5cdb71376aa2da264605b60b9
+# pip rpds-py @ https://files.pythonhosted.org/packages/a7/a7/6d04d438f53d8bb2356bb000bea9cf5c96a9315e405b577117e344cc7404/rpds_py-0.24.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=1b221c2457d92a1fb3c97bee9095c874144d196f47c038462ae6e4a14436f7bc
 # pip send2trash @ https://files.pythonhosted.org/packages/40/b0/4562db6223154aa4e22f939003cb92514c79f3d4dccca3444253fd17f902/Send2Trash-1.8.3-py3-none-any.whl#sha256=0c31227e0bd08961c7665474a3d1ef7193929fedda4233843689baa056be46c9
 # pip sniffio @ https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl#sha256=2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2
-# pip soupsieve @ https://files.pythonhosted.org/packages/4c/f3/038b302fdfbe3be7da016777069f26ceefe11a681055ea1f7817546508e3/soupsieve-2.5-py3-none-any.whl#sha256=eaa337ff55a1579b6549dc679565eac1e3d000563bcb1c8ab0d0fefbc0c2cdc7
 # pip traitlets @ https://files.pythonhosted.org/packages/00/c0/8f5d070730d7836adc9c9b6408dec68c6ced86b304a9b26a14df072a6e8c/traitlets-5.14.3-py3-none-any.whl#sha256=b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f
-# pip types-python-dateutil @ https://files.pythonhosted.org/packages/c7/1b/af4f4c4f3f7339a4b7eb3c0ab13416db98f8ac09de3399129ee5fdfa282b/types_python_dateutil-2.9.0.20240316-py3-none-any.whl#sha256=6b8cb66d960771ce5ff974e9dd45e38facb81718cc1e208b10b1baccbfdbee3b
+# pip types-python-dateutil @ https://files.pythonhosted.org/packages/0f/b3/ca41df24db5eb99b00d97f89d7674a90cb6b3134c52fb8121b6d8d30f15c/types_python_dateutil-2.9.0.20241206-py3-none-any.whl#sha256=e248a4bc70a486d3e3ec84d0dc30eec3a5f979d6e7ee4123ae043eedbb987f53
 # pip uri-template @ https://files.pythonhosted.org/packages/e7/00/3fca040d7cf8a32776d3d81a00c8ee7457e00f80c649f1e4a863c8321ae9/uri_template-1.3.0-py3-none-any.whl#sha256=a44a133ea12d44a0c0f06d7d42a52d71282e77e2f937d8abd5655b8d56fc1363
-# pip webcolors @ https://files.pythonhosted.org/packages/d5/e1/3e9013159b4cbb71df9bd7611cbf90dc2c621c8aeeb677fc41dad72f2261/webcolors-1.13-py3-none-any.whl#sha256=29bc7e8752c0a1bd4a1f03c14d6e6a72e93d82193738fa860cbff59d0fcc11bf
+# pip webcolors @ https://files.pythonhosted.org/packages/60/e8/c0e05e4684d13459f93d312077a9a2efbe04d59c393bc2b8802248c908d4/webcolors-24.11.1-py3-none-any.whl#sha256=515291393b4cdf0eb19c155749a096f779f7d909f7cceea072791cb9095b92e9
 # pip webencodings @ https://files.pythonhosted.org/packages/f4/24/2a3e3df732393fed8b3ebf2ec078f05546de641fe1b667ee316ec1dcf3b7/webencodings-0.5.1-py2.py3-none-any.whl#sha256=a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78
 # pip websocket-client @ https://files.pythonhosted.org/packages/5a/84/44687a29792a70e111c5c477230a72c4b957d88d16141199bf9acb7537a3/websocket_client-1.8.0-py3-none-any.whl#sha256=17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526
-# pip anyio @ https://files.pythonhosted.org/packages/14/fd/2f20c40b45e4fb4324834aea24bd4afdf1143390242c0b33774da0e2e34f/anyio-4.3.0-py3-none-any.whl#sha256=048e05d0f6caeed70d731f3db756d35dcc1f35747c8c403364a8332c630441b8
+# pip anyio @ https://files.pythonhosted.org/packages/a1/ee/48ca1a7c89ffec8b6a0c5d02b89c305671d5ffd8d3c94acf8b8c408575bb/anyio-4.9.0-py3-none-any.whl#sha256=9f76d541cad6e36af7beb62e978876f3b41e3e04f2c1fbf0884604c0a9c4d93c
+# pip argon2-cffi-bindings @ https://files.pythonhosted.org/packages/ec/f7/378254e6dd7ae6f31fe40c8649eea7d4832a42243acaf0f1fff9083b2bed/argon2_cffi_bindings-21.2.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=b746dba803a79238e925d9046a63aa26bf86ab2a2fe74ce6b009a1c3f5c8f2ae
 # pip arrow @ https://files.pythonhosted.org/packages/f8/ed/e97229a566617f2ae958a6b13e7cc0f585470eac730a73e9e82c32a3cdd2/arrow-1.3.0-py3-none-any.whl#sha256=c728b120ebc00eb84e01882a6f5e7927a53960aa990ce7dd2b10f39005a67f80
-# pip beautifulsoup4 @ https://files.pythonhosted.org/packages/b1/fe/e8c672695b37eecc5cbf43e1d0638d88d66ba3a44c4d321c796f4e59167f/beautifulsoup4-4.12.3-py3-none-any.whl#sha256=b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed
-# pip bleach @ https://files.pythonhosted.org/packages/ea/63/da7237f805089ecc28a3f36bca6a21c31fcbc2eb380f3b8f1be3312abd14/bleach-6.1.0-py3-none-any.whl#sha256=3225f354cfc436b9789c66c4ee030194bee0568fbf9cbdad3bc8b5c26c5f12b6
-# pip cffi @ https://files.pythonhosted.org/packages/ea/ac/e9e77bc385729035143e54cc8c4785bd480eaca9df17565963556b0b7a93/cffi-1.16.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=8f8e709127c6c77446a8c0a8c8bf3c8ee706a06cd44b1e827c3e6a2ee6b8c098
 # pip doit @ https://files.pythonhosted.org/packages/44/83/a2960d2c975836daa629a73995134fd86520c101412578c57da3d2aa71ee/doit-0.36.0-py3-none-any.whl#sha256=ebc285f6666871b5300091c26eafdff3de968a6bd60ea35dd1e3fc6f2e32479a
 # pip jupyter-core @ https://files.pythonhosted.org/packages/c9/fb/108ecd1fe961941959ad0ee4e12ee7b8b1477247f30b1fdfd83ceaf017f0/jupyter_core-5.7.2-py3-none-any.whl#sha256=4f7315d2f6b4bcf2e3e7cb6e46772eba760ae459cd1f59d29eb57b0a01bd7409
-# pip referencing @ https://files.pythonhosted.org/packages/8f/ad/0a39c92d2d2769eb02adfdd50282e25341dccee3a14753c972d7327de664/referencing-0.35.0-py3-none-any.whl#sha256=8080727b30e364e5783152903672df9b6b091c926a146a759080b62ca3126cd6
+# pip markdown-it-py @ https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl#sha256=355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1
+# pip mistune @ https://files.pythonhosted.org/packages/01/4d/23c4e4f09da849e127e9f123241946c23c1e30f45a88366879e064211815/mistune-3.1.3-py3-none-any.whl#sha256=1a32314113cff28aa6432e99e522677c8587fd83e3d51c29b82a52409c842bd9
+# pip pyzmq @ https://files.pythonhosted.org/packages/c1/3e/2de5928cdadc2105e7c8f890cc5f404136b41ce5b6eae5902167f1d5641c/pyzmq-26.4.0-cp310-cp310-manylinux_2_28_x86_64.whl#sha256=7dacb06a9c83b007cc01e8e5277f94c95c453c5851aac5e83efe93e72226353f
+# pip referencing @ https://files.pythonhosted.org/packages/c1/b1/3baf80dc6d2b7bc27a95a67752d0208e410351e3feb4eb78de5f77454d8d/referencing-0.36.2-py3-none-any.whl#sha256=e8699adbbf8b5c7de96d8ffa0eb5c158b3beafce084968e2ea8bb08c6794dcd0
 # pip rfc3339-validator @ https://files.pythonhosted.org/packages/7b/44/4e421b96b67b2daff264473f7465db72fbdf36a07e05494f50300cc7b0c6/rfc3339_validator-0.1.4-py2.py3-none-any.whl#sha256=24f6ec1eda14ef823da9e36ec7113124b39c04d50a4d3d3a3c2859577e7791fa
+# pip sphinxcontrib-sass @ https://files.pythonhosted.org/packages/3f/ec/194f2dbe55b3fe0941b43286c21abb49064d9d023abfb99305c79ad77cad/sphinxcontrib_sass-0.3.5-py2.py3-none-any.whl#sha256=850c83a36ed2d2059562504ccf496ca626c9c0bb89ec642a2d9c42105704bef6
 # pip terminado @ https://files.pythonhosted.org/packages/6a/9e/2064975477fdc887e47ad42157e214526dcad8f317a948dee17e1659a62f/terminado-0.18.1-py3-none-any.whl#sha256=a4468e1b37bb318f8a86514f65814e1afc977cf29b3992a4500d9dd305dcceb0
-# pip tinycss2 @ https://files.pythonhosted.org/packages/2c/4d/0db5b8a613d2a59bbc29bc5bb44a2f8070eb9ceab11c50d477502a8a0092/tinycss2-1.3.0-py3-none-any.whl#sha256=54a8dbdffb334d536851be0226030e9505965bb2f30f21a4a82c55fb2a80fae7
-# pip argon2-cffi-bindings @ https://files.pythonhosted.org/packages/ec/f7/378254e6dd7ae6f31fe40c8649eea7d4832a42243acaf0f1fff9083b2bed/argon2_cffi_bindings-21.2.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=b746dba803a79238e925d9046a63aa26bf86ab2a2fe74ce6b009a1c3f5c8f2ae
+# pip tinycss2 @ https://files.pythonhosted.org/packages/e6/34/ebdc18bae6aa14fbee1a08b63c015c72b64868ff7dae68808ab500c492e2/tinycss2-1.4.0-py3-none-any.whl#sha256=3a49cf47b7675da0b15d0c6e1df8df4ebd96e9394bb905a5775adb0d884c5289
+# pip argon2-cffi @ https://files.pythonhosted.org/packages/a4/6a/e8a041599e78b6b3752da48000b14c8d1e8a04ded09c88c714ba047f34f5/argon2_cffi-23.1.0-py3-none-any.whl#sha256=c670642b78ba29641818ab2e68bd4e6a78ba53b7eff7b4c3815ae16abf91c7ea
+# pip bleach @ https://files.pythonhosted.org/packages/fc/55/96142937f66150805c25c4d0f31ee4132fd33497753400734f9dfdcbdc66/bleach-6.2.0-py3-none-any.whl#sha256=117d9c6097a7c3d22fd578fcd8d35ff1e125df6736f554da4e432fdd63f31e5e
 # pip isoduration @ https://files.pythonhosted.org/packages/7b/55/e5326141505c5d5e34c5e0935d2908a74e4561eca44108fbfb9c13d2911a/isoduration-20.11.0-py3-none-any.whl#sha256=b2904c2a4228c3d44f409c8ae8e2370eb21a26f7ac2ec5446df141dde3452042
-# pip jsonschema-specifications @ https://files.pythonhosted.org/packages/ee/07/44bd408781594c4d0a027666ef27fab1e441b109dc3b76b4f836f8fd04fe/jsonschema_specifications-2023.12.1-py3-none-any.whl#sha256=87e4fdf3a94858b8a2ba2778d9ba57d8a9cafca7c7489c46ba0d30a8bc6a9c3c
+# pip jsonschema-specifications @ https://files.pythonhosted.org/packages/01/0e/b27cdbaccf30b890c40ed1da9fd4a3593a5cf94dae54fb34f8a4b74fcd3f/jsonschema_specifications-2025.4.1-py3-none-any.whl#sha256=4653bffbd6584f7de83a67e0d620ef16900b390ddc7939d56684d6c81e33f1af
+# pip jupyter-client @ https://files.pythonhosted.org/packages/11/85/b0394e0b6fcccd2c1eeefc230978a6f8cb0c5df1e4cd3e7625735a0d7d1e/jupyter_client-8.6.3-py3-none-any.whl#sha256=e8a19cc986cc45905ac3362915f410f3af85424b4c0905e94fa5f2cb08e8f23f
 # pip jupyter-server-terminals @ https://files.pythonhosted.org/packages/07/2d/2b32cdbe8d2a602f697a649798554e4f072115438e92249624e532e8aca6/jupyter_server_terminals-0.5.3-py3-none-any.whl#sha256=41ee0d7dc0ebf2809c668e0fc726dfaf258fcd3e769568996ca731b6194ae9aa
-# pip jupyterlite-core @ https://files.pythonhosted.org/packages/05/d2/1d59d9a70d684b1eb3eb3a0b80a36b4e1d691e94af5d53aee56b1ad5240b/jupyterlite_core-0.3.0-py3-none-any.whl#sha256=247cc34ae6fedda41b15ce4778997164508b2039bc92480665cadfe955193467
-# pip pyzmq @ https://files.pythonhosted.org/packages/2c/1f/044aafe62c85d579f87846f9cfd2cfce12a08ae72426ec92986171421d9f/pyzmq-26.0.2-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl#sha256=c40b09b7e184d6e3e1be1c8af2cc320c0f9f610d8a5df3dd866e6e6e4e32b235
-# pip argon2-cffi @ https://files.pythonhosted.org/packages/a4/6a/e8a041599e78b6b3752da48000b14c8d1e8a04ded09c88c714ba047f34f5/argon2_cffi-23.1.0-py3-none-any.whl#sha256=c670642b78ba29641818ab2e68bd4e6a78ba53b7eff7b4c3815ae16abf91c7ea
-# pip jsonschema @ https://files.pythonhosted.org/packages/39/9d/b035d024c62c85f2e2d4806a59ca7b8520307f34e0932fbc8cc75fe7b2d9/jsonschema-4.21.1-py3-none-any.whl#sha256=7996507afae316306f9e2290407761157c6f78002dcf7419acb99822143d1c6f
-# pip jupyter-client @ https://files.pythonhosted.org/packages/75/6d/d7b55b9c1ac802ab066b3e5015e90faab1fffbbd67a2af498ffc6cc81c97/jupyter_client-8.6.1-py3-none-any.whl#sha256=3b7bd22f058434e3b9a7ea4b1500ed47de2713872288c0d511d19926f99b459f
-# pip jupyterlite-pyodide-kernel @ https://files.pythonhosted.org/packages/83/bf/749279904094015d5cb7e030dd7a111f8b013b9f1809d954d04ebe0c1197/jupyterlite_pyodide_kernel-0.3.1-py3-none-any.whl#sha256=ac9d9dd95adcced57d465a7b298f220d8785845c017ad3abf2a3677ff02631c6
-# pip jupyter-events @ https://files.pythonhosted.org/packages/a5/94/059180ea70a9a326e1815176b2370da56376da347a796f8c4f0b830208ef/jupyter_events-0.10.0-py3-none-any.whl#sha256=4b72130875e59d57716d327ea70d3ebc3af1944d3717e5a498b8a06c6c159960
+# pip jupyterlite-core @ https://files.pythonhosted.org/packages/46/15/1d9160819d1e6e018d15de0e98b9297d0a09cfcfdc73add6e24ee3b2b83c/jupyterlite_core-0.5.1-py3-none-any.whl#sha256=76381619a632f06bf67fb47e5464af762ad8836df5ffe3d7e7ee0e316c1407ee
+# pip mdit-py-plugins @ https://files.pythonhosted.org/packages/a7/f7/7782a043553ee469c1ff49cfa1cdace2d6bf99a1f333cf38676b3ddf30da/mdit_py_plugins-0.4.2-py3-none-any.whl#sha256=0c673c3f889399a33b95e88d2f0d111b4447bdfea7f237dab2d488f459835636
+# pip jsonschema @ https://files.pythonhosted.org/packages/69/4a/4f9dbeb84e8850557c02365a0eee0649abe5eb1d84af92a25731c6c0f922/jsonschema-4.23.0-py3-none-any.whl#sha256=fbadb6f8b144a8f8cf9f0b89ba94501d143e50411a1278633f56a7acf7fd5566
+# pip jupyterlite-pyodide-kernel @ https://files.pythonhosted.org/packages/1b/b5/959a03ca011d1031abac03c18af9e767c18d6a9beb443eb106dda609748c/jupyterlite_pyodide_kernel-0.5.2-py3-none-any.whl#sha256=63ba6ce28d32f2cd19f636c40c153e171369a24189e11e2235457bd7000c5907
+# pip jupyter-events @ https://files.pythonhosted.org/packages/e2/48/577993f1f99c552f18a0428731a755e06171f9902fa118c379eb7c04ea22/jupyter_events-0.12.0-py3-none-any.whl#sha256=6464b2fa5ad10451c3d35fabc75eab39556ae1e2853ad0c0cc31b656731a97fb
 # pip nbformat @ https://files.pythonhosted.org/packages/a9/82/0340caa499416c78e5d8f5f05947ae4bc3cba53c9f038ab6e9ed964e22f1/nbformat-5.10.4-py3-none-any.whl#sha256=3b48d6c8fbca4b299bf3982ea7db1af21580e4fec269ad087b9e81588891200b
-# pip nbclient @ https://files.pythonhosted.org/packages/66/e8/00517a23d3eeaed0513e718fbc94aab26eaa1758f5690fc8578839791c79/nbclient-0.10.0-py3-none-any.whl#sha256=f13e3529332a1f1f81d82a53210322476a168bb7090a0289c795fe9cc11c9d3f
-# pip nbconvert @ https://files.pythonhosted.org/packages/23/8a/8d67cbd984739247e4b205c1143e2f71b25b4f71e180fe70f7cb2cf02633/nbconvert-7.16.3-py3-none-any.whl#sha256=ddeff14beeeedf3dd0bc506623e41e4507e551736de59df69a91f86700292b3b
-# pip jupyter-server @ https://files.pythonhosted.org/packages/07/46/6bb926b3bf878bf687b952fb6a4c09d014b4575a25960f2cd1a61793763f/jupyter_server-2.14.0-py3-none-any.whl#sha256=fb6be52c713e80e004fac34b35a0990d6d36ba06fd0a2b2ed82b899143a64210
-# pip jupyterlab-server @ https://files.pythonhosted.org/packages/2f/b9/ed4ecad7cf1863a64920dc4c19b0376628b5d6bd28d2ec1e00cbac4ba2fb/jupyterlab_server-2.27.1-py3-none-any.whl#sha256=f5e26156e5258b24d532c84e7c74cc212e203bff93eb856f81c24c16daeecc75
-# pip jupyterlite-sphinx @ https://files.pythonhosted.org/packages/38/c9/5f1142c005cf8d75830b10029e53f074324bc85cfca1f1d0f22a207b771c/jupyterlite_sphinx-0.9.3-py3-none-any.whl#sha256=be6332d16490ea2fa90b78187a2c5e1c357195966a25741d60b1790346571041
+# pip jupytext @ https://files.pythonhosted.org/packages/12/b7/e7e3d34c8095c19228874b1babedfb5d901374e40d51ae66f2a90203be53/jupytext-1.17.1-py3-none-any.whl#sha256=99145b1e1fa96520c21ba157de7d354ffa4904724dcebdcd70b8413688a312de
+# pip nbclient @ https://files.pythonhosted.org/packages/34/6d/e7fa07f03a4a7b221d94b4d586edb754a9b0dc3c9e2c93353e9fa4e0d117/nbclient-0.10.2-py3-none-any.whl#sha256=4ffee11e788b4a27fabeb7955547e4318a5298f34342a4bfd01f2e1faaeadc3d
+# pip nbconvert @ https://files.pythonhosted.org/packages/cc/9a/cd673b2f773a12c992f41309ef81b99da1690426bd2f96957a7ade0d3ed7/nbconvert-7.16.6-py3-none-any.whl#sha256=1375a7b67e0c2883678c48e506dc320febb57685e5ee67faa51b18a90f3a712b
+# pip jupyter-server @ https://files.pythonhosted.org/packages/e2/a2/89eeaf0bb954a123a909859fa507fa86f96eb61b62dc30667b60dbd5fdaf/jupyter_server-2.15.0-py3-none-any.whl#sha256=872d989becf83517012ee669f09604aa4a28097c0bd90b2f424310156c2cdae3
+# pip jupyterlab-server @ https://files.pythonhosted.org/packages/54/09/2032e7d15c544a0e3cd831c51d77a8ca57f7555b2e1b2922142eddb02a84/jupyterlab_server-2.27.3-py3-none-any.whl#sha256=e697488f66c3db49df675158a77b3b017520d772c6e1548c7d9bcc5df7944ee4
+# pip jupyterlite-sphinx @ https://files.pythonhosted.org/packages/a9/f2/b64ad053b8b6fed95c46e8df85ee3349a1cca47e006eb6a65671c9a1c6e5/jupyterlite_sphinx-0.20.0-py3-none-any.whl#sha256=de2cb966f389d70cc269f501af24f0cbb1f47d521a89ee79ac83f0ad302214fc
diff --git a/build_tools/circle/doc_min_dependencies_environment.yml b/build_tools/circle/doc_min_dependencies_environment.yml
index 298a60e8ec4ff..1a93231019fbb 100644
--- a/build_tools/circle/doc_min_dependencies_environment.yml
+++ b/build_tools/circle/doc_min_dependencies_environment.yml
@@ -4,34 +4,39 @@ channels:
   - conda-forge
 dependencies:
-  - python=3.9
-  - numpy=1.19.5 # min
+  - python=3.10
+  - numpy=1.22.0 # min
   - blas
-  - scipy=1.6.0 # min
+  - scipy=1.8.0 #
min - cython=3.0.10 # min - joblib - threadpoolctl - - matplotlib=3.3.4 # min - - pandas=1.1.5 # min - - pyamg - - pytest<8 + - matplotlib=3.5.0 # min + - pandas=1.4.0 # min + - pyamg=4.2.1 # min + - pytest - pytest-xdist - pillow - pip - ninja - meson-python - - scikit-image=0.17.2 # min + - scikit-image=0.19.0 # min - seaborn - memory_profiler - compilers - - sphinx=6.0.0 # min - - sphinx-gallery=0.15.0 # min + - sphinx=7.3.7 # min + - sphinx-gallery=0.17.1 # min - sphinx-copybutton=0.5.2 # min - numpydoc=1.2.0 # min - - sphinx-prompt=1.3.0 # min + - sphinx-prompt=1.4.0 # min - plotly=5.14.0 # min - - polars=0.19.12 # min - - pooch + - polars=0.20.30 # min + - pooch=1.6.0 # min + - sphinx-remove-toctrees=1.0.0.post1 # min + - sphinx-design=0.6.0 # min + - pydata-sphinx-theme=0.15.3 # min + - towncrier=24.8.0 # min - pip - pip: - - sphinxext-opengraph==0.4.2 # min + - sphinxext-opengraph==0.9.1 # min + - sphinxcontrib-sass==0.3.4 # min diff --git a/build_tools/circle/doc_min_dependencies_linux-64_conda.lock b/build_tools/circle/doc_min_dependencies_linux-64_conda.lock index 69eca7785d55c..7801c08740653 100644 --- a/build_tools/circle/doc_min_dependencies_linux-64_conda.lock +++ b/build_tools/circle/doc_min_dependencies_linux-64_conda.lock @@ -1,248 +1,297 @@ # Generated by conda-lock. # platform: linux-64 -# input_hash: 32601810330a8200864f7908d07d870a3a58931be4f833691b2b5c7937f2d330 +# input_hash: cf86af2534e8e281654ed19bc893b468656b355b2b200b12321dbc61cce562db @EXPLICIT https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 -https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2024.2.2-hbcca054_0.conda#2f4327a1cbe7f022401b236e915a5fef https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2#0c96522c6bdaed4b1566d11387caaf45 https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2#34893075a5c9e55cdafac56607368fc6 https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2#4d59c254e01d9cde7957100457e2d5fb -https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_1.conda#6185f640c43843e5ad6fd1c5372c3f80 -https://conda.anaconda.org/conda-forge/noarch/kernel-headers_linux-64-2.6.32-he073ed8_17.conda#d731b543793afc0433c4fd593e693fce -https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.40-h55db66e_0.conda#10569984e7db886e4f1abc2b47ad79a1 -https://conda.anaconda.org/conda-forge/noarch/libgcc-devel_linux-64-12.3.0-h2af2641_106.conda#b97e137a252f112b8d5fadb313bd8ec9 -https://conda.anaconda.org/conda-forge/noarch/libstdcxx-devel_linux-64-12.3.0-h2af2641_106.conda#647bd9d44ad216d410329e659c898d8f -https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.2.0-h95c4c6d_6.conda#3cfab3e709f77e9f1b3d380eb622494a -https://conda.anaconda.org/conda-forge/linux-64/mkl-include-2024.1.0-ha957f24_692.conda#b35af3f0f25498f4e9fc4c471910346c -https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.9-4_cp39.conda#bfe4b3259a8ac6cdf0037752904da6a7 -https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda#161081fc7cec0bfda0d86d7cb595f8d8 +https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_3.conda#49023d73832ef61042f6a237cb2687e7 +https://conda.anaconda.org/conda-forge/noarch/kernel-headers_linux-64-3.10.0-he073ed8_18.conda#ad8527bf134a90e1c9ed35fa0b64318c 
+https://conda.anaconda.org/conda-forge/noarch/python_abi-3.10-7_cp310.conda#44e871cba2b162368476a84b8d040b6c +https://conda.anaconda.org/conda-forge/noarch/tzdata-2025b-h78e105d_0.conda#4222072737ccff51314b5ece9c7d6f5a +https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2025.4.26-hbd8a1cb_0.conda#95db94f75ba080a22eb623590993167b https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-0.tar.bz2#f766549260d6815b0c52253f1fb1bb29 -https://conda.anaconda.org/conda-forge/linux-64/libgomp-13.2.0-hc881cc4_6.conda#aae89d3736661c36a5591788aebd0817 -https://conda.anaconda.org/conda-forge/noarch/sysroot_linux-64-2.12-he073ed8_17.conda#595db67e32b276298ff3d94d07d47fbf -https://conda.anaconda.org/conda-forge/linux-64/binutils_impl_linux-64-2.40-ha885e6a_0.conda#800a4c872b5bc06fa83888d112fe6c4f +https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.43-h712a8e2_4.conda#01f8d123c96816249efd255a31ad7712 +https://conda.anaconda.org/conda-forge/noarch/libgcc-devel_linux-64-13.3.0-hc03c837_102.conda#4c1d6961a6a54f602ae510d9bf31fa60 +https://conda.anaconda.org/conda-forge/linux-64/libglvnd-1.7.0-ha4b6fd6_2.conda#434ca7e50e40f4918ab701e3facd59a0 +https://conda.anaconda.org/conda-forge/linux-64/libgomp-14.2.0-h767d61c_2.conda#06d02030237f4d5b3d9a7e7d348fe3c6 +https://conda.anaconda.org/conda-forge/noarch/libstdcxx-devel_linux-64-13.3.0-hc03c837_102.conda#aa38de2738c5f4a72a880e3d31ffe8b4 +https://conda.anaconda.org/conda-forge/noarch/sysroot_linux-64-2.17-h0157908_18.conda#460eba7851277ec1fd80a1a24080787a +https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2#73aaf86a425cc6e73fcf236a5a46396d +https://conda.anaconda.org/conda-forge/linux-64/binutils_impl_linux-64-2.43-h4bf12b8_4.conda#ef67db625ad0d2dce398837102f875ed https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2#fee5683a3f04bd15cbd8318b096a27ab -https://conda.anaconda.org/conda-forge/linux-64/binutils-2.40-h4852527_0.conda#a05c7712be80622934f7011e0a1d43fc -https://conda.anaconda.org/conda-forge/linux-64/binutils_linux-64-2.40-hdade7a5_3.conda#2d9a60578bc28469d9aeef9aea5520c3 -https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_kmp_llvm.tar.bz2#562b26ba2e19059551a811e72ab7f793 -https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.2.0-hc881cc4_6.conda#df88796bd09a0d2ed292e59101478ad8 -https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.11-hd590300_1.conda#0bb492cca54017ea314b809b1ee3a176 +https://conda.anaconda.org/conda-forge/linux-64/libegl-1.7.0-ha4b6fd6_2.conda#c151d5eb730e9b7480e6d48c0fc44048 +https://conda.anaconda.org/conda-forge/linux-64/binutils-2.43-h4852527_4.conda#29782348a527eda3ecfc673109d28e93 +https://conda.anaconda.org/conda-forge/linux-64/binutils_linux-64-2.43-h4852527_4.conda#c87e146f5b685672d4aa6b527c6d3b5e +https://conda.anaconda.org/conda-forge/linux-64/libgcc-14.2.0-h767d61c_2.conda#ef504d1acbd74b7cc6849ef8af47dd03 +https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.14-hb9d3cd8_0.conda#76df83c2a9035c54df5d04ff81bcc02d +https://conda.anaconda.org/conda-forge/linux-64/gettext-tools-0.24.1-h5888daf_0.conda#d54305672f0361c2f3886750e7165b5f +https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.1.0-hb9d3cd8_2.conda#41b599ed2b02abcfdd84302bff174b23 +https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.23-h86f0d12_0.conda#27fe770decaf469a53f3e3a6d593067f +https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.0-h5888daf_0.conda#db0bfbe7dd197b68ad5f30333bae6ce0 
+https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.6-h2dba641_1.conda#ede4673863426c0883c0063d853bbd85 +https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-14.2.0-h69a702a_2.conda#a2222a6ada71fb478682efe483ce0f92 +https://conda.anaconda.org/conda-forge/linux-64/libgettextpo-0.24.1-h5888daf_0.conda#2ee6d71b72f75d50581f2f68e965efdb +https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-14.2.0-hf1ad2bd_2.conda#556a4fdfac7287d349b8f09aba899693 +https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.18-h4ce23a2_1.conda#e796ff8ddc598affdf7c173d6145f087 +https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.1.0-hb9d3cd8_0.conda#9fa334557db9f63da6c9285fd2a48638 +https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.8.1-hb9d3cd8_1.conda#a76fd702c93cd2dfd89eff30a5fd45a8 +https://conda.anaconda.org/conda-forge/linux-64/libntlm-1.8-hb9d3cd8_0.conda#7c7927b404672409d9917d49bff5f2d6 +https://conda.anaconda.org/conda-forge/linux-64/libogg-1.3.5-hd0c01bc_1.conda#68e52064ed3897463c0e958ab5c8f91b +https://conda.anaconda.org/conda-forge/linux-64/libopus-1.5.2-hd0c01bc_0.conda#b64523fb87ac6f87f0790f324ad43046 +https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-14.2.0-h8f9b012_2.conda#a78c856b6dc6bf4ea8daeb9beaaa3fb0 +https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.5.0-h851e524_0.conda#63f790534398730f59e1b899c3644d4a +https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-hb9d3cd8_2.conda#edb0dca6bc32e4f4789199455a1dbeb8 +https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h2d0b736_3.conda#47e340acb35de30501a76c7c799c41d7 +https://conda.anaconda.org/conda-forge/linux-64/openssl-3.5.0-h7b32b05_1.conda#de356753cfdbffcde5bb1e86e3aa6cd0 +https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-hb9d3cd8_1002.conda#b3c17d95b5a10c6e64a21fa17573e70e +https://conda.anaconda.org/conda-forge/linux-64/xorg-libice-1.1.2-hb9d3cd8_0.conda#fb901ff28063514abb6046c9ec2c4a45 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.12-hb9d3cd8_0.conda#f6ebe2cb3f82ba6c057dde5d9debe4f7 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdmcp-1.1.5-hb9d3cd8_0.conda#8035c64cb77ed555e3f150b7b3972480 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxshmfence-1.3.3-hb9d3cd8_0.conda#9a809ce9f65460195777f2f2116bae02 https://conda.anaconda.org/conda-forge/linux-64/attr-2.5.1-h166bdaf_1.tar.bz2#d9c69a24ad678ffce24c6543a0176b00 -https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hd590300_5.conda#69b8b6202a07720f448be700e300ccf4 -https://conda.anaconda.org/conda-forge/linux-64/gettext-tools-0.22.5-h59595ed_2.conda#985f2f453fb72408d6b6f1be0f324033 -https://conda.anaconda.org/conda-forge/linux-64/graphite2-1.3.13-h59595ed_1003.conda#f87c7b7c2cb45f323ffbce941c78ab7c -https://conda.anaconda.org/conda-forge/linux-64/icu-73.2-h59595ed_0.conda#cc47e1facc155f91abd89b11e48e72ff +https://conda.anaconda.org/conda-forge/linux-64/blis-0.9.0-h4ab18f5_2.conda#6f77ba1352b69c4a6f8a6d20def30e4e +https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h4bc722e_7.conda#62ee74e96c5ebb0af99386de58cf9553 +https://conda.anaconda.org/conda-forge/linux-64/dav1d-1.2.1-hd590300_0.conda#418c6ca5929a611cbd69204907a83995 +https://conda.anaconda.org/conda-forge/linux-64/expat-2.7.0-h5888daf_0.conda#d6845ae4dea52a2f90178bf1829a21f8 +https://conda.anaconda.org/conda-forge/linux-64/giflib-5.2.2-hd590300_0.conda#3bf7b9fd5a7136126e0234db4b87c8b6 +https://conda.anaconda.org/conda-forge/linux-64/jxrlib-1.1-hd590300_3.conda#5aeabe88534ea4169d4c49998f293d6c 
https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2#30186d27e2c9fa62b45fb1476b7200e3 https://conda.anaconda.org/conda-forge/linux-64/lame-3.100-h166bdaf_1003.tar.bz2#a8832b479f93521a9e7b5b743803be51 -https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h27087fc_0.tar.bz2#76bbff344f0134279f225174e9064c8f -https://conda.anaconda.org/conda-forge/linux-64/libasprintf-0.22.5-h661eb56_2.conda#dd197c968bf9760bba0031888d431ede -https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.20-hd590300_0.conda#8e88f9389f1165d7c0936fe40d9a9a79 -https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.6.2-h59595ed_0.conda#e7ba12deb7020dd080c6c70e7b6f6a3d -https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3 -https://conda.anaconda.org/conda-forge/linux-64/libgettextpo-0.22.5-h59595ed_2.conda#172bcc51059416e7ce99e7b528cede83 -https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-13.2.0-h43f5ff8_6.conda#e54a5ddc67e673f9105cf2a2e9c070b0 -https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.17-hd590300_2.conda#d66573916ffcf376178462f1b61c941e -https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.0.0-hd590300_1.conda#ea25936bb4080d843790b586850f82b8 +https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h0aef613_1.conda#9344155d33912347b37f0ae6c410a835 +https://conda.anaconda.org/conda-forge/linux-64/libasprintf-0.24.1-h8e693c7_0.conda#57566a81dd1e5aa3d98ac7582e8bfe03 +https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.1.0-hb9d3cd8_2.conda#9566f0bd264fbd463002e759b8a82401 +https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.1.0-hb9d3cd8_2.conda#06f70867945ea6a84d35836af780f1de +https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20250104-pl5321h7949ede_0.conda#c277e0a4d549b03ac1e9d6cbbe3d017b +https://conda.anaconda.org/conda-forge/linux-64/libevent-2.1.12-hf998b51_1.conda#a1cfcc585f0c42bf8d5546bb1dfb668d +https://conda.anaconda.org/conda-forge/linux-64/libgettextpo-devel-0.24.1-h5888daf_0.conda#8f04c7aae6a46503bc36d1ed5abc8c7c +https://conda.anaconda.org/conda-forge/linux-64/libgfortran-14.2.0-h69a702a_2.conda#fb54c4ea68b460c278d26eea89cfbcc3 +https://conda.anaconda.org/conda-forge/linux-64/libgpg-error-1.55-h3f2d84a_0.conda#2bd47db5807daade8500ed7ca4c512a4 +https://conda.anaconda.org/conda-forge/linux-64/libhwy-1.2.0-hf40a0c7_0.conda#2f433d593a66044c3f163cb25f0a09de https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hd590300_0.conda#30fd6e37fe21f86f4bd26d6ee73eeec7 -https://conda.anaconda.org/conda-forge/linux-64/libogg-1.3.4-h7f98852_1.tar.bz2#6e8cc2173440d77708196c5b93771680 -https://conda.anaconda.org/conda-forge/linux-64/libopus-1.3.1-h7f98852_1.tar.bz2#15345e56d527b330e1cacbdf58676e8f -https://conda.anaconda.org/conda-forge/linux-64/libsanitizer-12.3.0-h2af2641_6.conda#1cf0b420341bb1a7b7f34f6e0f4bbf2b +https://conda.anaconda.org/conda-forge/linux-64/libpciaccess-0.18-hd590300_0.conda#48f4330bfcd959c3cfb704d424903c82 +https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.47-h943b412_0.conda#55199e2ae2c3651f6f9b2a447b47bdc9 +https://conda.anaconda.org/conda-forge/linux-64/libsanitizer-13.3.0-he8ea267_2.conda#2b6cdf7bb95d3d10ef4e38ce0bc95dba +https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.49.1-hee588c1_2.conda#962d6ac93c30b1dfc54c9cccafd1003e +https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-14.2.0-h4852527_2.conda#c75da67f045c2627f59e6fcb5f4e3a9b 
https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda#40b61aab5c7ba9ff276c41cfffe6b80b -https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.4.0-hd590300_0.conda#b26e8aa824079e1be0294e7152ca4559 +https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.17.0-h8a09558_0.conda#92ed62436b625154323d40d5f2f11dd7 https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda#5aa797f8787fe7a17d1b0821485b5adc -https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-hd590300_5.conda#f36c115f1ee199da648e0597ec2047ad -https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.9.4-hcb278e6_0.conda#318b08df404f9c9be5712aaa5a6f0bb0 -https://conda.anaconda.org/conda-forge/linux-64/mpg123-1.32.6-h59595ed_0.conda#9160cdeb523a1b20cf8d2a0bf821f45d -https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.4.20240210-h59595ed_0.conda#97da8860a0da5413c7c98a3b3838a645 -https://conda.anaconda.org/conda-forge/linux-64/ninja-1.12.0-h00ab1b0_0.conda#b048701d52e7cbb5f59ddd4d3b17bbf5 -https://conda.anaconda.org/conda-forge/linux-64/nspr-4.35-h27087fc_0.conda#da0ec11a6454ae19bff5b02ed881a2b1 -https://conda.anaconda.org/conda-forge/linux-64/openssl-3.2.1-hd590300_1.conda#9d731343cff6ee2e5a25c4a091bf8e2a -https://conda.anaconda.org/conda-forge/linux-64/pixman-0.43.2-h59595ed_0.conda#71004cbf7924e19c02746ccde9fd7123 -https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-h36c2ea0_1001.tar.bz2#22dad4df6e8630e8dff2428f6f6a7036 -https://conda.anaconda.org/conda-forge/linux-64/xorg-kbproto-1.0.7-h7f98852_1002.tar.bz2#4b230e8381279d76131116660f5a241a -https://conda.anaconda.org/conda-forge/linux-64/xorg-libice-1.1.1-hd590300_0.conda#b462a33c0be1421532f28bfe8f4a7514 -https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.11-hd590300_0.conda#2c80dc38fface310c9bd81b17037fee5 -https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdmcp-1.1.3-h7f98852_0.tar.bz2#be93aabceefa2fac576e971aef407908 -https://conda.anaconda.org/conda-forge/linux-64/xorg-renderproto-0.11.1-h7f98852_1002.tar.bz2#06feff3d2634e3097ce2fe681474b534 -https://conda.anaconda.org/conda-forge/linux-64/xorg-xextproto-7.3.0-h0b41bf4_1003.conda#bce9f945da8ad2ae9b1d7165a64d0f87 -https://conda.anaconda.org/conda-forge/linux-64/xorg-xf86vidmodeproto-2.3.1-h7f98852_1002.tar.bz2#3ceea9668625c18f19530de98b15d5b0 -https://conda.anaconda.org/conda-forge/linux-64/xorg-xproto-7.0.31-h7f98852_1007.tar.bz2#b4a4381d54784606820704f7b5f05a15 -https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0 +https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.10.0-h5888daf_1.conda#9de5350a85c4a20c685259b889aa6393 +https://conda.anaconda.org/conda-forge/linux-64/mpg123-1.32.9-hc50e24c_0.conda#c7f302fd11eeb0987a6a5e1f3aed6a21 +https://conda.anaconda.org/conda-forge/linux-64/mysql-common-9.0.1-h266115a_6.conda#94116b69829e90b72d566e64421e1bff +https://conda.anaconda.org/conda-forge/linux-64/ninja-1.12.1-hff21bea_1.conda#2322531904f27501ee19847b87ba7c64 +https://conda.anaconda.org/conda-forge/linux-64/nspr-4.36-h5888daf_0.conda#de9cd5bca9e4918527b9b72b6e2e1409 +https://conda.anaconda.org/conda-forge/linux-64/pixman-0.46.0-h29eaf8c_0.conda#d2f1c87d4416d1e7344cf92b1aaee1c4 +https://conda.anaconda.org/conda-forge/linux-64/rav1e-0.6.6-he8a937b_2.conda#77d9955b4abddb811cb8ab1aa7d743e4 +https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8c095d6_2.conda#283b96675859b20a825f8fa30f311446 
+https://conda.anaconda.org/conda-forge/linux-64/snappy-1.2.1-h8bd8927_1.conda#3b3e64af585eadfb52bb90b553db5edf +https://conda.anaconda.org/conda-forge/linux-64/svt-av1-3.0.2-h5888daf_0.conda#0096882bd623e6cc09e8bf920fc8fb47 +https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda#d453b98d9c83e71da0741bb0ff4d76bc https://conda.anaconda.org/conda-forge/linux-64/yaml-0.2.5-h7f98852_2.tar.bz2#4cb3ad778ec2d5a7acbdf254eb1c42ae -https://conda.anaconda.org/conda-forge/linux-64/expat-2.6.2-h59595ed_0.conda#53fb86322bdb89496d7579fe3f02fd61 -https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-12.3.0-h1562d66_6.conda#5e4e8358a4ab43498e0ac3b6776d1c94 -https://conda.anaconda.org/conda-forge/linux-64/libasprintf-devel-0.22.5-h661eb56_2.conda#02e41ab5834dcdcc8590cf29d9526f50 -https://conda.anaconda.org/conda-forge/linux-64/libcap-2.69-h0f662aa_0.conda#25cb5999faa414e5ccb2c1388f62d3d5 -https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2.tar.bz2#4d331e44109e3f0e19b4cb8f9b82f3e1 -https://conda.anaconda.org/conda-forge/linux-64/libevent-2.1.12-hf998b51_1.conda#a1cfcc585f0c42bf8d5546bb1dfb668d -https://conda.anaconda.org/conda-forge/linux-64/libgettextpo-devel-0.22.5-h59595ed_2.conda#b63d9b6da3653179a278077f0de20014 -https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-13.2.0-h69a702a_6.conda#3666a850342f8f3be88f9a93d948d027 -https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.43-h2797004_0.conda#009981dd9cfcaa4dbfa25ffaed86bcae -https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.45.3-h2797004_0.conda#b3316cbe90249da4f8e84cd66e1cc55b +https://conda.anaconda.org/conda-forge/linux-64/zfp-1.0.1-h5888daf_2.conda#e0409515c467b87176b070bff5d9442e +https://conda.anaconda.org/conda-forge/linux-64/zlib-ng-2.2.4-h7955e40_0.conda#c8a816dbf59eb8ba6346a8f10014b302 +https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.7-hb8e6e7a_2.conda#6432cb5d4ac0046c3ac0a8a0f95842f9 +https://conda.anaconda.org/conda-forge/linux-64/aom-3.9.1-hac33072_0.conda#346722a0be40f6edc53f12640d301338 +https://conda.anaconda.org/conda-forge/linux-64/blosc-1.21.6-he440d0b_1.conda#2c2fae981fd2afd00812c92ac47d023d +https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.1.0-hb9d3cd8_2.conda#c63b5e52939e795ba8d26e35d767a843 +https://conda.anaconda.org/conda-forge/linux-64/c-blosc2-2.15.2-h3122c55_1.conda#2bc8d76acd818d7e79229f5157d5c156 +https://conda.anaconda.org/conda-forge/linux-64/charls-2.4.2-h59595ed_0.conda#4336bd67920dd504cd8c6761d6a99645 +https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-13.3.0-h1e990d8_2.conda#f46cf0acdcb6019397d37df1e407ab91 +https://conda.anaconda.org/conda-forge/linux-64/graphite2-1.3.13-h59595ed_1003.conda#f87c7b7c2cb45f323ffbce941c78ab7c +https://conda.anaconda.org/conda-forge/linux-64/icu-75.1-he02047a_0.conda#8b189310083baabfb622af68fd9d3ae3 +https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.3-h659f571_0.conda#3f43953b7d3fb3aaa1d0d0723d91e368 +https://conda.anaconda.org/conda-forge/linux-64/libaec-1.1.3-h59595ed_0.conda#5e97e271911b8b2001a8b71860c32faa +https://conda.anaconda.org/conda-forge/linux-64/libasprintf-devel-0.24.1-h8e693c7_0.conda#8f66ed2e34507b7ae44afa31c3e4ec79 +https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-31_h66dfbfd_blis.conda#612d513ce8103e41dbcb4d941a325027 +https://conda.anaconda.org/conda-forge/linux-64/libcap-2.75-h39aace5_0.conda#c44c16d6976d2aebbd65894d7741e67e 
+https://conda.anaconda.org/conda-forge/linux-64/libdrm-2.4.124-hb9d3cd8_0.conda#8bc89311041d7fcb510238cf0848ccae +https://conda.anaconda.org/conda-forge/linux-64/libfreetype6-2.13.3-h48d6fc4_1.conda#3c255be50a506c50765a93a6644f32fe +https://conda.anaconda.org/conda-forge/linux-64/libgcrypt-lib-1.11.0-hb9d3cd8_2.conda#e55712ff40a054134d51b89afca57dbc +https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-14.2.0-h69a702a_2.conda#4056c857af1a99ee50589a941059ec55 +https://conda.anaconda.org/conda-forge/linux-64/libjxl-0.11.1-h7b0646d_1.conda#959fc2b6c0df7883e070b3fe525219a5 +https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.7.0-hd9ff511_4.conda#6c1028898cf3a2032d9af46689e1b81a https://conda.anaconda.org/conda-forge/linux-64/libvorbis-1.3.7-h9c3ff4c_0.tar.bz2#309dec04b70a3cc0f1e84a4013683bc0 -https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.15-h0b41bf4_0.conda#33277193f5b92bad9fdd230eb700929c -https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.12.6-h232c23b_2.conda#9a3a42df8a95f65334dfc7b80da1195d -https://conda.anaconda.org/conda-forge/linux-64/mysql-common-8.3.0-hf1915f5_4.conda#784a4df6676c581ca624fbe460703a6d -https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.43-hcad00b1_0.conda#8292dea9e022d9610a11fce5e0896ed8 -https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4 -https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda#d453b98d9c83e71da0741bb0ff4d76bc -https://conda.anaconda.org/conda-forge/linux-64/xorg-libsm-1.2.4-h7391055_0.conda#93ee23f12bc2e684548181256edd2cf6 -https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.13-hd590300_5.conda#68c34ec6149623be41a1933ab996a209 -https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.5-hfc55251_0.conda#04b88013080254850d6c01ed54810589 -https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-h267a509_2.conda#9ae35c3d96db2c94ce0cef86efdfa2cb -https://conda.anaconda.org/conda-forge/linux-64/gcc-12.3.0-h915e2ae_6.conda#ec683e084ea08ef94528f15d30fa1e03 -https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-12.3.0-h6477408_3.conda#7a53f84c45bdf4656ba27b9e9ed68b3d -https://conda.anaconda.org/conda-forge/linux-64/gettext-0.22.5-h59595ed_2.conda#219ba82e95d7614cf7140d2a4afc0926 -https://conda.anaconda.org/conda-forge/linux-64/gfortran_impl_linux-64-12.3.0-h6d6b2fb_6.conda#d6c441226a4bd0af4c024e8c0f4a47cf -https://conda.anaconda.org/conda-forge/linux-64/gxx_impl_linux-64-12.3.0-h1562d66_6.conda#5ad72ddd14e13d589dea2afe6e626619 -https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.2-h659d440_0.conda#cd95826dbd331ed1be26bdf401432844 -https://conda.anaconda.org/conda-forge/linux-64/libglib-2.80.0-hf2295e7_6.conda#9342e7c44c38bea649490f72d92c382d -https://conda.anaconda.org/conda-forge/linux-64/libhwloc-2.10.0-default_h2fb2949_1000.conda#7e3726e647a619c6ce5939014dfde86d -https://conda.anaconda.org/conda-forge/linux-64/libllvm15-15.0.7-hb3ce162_4.conda#8a35df3cbc0c8b12cc8af9473ae75eef -https://conda.anaconda.org/conda-forge/linux-64/libllvm18-18.1.3-h2448989_0.conda#927b6d6e80b2c0d4405a58b61ca248a3 -https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.6.0-h1dd3fc0_3.conda#66f03896ffbe1a110ffda05c7a856504 -https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-18.1.3-h4dfa4b3_0.conda#d39965123dffcad4d750989be65bcb7c -https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-8.3.0-hca2cd23_4.conda#1b50eebe2a738a3146c154d2eceaa8b6 
-https://conda.anaconda.org/conda-forge/linux-64/nss-3.98-h1d7d5a4_0.conda#54b56c2fdf973656b748e0378900ec13 -https://conda.anaconda.org/conda-forge/linux-64/python-3.9.19-h0755675_0_cpython.conda#d9ee3647fbd9e8595b8df759b2bbefb8 -https://conda.anaconda.org/conda-forge/linux-64/xcb-util-0.4.0-hd590300_1.conda#9bfac7ccd94d54fd21a0501296d60424 -https://conda.anaconda.org/conda-forge/linux-64/xcb-util-keysyms-0.4.0-h8ee46fc_1.conda#632413adcd8bc16b515cab87a2932913 -https://conda.anaconda.org/conda-forge/linux-64/xcb-util-renderutil-0.3.9-hd590300_1.conda#e995b155d938b6779da6ace6c6b13816 -https://conda.anaconda.org/conda-forge/linux-64/xcb-util-wm-0.4.1-h8ee46fc_1.conda#90108a432fb5c6150ccfee3f03388656 -https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.9-h8ee46fc_0.conda#077b6e8ad6a3ddb741fce2496dd01bec +https://conda.anaconda.org/conda-forge/linux-64/libzopfli-1.0.3-h9c3ff4c_0.tar.bz2#c66fe2d123249af7651ebde8984c51c2 +https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-9.0.1-he0572af_6.conda#9802ae6d20982f42c0f5d69008988763 +https://conda.anaconda.org/conda-forge/linux-64/nss-3.111-h159eef7_0.conda#311e8370c9db254611ec87250f6370a0 +https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.45-hc749103_0.conda#b90bece58b4c2bf25969b70f3be42d25 +https://conda.anaconda.org/conda-forge/linux-64/python-3.10.17-hd6af730_0_cpython.conda#7bb89638dae9ce1b8e051d0b721e83c2 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-0.4.1-hb711507_2.conda#8637c3e5821654d0edf97e2b0404b443 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-keysyms-0.4.1-hb711507_0.conda#ad748ccca349aec3e91743e08b5e2b50 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-renderutil-0.3.10-hb711507_0.conda#0e0cbe0564d03a99afd5fd7b362feecd +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-wm-0.4.2-hb711507_0.conda#608e0ef8256b81d04456e8d211eee3e8 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libsm-1.2.6-he73a12e_0.conda#1c74ff8c35dcadf952a16f752ca5aa49 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.12-h4f16b4b_0.conda#db038ce880f100acc74dba10302b5630 https://conda.anaconda.org/conda-forge/noarch/alabaster-0.7.16-pyhd8ed1ab_0.conda#def531a3ac77b7fb8c21d17bb5d0badb -https://conda.anaconda.org/conda-forge/linux-64/brotli-python-1.1.0-py39h3d6467e_1.conda#c48418c8b35f1d59ae9ae1174812b40a -https://conda.anaconda.org/conda-forge/linux-64/c-compiler-1.7.0-hd590300_0.conda#fad1d0a651bf929c6c16fbf1f6ccfa7c -https://conda.anaconda.org/conda-forge/noarch/certifi-2024.2.2-pyhd8ed1ab_0.conda#0876280e409658fc6f9e75d035960333 -https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.3.2-pyhd8ed1ab_0.conda#7f4a9e3fcff3f6356ae99244a014da6a -https://conda.anaconda.org/conda-forge/noarch/click-8.1.7-unix_pyh707e725_0.conda#f3ad426304898027fc619827ff428eca -https://conda.anaconda.org/conda-forge/noarch/cloudpickle-3.0.0-pyhd8ed1ab_0.conda#753d29fe41bb881e4b9c004f0abf973f -https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 -https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhd8ed1ab_0.conda#5cd86562580f274031ede6aa6aa24441 -https://conda.anaconda.org/conda-forge/linux-64/cython-3.0.10-py39h3d6467e_0.conda#76b5d215fb735a6dc43010ffbe78040e -https://conda.anaconda.org/conda-forge/linux-64/dbus-1.13.6-h5008d03_3.tar.bz2#ecfff944ba3960ecb334b9a2663d708d -https://conda.anaconda.org/conda-forge/linux-64/docutils-0.19-py39hf3d152e_1.tar.bz2#adb733ec2ee669f6d010758d054da60f 
-https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.0-pyhd8ed1ab_2.conda#8d652ea2ee8eaee02ed8dc820bc794aa -https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_0.conda#15dda3cdbf330abfe9f555d22f66db46 -https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.14.2-h14ed4e7_0.conda#0f69b688f52ff6da70bccb7ff7001d1d -https://conda.anaconda.org/conda-forge/noarch/fsspec-2024.3.1-pyhca7485f_0.conda#b7f0662ef2c9d4404f0af9eef5ed2fde -https://conda.anaconda.org/conda-forge/linux-64/gfortran-12.3.0-h915e2ae_6.conda#84b517f4f53e56256dbd65133aae04ac -https://conda.anaconda.org/conda-forge/linux-64/gfortran_linux-64-12.3.0-h617cb40_3.conda#3a9e5b8a6f651ff14e74d896d8f04ab6 -https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.80.0-hde27a5a_6.conda#a9d23c02485c5cf055f9ac90eb9c9c63 -https://conda.anaconda.org/conda-forge/linux-64/gxx-12.3.0-h915e2ae_6.conda#0d977804df65082e17c860600ca2894b -https://conda.anaconda.org/conda-forge/linux-64/gxx_linux-64-12.3.0-h4a1b8e8_3.conda#9ec22c7c544f4a4f6d660f0a3b0fd15c -https://conda.anaconda.org/conda-forge/noarch/idna-3.7-pyhd8ed1ab_0.conda#c0cc1420498b17414d8617d0b9f506ca +https://conda.anaconda.org/conda-forge/noarch/appdirs-1.4.4-pyhd8ed1ab_1.conda#f4e90937bbfc3a4a92539545a37bb448 +https://conda.anaconda.org/conda-forge/linux-64/brotli-1.1.0-hb9d3cd8_2.conda#98514fe74548d768907ce7a13f680e8f +https://conda.anaconda.org/conda-forge/linux-64/brotli-python-1.1.0-py310hf71b8c6_2.conda#bf502c169c71e3c6ac0d6175addfacc2 +https://conda.anaconda.org/conda-forge/noarch/certifi-2025.4.26-pyhd8ed1ab_0.conda#c33eeaaa33f45031be34cda513df39b6 +https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.4.2-pyhd8ed1ab_0.conda#40fe4284b8b5835a9073a645139f35af +https://conda.anaconda.org/conda-forge/noarch/click-8.1.8-pyh707e725_0.conda#f22f4d4970e09d68a10b922cbb0408d3 +https://conda.anaconda.org/conda-forge/noarch/cloudpickle-3.1.1-pyhd8ed1ab_0.conda#364ba6c9fb03886ac979b482f39ebb92 +https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_1.conda#962b9857ee8e7018c22f2776ffa0b2d7 +https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhd8ed1ab_1.conda#44600c4667a319d67dbe0681fc0bc833 +https://conda.anaconda.org/conda-forge/linux-64/cyrus-sasl-2.1.27-h54b06d7_7.conda#dce22f70b4e5a407ce88f2be046f4ceb +https://conda.anaconda.org/conda-forge/linux-64/cython-3.0.10-py310hc6cd4ac_0.conda#bd1d71ee240be36f1d85c86177d6964f +https://conda.anaconda.org/conda-forge/noarch/docutils-0.21.2-pyhd8ed1ab_1.conda#24c1ca34138ee57de72a943237cde4cc +https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.2-pyhd8ed1ab_1.conda#a16662747cdeb9abbac74d0057cc976e +https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_1.conda#a71efeae2c160f6789900ba2631a2c90 +https://conda.anaconda.org/conda-forge/noarch/fsspec-2025.3.2-pyhd8ed1ab_0.conda#9c40692c3d24c7aaf335f673ac09d308 +https://conda.anaconda.org/conda-forge/linux-64/gcc-13.3.0-h9576a4e_2.conda#d92e51bf4b6bdbfe45e5884fb0755afe +https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-13.3.0-hc28eda2_10.conda#d151142bbafe5e68ec7fc065c5e6f80c +https://conda.anaconda.org/conda-forge/linux-64/gettext-0.24.1-h5888daf_0.conda#c63e7590d4d6f4c85721040ed8b12888 +https://conda.anaconda.org/conda-forge/linux-64/gfortran_impl_linux-64-13.3.0-h84c1745_2.conda#4e21ed177b76537067736f20f54fee0a +https://conda.anaconda.org/conda-forge/linux-64/gxx_impl_linux-64-13.3.0-hae580e1_2.conda#b55f02540605c322a47719029f8404cc 
+https://conda.anaconda.org/conda-forge/noarch/hpack-4.1.0-pyhd8ed1ab_0.conda#0a802cb9888dd14eeefc611f05c40b6e +https://conda.anaconda.org/conda-forge/noarch/hyperframe-6.1.0-pyhd8ed1ab_0.conda#8e6923fc12f1fe8f8c4e5c9f343256ac +https://conda.anaconda.org/conda-forge/noarch/idna-3.10-pyhd8ed1ab_1.conda#39a4f67be3286c86d696df570b1201b7 https://conda.anaconda.org/conda-forge/noarch/imagesize-1.4.1-pyhd8ed1ab_0.tar.bz2#7de5386c8fea29e76b303f37dde4c352 -https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5 -https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.5-py39h7633fee_1.conda#c9f74d717e5a2847a9f8b779c54130f2 -https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.16-hb7c19ff_0.conda#51bb7010fc86f70eee639b4bb7a894f5 -https://conda.anaconda.org/conda-forge/linux-64/libclang-cpp15-15.0.7-default_h127d8a8_5.conda#d0a9633b53cdc319b8a1a532ae7822b8 -https://conda.anaconda.org/conda-forge/linux-64/libclang13-18.1.3-default_h5d6823c_0.conda#5fff487759736b275dc3e4a263cac666 +https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_1.conda#6837f3eff7dcea42ecd714ce1ac2b108 +https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.7-py310h3788b33_0.conda#4186d9b4d004b0fe0de6aa62496fb48a +https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.17-h717163a_0.conda#000e85703f0fd9594c81710dd5066471 +https://conda.anaconda.org/conda-forge/linux-64/libavif16-1.2.1-hbb36593_2.conda#971387a27e61235b97cacb440a37e991 +https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-31_hba4ea11_blis.conda#1ea7ae3db0fea0c5222388d841583c51 https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h4637d8d_4.conda#d4529f4dff3057982a7617c7ac58fde3 -https://conda.anaconda.org/conda-forge/linux-64/libflac-1.4.3-h59595ed_0.conda#ee48bf17cc83a00f59ca1494d5646869 -https://conda.anaconda.org/conda-forge/linux-64/libgpg-error-1.49-h4f305b6_0.conda#dfcfd72c7a430d3616763ecfbefe4ca9 -https://conda.anaconda.org/conda-forge/linux-64/libpq-16.2-h33b98f1_1.conda#9e49ec2a61d02623b379dc332eb6889d +https://conda.anaconda.org/conda-forge/linux-64/libfreetype-2.13.3-ha770c72_1.conda#51f5be229d83ecd401fb369ab96ae669 +https://conda.anaconda.org/conda-forge/linux-64/libglib-2.84.1-h3618099_1.conda#714c97d4ff495ab69d1fdfcadbcae985 +https://conda.anaconda.org/conda-forge/linux-64/libglx-1.7.0-ha4b6fd6_2.conda#c8013e438185f33b13814c5c488acd5c +https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-12_hd37a5e2_netlib.conda#4b181b55915cefcd35c8398c9274e629 +https://conda.anaconda.org/conda-forge/linux-64/libsystemd0-257.4-h4e0b6ca_1.conda#04bcf3055e51f8dde6fab9672fb9fca0 +https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.13.7-h4bc477f_1.conda#ad1f1f8238834cd3c88ceeaee8da444a https://conda.anaconda.org/conda-forge/noarch/locket-1.0.0-pyhd8ed1ab_0.tar.bz2#91e27ef3d05cc772ce627e51cff111c4 -https://conda.anaconda.org/conda-forge/linux-64/markupsafe-2.1.5-py39hd1e30aa_0.conda#9a9a22eb1f83c44953319ee3b027769f +https://conda.anaconda.org/conda-forge/linux-64/markupsafe-3.0.2-py310h89163eb_1.conda#8ce3f0332fd6de0d737e2911d329523f +https://conda.anaconda.org/conda-forge/noarch/meson-1.8.0-pyh29332c3_0.conda#8e25221b702272394b86b0f4d7217f77 +https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19 https://conda.anaconda.org/conda-forge/noarch/networkx-3.2-pyhd8ed1ab_0.conda#cec8cc498664cc00a070676aa89e69a7 
-https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.2-h488ebb8_0.conda#7f2e286780f072ed750df46dc2631138 -https://conda.anaconda.org/conda-forge/noarch/packaging-24.0-pyhd8ed1ab_0.conda#248f521b64ce055e7feae3105e7abeb8 -https://conda.anaconda.org/conda-forge/noarch/platformdirs-4.2.1-pyhd8ed1ab_0.conda#d478a8a3044cdff1aa6e62f9269cefe0 -https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_0.conda#d3483c8fc2dc2cc3f5cf43e26d60cabf -https://conda.anaconda.org/conda-forge/noarch/ply-3.11-pyhd8ed1ab_2.conda#18c6deb6f9602e32446398203c8f0e91 -https://conda.anaconda.org/conda-forge/linux-64/psutil-5.9.8-py39hd1e30aa_0.conda#ec86403fde8793ac1c36f8afa3d15902 -https://conda.anaconda.org/conda-forge/noarch/pygments-2.17.2-pyhd8ed1ab_0.conda#140a7f159396547e9799aa98f9f0742e -https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.2-pyhd8ed1ab_0.conda#b9a4dacf97241704529131a0dfc0494f -https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha2e5f31_6.tar.bz2#2a7de29fb590ca14b5243c4c812c8025 -https://conda.anaconda.org/conda-forge/noarch/pytz-2024.1-pyhd8ed1ab_0.conda#3eeeeb9e4827ace8c0c1419c85d590ad -https://conda.anaconda.org/conda-forge/linux-64/pyyaml-6.0.1-py39hd1e30aa_1.conda#37218233bcdc310e4fde6453bc1b40d8 -https://conda.anaconda.org/conda-forge/linux-64/setuptools-59.8.0-py39hf3d152e_1.tar.bz2#4252d0c211566a9f65149ba7f6e87aa4 -https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 +https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.3-h5fbd93e_0.conda#9e5816bc95d285c115a3ebc2f8563564 +https://conda.anaconda.org/conda-forge/noarch/packaging-25.0-pyh29332c3_1.conda#58335b26c38bf4a20f399384c33cbcf9 +https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_1.conda#e9dcbce5f45f9ee500e728ae58b605b6 +https://conda.anaconda.org/conda-forge/noarch/ply-3.11-pyhd8ed1ab_3.conda#fd5062942bfa1b0bd5e0d2a4397b099e +https://conda.anaconda.org/conda-forge/linux-64/psutil-7.0.0-py310ha75aee5_0.conda#da7d592394ff9084a23f62a1186451a2 +https://conda.anaconda.org/conda-forge/noarch/pycparser-2.22-pyh29332c3_1.conda#12c566707c80111f9799308d9e265aef +https://conda.anaconda.org/conda-forge/noarch/pygments-2.19.1-pyhd8ed1ab_0.conda#232fb4577b6687b2d503ef8e254270c9 +https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.2.3-pyhd8ed1ab_1.conda#513d3c262ee49b54a8fec85c5bc99764 +https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha55dd90_7.conda#461219d1a5bd61342293efa2c0c90eac +https://conda.anaconda.org/conda-forge/noarch/pytz-2025.2-pyhd8ed1ab_0.conda#bc8e3267d44011051f2eb14d22fb0960 +https://conda.anaconda.org/conda-forge/linux-64/pyyaml-6.0.2-py310h89163eb_2.conda#fd343408e64cf1e273ab7c710da374db +https://conda.anaconda.org/conda-forge/noarch/setuptools-80.1.0-pyhff2d567_0.conda#f6f72d0837c79eaec77661be43e8a691 +https://conda.anaconda.org/conda-forge/noarch/six-1.17.0-pyhd8ed1ab_0.conda#a451d576819089b0d672f18768be0f65 https://conda.anaconda.org/conda-forge/noarch/snowballstemmer-2.2.0-pyhd8ed1ab_0.tar.bz2#4d22a9315e78c6827f806065957d566e -https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-jsmath-1.0.1-pyhd8ed1ab_0.conda#da1d979339e2714c30a8e806a33ec087 -https://conda.anaconda.org/conda-forge/linux-64/tbb-2021.12.0-h00ab1b0_0.conda#f1b776cff1b426e7e7461a8502a3b731 -https://conda.anaconda.org/conda-forge/noarch/tenacity-8.2.3-pyhd8ed1ab_0.conda#1482e77f87c6a702a7e05ef22c9b197b 
-https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.4.0-pyhc1e730c_0.conda#b296278eef667c673bf51de6535bad88 -https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095 -https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 -https://conda.anaconda.org/conda-forge/noarch/toolz-0.12.1-pyhd8ed1ab_0.conda#2fcb582444635e2c402e8569bb94e039 -https://conda.anaconda.org/conda-forge/linux-64/tornado-6.4-py39hd1e30aa_0.conda#1e865e9188204cdfb1fd2531780add88 -https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.11.0-pyha770c72_0.conda#6ef2fc37559256cf682d8b3375e89b80 -https://conda.anaconda.org/conda-forge/noarch/wheel-0.43.0-pyhd8ed1ab_1.conda#0b5293a157c2b5cd513dd1b03d8d3aae -https://conda.anaconda.org/conda-forge/linux-64/xcb-util-image-0.4.0-h8ee46fc_1.conda#9d7bcddf49cbf727730af10e71022c73 -https://conda.anaconda.org/conda-forge/linux-64/xkeyboard-config-2.41-hd590300_0.conda#81f740407b45e3f9047b3174fa94eb9e -https://conda.anaconda.org/conda-forge/linux-64/xorg-libxext-1.3.4-h0b41bf4_2.conda#82b6df12252e6f32402b96dacc656fec -https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.11-hd590300_0.conda#ed67c36f215b310412b2af935bf3e530 -https://conda.anaconda.org/conda-forge/noarch/zipp-3.17.0-pyhd8ed1ab_0.conda#2e4d6bc0b14e10f895fc6791a7d9b26a -https://conda.anaconda.org/conda-forge/noarch/babel-2.14.0-pyhd8ed1ab_0.conda#9669586875baeced8fc30c0826c3270e -https://conda.anaconda.org/conda-forge/linux-64/cairo-1.18.0-h3faef2a_0.conda#f907bb958910dc404647326ca80c263e -https://conda.anaconda.org/conda-forge/linux-64/cxx-compiler-1.7.0-h00ab1b0_0.conda#b4537c98cb59f8725b0e1e65816b4a28 -https://conda.anaconda.org/conda-forge/linux-64/cytoolz-0.12.3-py39hd1e30aa_0.conda#dc0fb8e157c7caba4c98f1e1f9d2e5f4 -https://conda.anaconda.org/conda-forge/linux-64/fortran-compiler-1.7.0-heb67821_0.conda#7ef7c0f111dad1c8006504a0f1ccd820 -https://conda.anaconda.org/conda-forge/linux-64/glib-2.80.0-hf2295e7_6.conda#a1e026a82a562b443845db5614ca568a -https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-7.1.0-pyha770c72_0.conda#0896606848b2dc5cebdf111b6543aa04 -https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.3-pyhd8ed1ab_0.conda#e7d8df6509ba635247ff9aea31134262 -https://conda.anaconda.org/conda-forge/noarch/joblib-1.4.0-pyhd8ed1ab_0.conda#e0ed1bf13ce3a440e022157bf4764465 -https://conda.anaconda.org/conda-forge/linux-64/libgcrypt-1.10.3-hd590300_0.conda#32d16ad533c59bb0a3c5ffaf16110829 -https://conda.anaconda.org/conda-forge/linux-64/libsndfile-1.2.2-hc60ed4a_1.conda#ef1910918dd895516a769ed36b5b3a4e -https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.7.0-h662e7e4_0.conda#b32c0da42b1f24a98577bb3d7fc0b995 -https://conda.anaconda.org/conda-forge/noarch/memory_profiler-0.61.0-pyhd8ed1ab_0.tar.bz2#8b45f9f2b2f7a98b0ec179c8991a4a9b -https://conda.anaconda.org/conda-forge/noarch/meson-1.4.0-pyhd8ed1ab_0.conda#52a0660cfa40b45bf254ecc3374cb2e0 -https://conda.anaconda.org/conda-forge/linux-64/mkl-2024.1.0-ha957f24_692.conda#e7f5c5cda17c6f5047db27d44367c19d -https://conda.anaconda.org/conda-forge/noarch/partd-1.4.1-pyhd8ed1ab_0.conda#acf4b7c0bcd5fa3b0e05801c4d2accd6 -https://conda.anaconda.org/conda-forge/linux-64/pillow-10.3.0-py39h90c7501_0.conda#1e3b6af9592be71ce19f0a6aae05d97b -https://conda.anaconda.org/conda-forge/noarch/pip-24.0-pyhd8ed1ab_0.conda#f586ac1e56c8638b64f9c8122a7b8a67 
+https://conda.anaconda.org/conda-forge/noarch/soupsieve-2.7-pyhd8ed1ab_0.conda#fb32097c717486aa34b38a9db57eb49e +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-jsmath-1.0.1-pyhd8ed1ab_1.conda#fa839b5ff59e192f411ccc7dae6588bb +https://conda.anaconda.org/conda-forge/noarch/tenacity-9.1.2-pyhd8ed1ab_0.conda#5d99943f2ae3cc69e1ada12ce9d4d701 +https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.6.0-pyhecae5ae_0.conda#9d64911b31d57ca443e9f1e36b04385f +https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_1.conda#b0dd904de08b7db706167240bf37b164 +https://conda.anaconda.org/conda-forge/noarch/tomli-2.2.1-pyhd8ed1ab_1.conda#ac944244f1fed2eb49bae07193ae8215 +https://conda.anaconda.org/conda-forge/noarch/toolz-1.0.0-pyhd8ed1ab_1.conda#40d0ed782a8aaa16ef248e68c06c168d +https://conda.anaconda.org/conda-forge/linux-64/tornado-6.4.2-py310ha75aee5_0.conda#166d59aab40b9c607b4cc21c03924e9d +https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.13.2-pyh29332c3_0.conda#83fc6ae00127671e301c9f44254c31b8 +https://conda.anaconda.org/conda-forge/linux-64/unicodedata2-16.0.0-py310ha75aee5_0.conda#1d7a4b9202cdd10d56ecdd7f6c347190 +https://conda.anaconda.org/conda-forge/noarch/wheel-0.45.1-pyhd8ed1ab_1.conda#75cb7132eb58d97896e173ef12ac9986 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-image-0.4.0-hb711507_2.conda#a0901183f08b6c7107aab109733a3c91 +https://conda.anaconda.org/conda-forge/linux-64/xkeyboard-config-2.44-hb9d3cd8_0.conda#7c91bfc90672888259675ad2ad28af9c +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxext-1.3.6-hb9d3cd8_0.conda#febbab7d15033c913d53c7a2c102309d +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxfixes-6.0.1-hb9d3cd8_0.conda#4bdb303603e9821baf5fe5fdff1dc8f8 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.12-hb9d3cd8_0.conda#96d57aba173e878a2089d5638016dc5e +https://conda.anaconda.org/conda-forge/noarch/zipp-3.21.0-pyhd8ed1ab_1.conda#0c3cc595284c5e8f0f9900a9b228a332 +https://conda.anaconda.org/conda-forge/noarch/accessible-pygments-0.0.5-pyhd8ed1ab_1.conda#74ac5069774cdbc53910ec4d631a3999 +https://conda.anaconda.org/conda-forge/noarch/babel-2.17.0-pyhd8ed1ab_0.conda#0a01c169f0ab0f91b26e77a3301fbfe4 +https://conda.anaconda.org/conda-forge/linux-64/brunsli-0.1-h9c3ff4c_0.tar.bz2#c1ac6229d0bfd14f8354ff9ad2a26cad +https://conda.anaconda.org/conda-forge/linux-64/c-compiler-1.9.0-h2b85faf_0.conda#3cb814f83f1f71ac1985013697f80cc1 +https://conda.anaconda.org/conda-forge/linux-64/cffi-1.17.1-py310h8deb56e_0.conda#1fc24a3196ad5ede2a68148be61894f4 +https://conda.anaconda.org/conda-forge/linux-64/cytoolz-1.0.1-py310ha75aee5_0.conda#d0be1adaa04a03aed745f3d02afb59ce +https://conda.anaconda.org/conda-forge/linux-64/dbus-1.13.6-h5008d03_3.tar.bz2#ecfff944ba3960ecb334b9a2663d708d +https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.57.0-py310h89163eb_0.conda#34378af82141b3c1725dcdf898b28fc6 +https://conda.anaconda.org/conda-forge/linux-64/freetype-2.13.3-ha770c72_1.conda#9ccd736d31e0c6e41f54e704e5312811 +https://conda.anaconda.org/conda-forge/linux-64/gfortran-13.3.0-h9576a4e_2.conda#19e6d3c9cde10a0a9a170a684082588e +https://conda.anaconda.org/conda-forge/linux-64/gfortran_linux-64-13.3.0-hb919d3a_10.conda#7ce070e3329cd10bf79dbed562a21bd4 +https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.84.1-h4833e2c_1.conda#418de18c9b79a3d8583d90d27e0937c2 +https://conda.anaconda.org/conda-forge/linux-64/gxx-13.3.0-h9576a4e_2.conda#07e8df00b7cd3084ad3ef598ce32a71c 
+https://conda.anaconda.org/conda-forge/linux-64/gxx_linux-64-13.3.0-h6834431_10.conda#9a8ebde471cec5cc9c48f8682f434f92 +https://conda.anaconda.org/conda-forge/noarch/h2-4.2.0-pyhd8ed1ab_0.conda#b4754fb1bdcb70c8fd54f918301582c6 +https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-8.6.1-pyha770c72_0.conda#f4b39bf00c69f56ac01e020ebfac066c +https://conda.anaconda.org/conda-forge/noarch/importlib_resources-6.5.2-pyhd8ed1ab_0.conda#c85c76dc67d75619a92f51dfbce06992 +https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.6-pyhd8ed1ab_0.conda#446bd6c8cb26050d528881df495ce646 +https://conda.anaconda.org/conda-forge/noarch/joblib-1.5.0-pyhd8ed1ab_0.conda#3d7257f0a61c9aa4ffa3e324a887416b +https://conda.anaconda.org/conda-forge/linux-64/libflac-1.4.3-h59595ed_0.conda#ee48bf17cc83a00f59ca1494d5646869 +https://conda.anaconda.org/conda-forge/linux-64/libgl-1.7.0-ha4b6fd6_2.conda#928b8be80851f5d8ffb016f9c81dae7a +https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.9.0-12_hce4cc19_netlib.conda#bdcf65db13abdddba7af29592f93600b +https://conda.anaconda.org/conda-forge/linux-64/libllvm20-20.1.4-he9d0ab4_0.conda#96c33bbd084ef2b2463503fb7f1482ae +https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.9.2-h65c71a3_0.conda#d045b1d878031eb497cab44e6392b1df +https://conda.anaconda.org/conda-forge/noarch/memory_profiler-0.61.0-pyhd8ed1ab_1.conda#71abbefb6f3b95e1668cd5e0af3affb9 +https://conda.anaconda.org/conda-forge/linux-64/numpy-1.22.0-py310h454958d_1.tar.bz2#607c66f0cce2986515a8fe9e136b2b57 +https://conda.anaconda.org/conda-forge/linux-64/openldap-2.6.9-he970967_0.conda#ca2de8bbdc871bce41dbf59e51324165 +https://conda.anaconda.org/conda-forge/noarch/partd-1.4.2-pyhd8ed1ab_0.conda#0badf9c54e24cecfb0ad2f99d680c163 +https://conda.anaconda.org/conda-forge/noarch/pip-25.1.1-pyh8b19718_0.conda#32d0781ace05105cc99af55d36cbec7c https://conda.anaconda.org/conda-forge/noarch/plotly-5.14.0-pyhd8ed1ab_0.conda#6a7bcc42ef58dd6cf3da9333ea102433 -https://conda.anaconda.org/conda-forge/noarch/pyproject-metadata-0.8.0-pyhd8ed1ab_0.conda#573fe09d7bd0cd4bcc210d8369b5ca47 -https://conda.anaconda.org/conda-forge/noarch/pytest-7.4.4-pyhd8ed1ab_0.conda#a9d145de8c5f064b5fa68fb34725d9f4 -https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0-pyhd8ed1ab_0.conda#2cf4264fffb9e6eff6031c5b6884d61c -https://conda.anaconda.org/conda-forge/linux-64/sip-6.7.12-py39h3d6467e_0.conda#e667a3ab0df62c54e60e1843d2e6defb -https://conda.anaconda.org/conda-forge/noarch/urllib3-2.2.1-pyhd8ed1ab_0.conda#08807a87fa7af10754d46f63b368e016 -https://conda.anaconda.org/conda-forge/linux-64/compilers-1.7.0-ha770c72_0.conda#81458b3aed8ab8711951ec3c0c04e097 -https://conda.anaconda.org/conda-forge/linux-64/gstreamer-1.24.1-h98fc4e7_1.conda#b04b5cdf3ba01430db27979250bc5a1d -https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-8.4.0-h3d44ed6_0.conda#27f46291a6aaa3c2a4f798ebd35a7ddb -https://conda.anaconda.org/conda-forge/noarch/importlib_metadata-7.1.0-hd8ed1ab_0.conda#6ef2b72d291b39e479d7694efa2b2b98 -https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-22_linux64_mkl.conda#eb6deb4ba6f92ea3f31c09cb8b764738 -https://conda.anaconda.org/conda-forge/linux-64/libsystemd0-255-h3516f8a_1.conda#3366af27f0b593544a6cd453c7932ac5 -https://conda.anaconda.org/conda-forge/noarch/meson-python-0.16.0-pyh0c530f3_0.conda#e16f0dbf502da873be9f9adb0dc52547 -https://conda.anaconda.org/conda-forge/linux-64/mkl-devel-2024.1.0-ha770c72_692.conda#56142862a71bcfdd6ef2ce95c8e90755 
-https://conda.anaconda.org/conda-forge/linux-64/pyqt5-sip-12.12.2-py39h3d6467e_5.conda#93aff412f3e49fdb43361c0215cbd72d -https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.5.0-pyhd8ed1ab_0.conda#d5f595da2daead898ca958ac62f0307b -https://conda.anaconda.org/conda-forge/noarch/requests-2.31.0-pyhd8ed1ab_0.conda#a30144e4156cdbb236f99ebb49828f8b -https://conda.anaconda.org/conda-forge/noarch/dask-core-2024.4.2-pyhd8ed1ab_0.conda#bb4e6c52855aa64a5443ca4eedaa6cfe -https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.24.1-hfa15dee_1.conda#a6dd2bbc684913e2bef0a54ce56fcbfb -https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-22_linux64_mkl.conda#d6f942423116553f068b2f2d93ffea2e -https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-22_linux64_mkl.conda#4edf2e7ce63920e4f539d12e32fb478e -https://conda.anaconda.org/conda-forge/noarch/pooch-1.8.1-pyhd8ed1ab_0.conda#d15917f33140f8d2ac9ca44db7ec8a25 -https://conda.anaconda.org/conda-forge/linux-64/pulseaudio-client-17.0-hb77b528_0.conda#07f45f1be1c25345faddb8db0de8039b -https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.9.0-22_linux64_mkl.conda#aa0a5a70e1c957d5911e76ac98e471e1 -https://conda.anaconda.org/conda-forge/linux-64/numpy-1.19.5-py39hd249d9e_3.tar.bz2#0cf333996ebdeeba8d1c8c1c0ee9eff9 -https://conda.anaconda.org/conda-forge/linux-64/qt-main-5.15.8-hc9dc06e_21.conda#b325046180590c868ce0dbf267b82eb8 -https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-22_linux64_mkl.conda#3cb0e51433c88d2f4cdfb50c5c08a683 -https://conda.anaconda.org/conda-forge/linux-64/imagecodecs-lite-2019.12.3-py39hd257fcd_5.tar.bz2#32dba66d6abc2b4b5b019c9e54307312 -https://conda.anaconda.org/conda-forge/noarch/imageio-2.34.1-pyh4b66e23_0.conda#bcf6a6f4c6889ca083e8d33afbafb8d5 -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.3.4-py39h2fa2bec_0.tar.bz2#9ec0b2186fab9121c54f4844f93ee5b7 -https://conda.anaconda.org/conda-forge/linux-64/pandas-1.1.5-py39hde0f152_0.tar.bz2#79fc4b5b3a865b90dd3701cecf1ad33c -https://conda.anaconda.org/conda-forge/noarch/patsy-0.5.6-pyhd8ed1ab_0.conda#a5b55d1cb110cdcedc748b5c3e16e687 -https://conda.anaconda.org/conda-forge/linux-64/polars-0.19.12-py39h90d8ae4_0.conda#191828961c95f8d59fa2b86a590f9905 -https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.15.9-py39h52134e7_5.conda#e1f148e57d071b09187719df86f513c1 -https://conda.anaconda.org/conda-forge/linux-64/pywavelets-1.3.0-py39hd257fcd_1.tar.bz2#c4b698994b2d8d2e659ae02202e6abe4 -https://conda.anaconda.org/conda-forge/linux-64/scipy-1.6.0-py39hee8e79c_0.tar.bz2#3afcb78281836e61351a2924f3230060 -https://conda.anaconda.org/conda-forge/linux-64/blas-2.122-mkl.conda#ead856637ff8a7feba572e2cf23b453b -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.3.4-py39hf3d152e_0.tar.bz2#cbaec993375a908bbe506dc7328d747c -https://conda.anaconda.org/conda-forge/linux-64/pyamg-4.2.3-py39hac2352c_1.tar.bz2#6fb0628d6195d8b6caa2422d09296399 -https://conda.anaconda.org/conda-forge/noarch/seaborn-base-0.12.2-pyhd8ed1ab_0.conda#cf88f3a1c11536bc3c10c14ad00ccc42 -https://conda.anaconda.org/conda-forge/linux-64/statsmodels-0.13.2-py39hd257fcd_0.tar.bz2#bd7cdadf70e34a19333c3aacc40206e8 -https://conda.anaconda.org/conda-forge/noarch/tifffile-2020.6.3-py_0.tar.bz2#1fb771bb25b2eecbc73abf5143fa35bd -https://conda.anaconda.org/conda-forge/linux-64/scikit-image-0.17.2-py39hde0f152_4.tar.bz2#2a58a7e382317b03f023b2fddf40f8a1 -https://conda.anaconda.org/conda-forge/noarch/seaborn-0.12.2-hd8ed1ab_0.conda#50847a47c07812f88581081c620f5160 
+https://conda.anaconda.org/conda-forge/noarch/pyproject-metadata-0.9.1-pyhd8ed1ab_0.conda#22ae7c6ea81e0c8661ef32168dda929b +https://conda.anaconda.org/conda-forge/noarch/pytest-8.3.5-pyhd8ed1ab_0.conda#c3c9316209dec74a705a36797970c6be +https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0.post0-pyhff2d567_1.conda#5ba79d7c71f03c678c8ead841f347d6e +https://conda.anaconda.org/conda-forge/linux-64/sip-6.8.6-py310hf71b8c6_2.conda#a50d1007fecaff3f98b19034a8e0b2e7 +https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.13.2-h0e9735f_0.conda#568ed1300869dca0ba09fb750cda5dbb +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdamage-1.1.6-hb9d3cd8_0.conda#b5fcc7172d22516e1f965490e65e33a4 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxxf86vm-1.1.6-hb9d3cd8_0.conda#5efa5fa6243a622445fdfd72aee15efa +https://conda.anaconda.org/conda-forge/noarch/beautifulsoup4-4.13.4-pyha770c72_0.conda#9f07c4fc992adb2d6c30da7fab3959a7 +https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-31_hdec4247_blis.conda#1675e95a742c910204645f7b6d7e56dc +https://conda.anaconda.org/conda-forge/linux-64/cxx-compiler-1.9.0-h1a2810e_0.conda#1ce8b218d359d9ed0ab481f2a3f3c512 +https://conda.anaconda.org/conda-forge/noarch/dask-core-2025.4.1-pyhd8ed1ab_0.conda#0735ecef025a6c2d6eb61aae4785fc3f +https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.15.0-h7e30c49_1.conda#8f5b0b297b59e1ac160ad4beec99dbee +https://conda.anaconda.org/conda-forge/linux-64/fortran-compiler-1.9.0-h36df796_0.conda#cc0cf942201f9d3b0e9654ea02e12486 +https://conda.anaconda.org/conda-forge/linux-64/glib-2.84.1-h6287aef_1.conda#35012688d30e1b52bff2ba5d1f342a50 +https://conda.anaconda.org/conda-forge/linux-64/imagecodecs-2024.12.30-py310h78a9a29_0.conda#e0c50079904122427bcf52e1afcd1cdb +https://conda.anaconda.org/conda-forge/noarch/importlib-resources-6.5.2-pyhd8ed1ab_0.conda#e376ea42e9ae40f3278b0f79c9bf9826 +https://conda.anaconda.org/conda-forge/linux-64/libclang-cpp20.1-20.1.4-default_h1df26ce_0.conda#96f8d5b2e94c9ba4fef19f1adf068a15 +https://conda.anaconda.org/conda-forge/linux-64/libclang13-20.1.4-default_he06ed0a_0.conda#2d933632c8004be47deb2be61bf013be +https://conda.anaconda.org/conda-forge/linux-64/libpq-17.4-h27ae623_1.conda#37fba334855ef3b51549308e61ed7a3d +https://conda.anaconda.org/conda-forge/linux-64/libsndfile-1.2.2-hc60ed4a_1.conda#ef1910918dd895516a769ed36b5b3a4e +https://conda.anaconda.org/conda-forge/noarch/meson-python-0.18.0-pyh70fd9c4_0.conda#576c04b9d9f8e45285fb4d9452c26133 +https://conda.anaconda.org/conda-forge/linux-64/pandas-1.4.0-py310hb5077e9_0.tar.bz2#43e920bc9856daa7d8d18fcbfb244c4e +https://conda.anaconda.org/conda-forge/noarch/patsy-1.0.1-pyhd8ed1ab_1.conda#ee23fabfd0a8c6b8d6f3729b47b2859d +https://conda.anaconda.org/conda-forge/linux-64/pillow-11.1.0-py310h7e6dc6c_0.conda#14d300b9e1504748e70cc6499a7b4d25 +https://conda.anaconda.org/conda-forge/linux-64/polars-0.20.30-py310h031f9ce_0.conda#0743f5db9f978b6df92d412935ff8371 +https://conda.anaconda.org/conda-forge/linux-64/pyqt5-sip-12.13.0-py310hf71b8c6_1.conda#0c8cbfbe70f4c8a47b040a14615e6f1f +https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.6.1-pyhd8ed1ab_1.conda#59aad4fb37cabc0bacc73cf344612ddd +https://conda.anaconda.org/conda-forge/linux-64/pywavelets-1.6.0-py310h261611a_0.conda#04a405ee0bccb4de8d1ed0c87704f5f6 +https://conda.anaconda.org/conda-forge/linux-64/scipy-1.8.0-py310hea5193d_1.tar.bz2#664d80ddeb51241629b3ada5ea926e4d 
+https://conda.anaconda.org/conda-forge/linux-64/zstandard-0.23.0-py310ha75aee5_2.conda#f9254b5b0193982416b91edcb4b2676f +https://conda.anaconda.org/conda-forge/linux-64/blas-2.131-blis.conda#87829e6b9fe49a926280e100959b7d2b +https://conda.anaconda.org/conda-forge/linux-64/cairo-1.18.4-h3394656_0.conda#09262e66b19567aff4f592fb53b28760 +https://conda.anaconda.org/conda-forge/linux-64/compilers-1.9.0-ha770c72_0.conda#5859096e397aba423340d0bbbb11ec64 +https://conda.anaconda.org/conda-forge/linux-64/gstreamer-1.24.11-hc37bda9_0.conda#056d86cacf2b48c79c6a562a2486eb8c +https://conda.anaconda.org/conda-forge/noarch/imageio-2.37.0-pyhfb79c49_0.conda#b5577bc2212219566578fd5af9993af6 +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.5.0-py310h23f4a51_0.tar.bz2#9911225650b298776c8e8c083b5cacf1 +https://conda.anaconda.org/conda-forge/linux-64/pulseaudio-client-17.0-hac146a9_1.conda#66b1fa9608d8836e25f9919159adc9c6 +https://conda.anaconda.org/conda-forge/linux-64/pyamg-4.2.1-py310h7c3ba0c_0.tar.bz2#89f5a48e1f23b5cf3163a6094903d181 +https://conda.anaconda.org/conda-forge/linux-64/statsmodels-0.14.2-py310h261611a_0.conda#4b8508bab02b2aa2cef12eab4883f4a1 +https://conda.anaconda.org/conda-forge/noarch/tifffile-2025.3.30-pyhd8ed1ab_0.conda#14f46147fae19bb867f82a787c7059e9 +https://conda.anaconda.org/conda-forge/noarch/towncrier-24.8.0-pyhd8ed1ab_1.conda#820b6a1ddf590fba253f8204f7200d82 +https://conda.anaconda.org/conda-forge/noarch/urllib3-2.4.0-pyhd8ed1ab_0.conda#c1e349028e0052c4eea844e94f773065 +https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.24.11-h651a532_0.conda#d8d8894f8ced2c9be76dc9ad1ae531ce +https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-11.1.0-h3beb420_0.conda#95e3bb97f9cdc251c0c68640e9c10ed3 +https://conda.anaconda.org/conda-forge/noarch/requests-2.32.3-pyhd8ed1ab_1.conda#a9b9368f3701a417eac9edbcae7cb737 +https://conda.anaconda.org/conda-forge/linux-64/scikit-image-0.19.0-py310hb5077e9_0.tar.bz2#aa24b3a4aa979641ac3144405209cd89 +https://conda.anaconda.org/conda-forge/noarch/seaborn-base-0.13.2-pyhd8ed1ab_3.conda#fd96da444e81f9e6fcaac38590f3dd42 +https://conda.anaconda.org/conda-forge/noarch/pooch-1.6.0-pyhd8ed1ab_0.tar.bz2#6429e1d1091c51f626b5dcfdd38bf429 +https://conda.anaconda.org/conda-forge/linux-64/qt-main-5.15.15-h993ce98_3.conda#aa49f5308f39277477d47cd6687eb8f3 +https://conda.anaconda.org/conda-forge/noarch/seaborn-0.13.2-hd8ed1ab_3.conda#62afb877ca2c2b4b6f9ecb37320085b6 +https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.15.10-py310hb3b5edb_1.conda#c370972fc4557cb54d265c9c1f71bd20 +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.5.0-py310hff52083_0.tar.bz2#1b2f3b135d5d9c594b5e0e6150c03b7b https://conda.anaconda.org/conda-forge/noarch/numpydoc-1.2-pyhd8ed1ab_0.tar.bz2#025ad7ca2c7f65007ab6b6f5d93a56eb -https://conda.anaconda.org/conda-forge/noarch/sphinx-copybutton-0.5.2-pyhd8ed1ab_0.conda#ac832cc43adc79118cf6e23f1f9b8995 -https://conda.anaconda.org/conda-forge/noarch/sphinx-gallery-0.15.0-pyhd8ed1ab_0.conda#1a49ca9515ef9a96edff2eea06143dc6 -https://conda.anaconda.org/conda-forge/noarch/sphinx-prompt-1.3.0-py_0.tar.bz2#9363002e2a134a287af4e32ff0f26cdc -https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-applehelp-1.0.8-pyhd8ed1ab_0.conda#611a35a27914fac3aa37611a6fe40bb5 -https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-devhelp-1.0.6-pyhd8ed1ab_0.conda#d7e4954df0d3aea2eacc7835ad12671d -https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-htmlhelp-2.0.5-pyhd8ed1ab_0.conda#7e1e7437273682ada2ed5e9e9714b140 
-https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-qthelp-1.0.7-pyhd8ed1ab_0.conda#26acae54b06f178681bfb551760f5dd1 -https://conda.anaconda.org/conda-forge/noarch/sphinx-6.0.0-pyhd8ed1ab_2.conda#ac1d3b55da1669ee3a56973054fd7efb -https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-serializinghtml-1.1.10-pyhd8ed1ab_0.conda#e507335cb4ca9cff4c3d0fa9cdab255e -# pip sphinxext-opengraph @ https://files.pythonhosted.org/packages/50/ac/c105ed3e0a00b14b28c0aa630935af858fd8a32affeff19574b16e2c6ae8/sphinxext_opengraph-0.4.2-py3-none-any.whl#sha256=a51f2604f9a5b6c0d25d3a88e694d5c02e20812dc0e482adf96c8628f9109357 +https://conda.anaconda.org/conda-forge/noarch/pydata-sphinx-theme-0.15.3-pyhd8ed1ab_0.conda#55e445f4fcb07f2471fb0e1102d36488 +https://conda.anaconda.org/conda-forge/noarch/sphinx-copybutton-0.5.2-pyhd8ed1ab_1.conda#bf22cb9c439572760316ce0748af3713 +https://conda.anaconda.org/conda-forge/noarch/sphinx-design-0.6.0-pyhd8ed1ab_0.conda#b04f3c04e4f7939c6207dc0c0355f468 +https://conda.anaconda.org/conda-forge/noarch/sphinx-gallery-0.17.1-pyhd8ed1ab_0.conda#0adfccc6e7269a29a63c1c8ee3c6d8ba +https://conda.anaconda.org/conda-forge/noarch/sphinx-prompt-1.4.0-pyhd8ed1ab_0.tar.bz2#88ee91e8679603f2a5bd036d52919cc2 +https://conda.anaconda.org/conda-forge/noarch/sphinx-remove-toctrees-1.0.0.post1-pyhd8ed1ab_1.conda#b275c865b753413caaa8548b9d44c024 +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-applehelp-2.0.0-pyhd8ed1ab_1.conda#16e3f039c0aa6446513e94ab18a8784b +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-devhelp-2.0.0-pyhd8ed1ab_1.conda#910f28a05c178feba832f842155cbfff +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-htmlhelp-2.1.0-pyhd8ed1ab_1.conda#e9fb3fe8a5b758b4aff187d434f94f03 +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-qthelp-2.0.0-pyhd8ed1ab_1.conda#00534ebcc0375929b45c3039b5ba7636 +https://conda.anaconda.org/conda-forge/noarch/sphinx-7.3.7-pyhd8ed1ab_0.conda#7b1465205e28d75d2c0e1a868ee00a67 +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-serializinghtml-1.1.10-pyhd8ed1ab_1.conda#3bc61f7161d28137797e038263c04c54 +# pip libsass @ https://files.pythonhosted.org/packages/fd/5a/eb5b62641df0459a3291fc206cf5bd669c0feed7814dded8edef4ade8512/libsass-0.23.0-cp38-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.whl#sha256=4a218406d605f325d234e4678bd57126a66a88841cb95bee2caeafdc6f138306 +# pip sphinxcontrib-sass @ https://files.pythonhosted.org/packages/2e/87/7c2eb08e3ca1d6baae32c0a5e005330fe1cec93a36aa085e714c3b3a3c7d/sphinxcontrib_sass-0.3.4-py2.py3-none-any.whl#sha256=a0c79a44ae8b8935c02dc340ebe40c9e002c839331201c899dc93708970c355a +# pip sphinxext-opengraph @ https://files.pythonhosted.org/packages/92/0a/970b80b4fa1feeb6deb6f2e22d4cb14e388b27b315a1afdb9db930ff91a4/sphinxext_opengraph-0.9.1-py3-none-any.whl#sha256=b3b230cc6a5b5189139df937f0d9c7b23c7c204493b22646273687969dcb760e diff --git a/build_tools/circle/list_versions.py b/build_tools/circle/list_versions.py index 345e08b4bece4..00526f062f200 100755 --- a/build_tools/circle/list_versions.py +++ b/build_tools/circle/list_versions.py @@ -1,6 +1,11 @@ #!/usr/bin/env python3 -# List all available versions of the documentation +# Write the available versions page (--rst) and the version switcher JSON (--json). 
+# Version switcher see: +# https://pydata-sphinx-theme.readthedocs.io/en/stable/user_guide/version-dropdown.html +# https://pydata-sphinx-theme.readthedocs.io/en/stable/user_guide/announcements.html#announcement-banners + +import argparse import json import re import sys @@ -52,19 +57,22 @@ def get_file_size(version): return human_readable_data_quantity(path_details["size"], 1000) -print(":orphan:") -print() -heading = "Available documentation for Scikit-learn" -print(heading) -print("=" * len(heading)) -print() -print("Web-based documentation is available for versions listed below:") -print() +parser = argparse.ArgumentParser() +parser.add_argument("--rst", type=str, required=True) +parser.add_argument("--json", type=str, required=True) +args = parser.parse_args() + +heading = "Available documentation for scikit-learn" +json_content = [] +rst_content = [ + ":orphan:\n", + heading, + "=" * len(heading) + "\n", + "Web-based documentation is available for versions listed below:\n", +] -ROOT_URL = ( - "https://api.github.com/repos/scikit-learn/scikit-learn.github.io/contents/" # noqa -) -RAW_FMT = "https://raw.githubusercontent.com/scikit-learn/scikit-learn.github.io/master/%s/index.html" # noqa +ROOT_URL = "https://api.github.com/repos/scikit-learn/scikit-learn.github.io/contents/" +RAW_FMT = "https://raw.githubusercontent.com/scikit-learn/scikit-learn.github.io/master/%s/index.html" VERSION_RE = re.compile(r"scikit-learn ([\w\.\-]+) documentation") NAMED_DIRS = ["dev", "stable"] @@ -93,8 +101,9 @@ def get_file_size(version): # Output in order: dev, stable, decreasing other version seen = set() -for name in NAMED_DIRS + sorted( - (k for k in dirs if k[:1].isdigit()), key=parse_version, reverse=True +for i, name in enumerate( + NAMED_DIRS + + sorted((k for k in dirs if k[:1].isdigit()), key=parse_version, reverse=True) ): version_num, file_size = dirs[name] if version_num in seen: @@ -102,17 +111,32 @@ def get_file_size(version): continue else: seen.add(version_num) - name_display = "" if name[:1].isdigit() else " (%s)" % name - path = "https://scikit-learn.org/%s/" % name - out = "* `Scikit-learn %s%s documentation <%s>`_" % ( - version_num, - name_display, - path, - ) + + full_name = f"{version_num}" if name[:1].isdigit() else f"{version_num} ({name})" + path = f"https://scikit-learn.org/{name}/" + + # Update JSON for the version switcher; only keep the 8 latest versions to avoid + # overloading the version switcher dropdown + if i < 8: + info = {"name": full_name, "version": version_num, "url": path} + if name == "stable": + info["preferred"] = True + json_content.append(info) + + # Printout for the historical version page + out = f"* `scikit-learn {full_name} documentation <{path}>`_" if file_size is not None: file_extension = get_file_extension(version_num) out += ( f" (`{file_extension.upper()} {file_size} <{path}/" f"_downloads/scikit-learn-docs.{file_extension}>`_)" ) - print(out) + rst_content.append(out) + +with open(args.rst, "w", encoding="utf-8") as f: + f.write("\n".join(rst_content) + "\n") +print(f"Written {args.rst}") + +with open(args.json, "w", encoding="utf-8") as f: + json.dump(json_content, f, indent=2) +print(f"Written {args.json}") diff --git a/build_tools/cirrus/arm_tests.yml b/build_tools/cirrus/arm_tests.yml deleted file mode 100644 index 09874e081b460..0000000000000 --- a/build_tools/cirrus/arm_tests.yml +++ /dev/null @@ -1,34 +0,0 @@ -linux_aarch64_test_task: - compute_engine_instance: - image_project: cirrus-images - image: family/docker-builder-arm64 - 
architecture: arm64 - platform: linux - cpu: 4 - memory: 6G - env: - CONDA_ENV_NAME: testenv - LOCK_FILE: build_tools/cirrus/pymin_conda_forge_linux-aarch64_conda.lock - CONDA_PKGS_DIRS: /root/.conda/pkgs - HOME: / # $HOME is not defined in image and is required to install mambaforge - # Upload tokens have been encrypted via the CirrusCI interface: - # https://cirrus-ci.org/guide/writing-tasks/#encrypted-variables - # See `maint_tools/update_tracking_issue.py` for details on the permissions the token requires. - BOT_GITHUB_TOKEN: ENCRYPTED[9b50205e2693f9e4ce9a3f0fcb897a259289062fda2f5a3b8aaa6c56d839e0854a15872f894a70fca337dd4787274e0f] - ccache_cache: - folder: /root/.cache/ccache - conda_cache: - folder: /root/.conda/pkgs - fingerprint_script: cat build_tools/cirrus/pymin_conda_forge_linux-aarch64_conda.lock - - install_python_script: | - # Install python so that update_tracking_issue has access to a Python - apt install -y python3 python-is-python3 - - test_script: | - bash build_tools/cirrus/build_test_arm.sh - # On success, this script is run updating the issue. - bash build_tools/cirrus/update_tracking_issue.sh true - - on_failure: - update_tracker_script: bash build_tools/cirrus/update_tracking_issue.sh false diff --git a/build_tools/cirrus/arm_wheel.yml b/build_tools/cirrus/arm_wheel.yml deleted file mode 100644 index c3dfcfbc53ad9..0000000000000 --- a/build_tools/cirrus/arm_wheel.yml +++ /dev/null @@ -1,76 +0,0 @@ -linux_arm64_wheel_task: - compute_engine_instance: - image_project: cirrus-images - image: family/docker-builder-arm64 - architecture: arm64 - platform: linux - cpu: 4 - memory: 4G - env: - CIBW_ENVIRONMENT: SKLEARN_SKIP_NETWORK_TESTS=1 - SKLEARN_BUILD_PARALLEL=5 - CIBW_TEST_COMMAND: bash {project}/build_tools/wheels/test_wheels.sh - CIBW_TEST_REQUIRES: pytest pandas threadpoolctl pytest-xdist - CIBW_BUILD_VERBOSITY: 1 - # Upload tokens have been encrypted via the CirrusCI interface: - # https://cirrus-ci.org/guide/writing-tasks/#encrypted-variables - # See `maint_tools/update_tracking_issue.py` for details on the permissions the token requires. 
- BOT_GITHUB_TOKEN: ENCRYPTED[9b50205e2693f9e4ce9a3f0fcb897a259289062fda2f5a3b8aaa6c56d839e0854a15872f894a70fca337dd4787274e0f] - matrix: - # Only the latest Python version is tested - - env: - CIBW_BUILD: cp39-manylinux_aarch64 - CIBW_TEST_SKIP: "*_aarch64" - - env: - CIBW_BUILD: cp310-manylinux_aarch64 - CIBW_TEST_SKIP: "*_aarch64" - - env: - CIBW_BUILD: cp311-manylinux_aarch64 - CIBW_TEST_SKIP: "*_aarch64" - - env: - CIBW_BUILD: cp312-manylinux_aarch64 - - cibuildwheel_script: - - apt install -y python3 python-is-python3 - - bash build_tools/wheels/build_wheels.sh - - on_failure: - update_tracker_script: - - bash build_tools/cirrus/update_tracking_issue.sh false - - wheels_artifacts: - path: "wheelhouse/*" - -# Update tracker when all jobs are successful -update_tracker_success: - depends_on: - - linux_arm64_wheel - container: - image: python:3.11 - # Only update tracker for nightly builds - only_if: $CIRRUS_CRON == "nightly" - update_script: - - bash build_tools/cirrus/update_tracking_issue.sh true - -wheels_upload_task: - depends_on: - - linux_arm64_wheel - container: - image: continuumio/miniconda3:22.11.1 - # Artifacts are not uploaded on PRs - only_if: $CIRRUS_PR == "" - env: - # Upload tokens have been encrypted via the CirrusCI interface: - # https://cirrus-ci.org/guide/writing-tasks/#encrypted-variables - SCIKIT_LEARN_NIGHTLY_UPLOAD_TOKEN: ENCRYPTED[9cf0529227577d503f2e19ef31cb690a2272cb243a217fb9a1ceda5cc608e8ccc292050fde9dca94cab766e1dd418519] - SCIKIT_LEARN_STAGING_UPLOAD_TOKEN: ENCRYPTED[8fade46af37fa645e57bd1ee21683337aa369ba56f6307ce13889f1e74df94e5bdd21d323baac21e332fd87b8949659a] - ARTIFACTS_PATH: wheelhouse - upload_script: | - conda install curl unzip -y - - # Download and show wheels - curl https://api.cirrus-ci.com/v1/artifact/build/$CIRRUS_BUILD_ID/wheels.zip --output wheels.zip - unzip wheels.zip - ls wheelhouse - - bash build_tools/github/upload_anaconda.sh diff --git a/build_tools/cirrus/pymin_conda_forge_linux-aarch64_conda.lock b/build_tools/cirrus/pymin_conda_forge_linux-aarch64_conda.lock deleted file mode 100644 index d9fa69b319d28..0000000000000 --- a/build_tools/cirrus/pymin_conda_forge_linux-aarch64_conda.lock +++ /dev/null @@ -1,94 +0,0 @@ -# Generated by conda-lock. 
-# platform: linux-aarch64 -# input_hash: 80459c6003cbcd22780a22a62ed5cc116e951d5c2c14602af1281434263b9138 -@EXPLICIT -https://conda.anaconda.org/conda-forge/linux-aarch64/ca-certificates-2024.2.2-hcefe29a_0.conda#57c226edb90c4e973b9b7503537dd339 -https://conda.anaconda.org/conda-forge/linux-aarch64/ld_impl_linux-aarch64-2.40-hba4e955_0.conda#b55c1cb33c63d23b542fa53f24541e56 -https://conda.anaconda.org/conda-forge/linux-aarch64/libstdcxx-ng-13.2.0-h9a76618_5.conda#1b79d37dce0fad96bdf3de03925f43b4 -https://conda.anaconda.org/conda-forge/linux-aarch64/python_abi-3.9-4_cp39.conda#c191905a08694e4a5cb1238e90233878 -https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda#161081fc7cec0bfda0d86d7cb595f8d8 -https://conda.anaconda.org/conda-forge/linux-aarch64/_openmp_mutex-4.5-2_kmp_llvm.tar.bz2#98a1185182fec3c434069fa74e6473d6 -https://conda.anaconda.org/conda-forge/linux-aarch64/libgcc-ng-13.2.0-hf8544c7_5.conda#dee934e640275d9e74e7bbd455f25162 -https://conda.anaconda.org/conda-forge/linux-aarch64/bzip2-1.0.8-h31becfc_5.conda#a64e35f01e0b7a2a152eca87d33b9c87 -https://conda.anaconda.org/conda-forge/linux-aarch64/lerc-4.0.0-h4de3ea5_0.tar.bz2#1a0ffc65e03ce81559dbcb0695ad1476 -https://conda.anaconda.org/conda-forge/linux-aarch64/libbrotlicommon-1.1.0-h31becfc_1.conda#1b219fd801eddb7a94df5bd001053ad9 -https://conda.anaconda.org/conda-forge/linux-aarch64/libdeflate-1.20-h31becfc_0.conda#018592a3d691662f451f89d0de474a20 -https://conda.anaconda.org/conda-forge/linux-aarch64/libffi-3.4.2-h3557bc0_5.tar.bz2#dddd85f4d52121fab0a8b099c5e06501 -https://conda.anaconda.org/conda-forge/linux-aarch64/libgfortran5-13.2.0-h582850c_5.conda#547486aac825d236de3beecb927b389c -https://conda.anaconda.org/conda-forge/linux-aarch64/libjpeg-turbo-3.0.0-h31becfc_1.conda#ed24e702928be089d9ba3f05618515c6 -https://conda.anaconda.org/conda-forge/linux-aarch64/libnsl-2.0.1-h31becfc_0.conda#c14f32510f694e3185704d89967ec422 -https://conda.anaconda.org/conda-forge/linux-aarch64/libuuid-2.38.1-hb4cce97_0.conda#000e30b09db0b7c775b21695dff30969 -https://conda.anaconda.org/conda-forge/linux-aarch64/libwebp-base-1.4.0-h31becfc_0.conda#5fd7ab3e5f382c70607fbac6335e6e19 -https://conda.anaconda.org/conda-forge/linux-aarch64/libxcrypt-4.4.36-h31becfc_1.conda#b4df5d7d4b63579d081fd3a4cf99740e -https://conda.anaconda.org/conda-forge/linux-aarch64/libzlib-1.2.13-h31becfc_5.conda#b213aa87eea9491ef7b129179322e955 -https://conda.anaconda.org/conda-forge/linux-aarch64/ncurses-6.4.20240210-h0425590_0.conda#c1a1612ddaee95c83abfa0b2ec858626 -https://conda.anaconda.org/conda-forge/linux-aarch64/ninja-1.12.0-h2a328a1_0.conda#c0f3f508baf69c8db8142466beaa0ccc -https://conda.anaconda.org/conda-forge/linux-aarch64/openssl-3.2.1-h31becfc_1.conda#e95eb18d256edc72058e0dc9be5338a0 -https://conda.anaconda.org/conda-forge/linux-aarch64/pthread-stubs-0.4-hb9de7d4_1001.tar.bz2#d0183ec6ce0b5aaa3486df25fa5f0ded -https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxau-1.0.11-h31becfc_0.conda#13de34f69cb73165dbe08c1e9148bedb -https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxdmcp-1.1.3-h3557bc0_0.tar.bz2#a6c9016ae1ca5c47a3603ed4cd65fedd -https://conda.anaconda.org/conda-forge/linux-aarch64/xz-5.2.6-h9cdd2b7_0.tar.bz2#83baad393a31d59c20b63ba4da6592df -https://conda.anaconda.org/conda-forge/linux-aarch64/libbrotlidec-1.1.0-h31becfc_1.conda#8db7cff89510bec0b863a0a8ee6a7bce -https://conda.anaconda.org/conda-forge/linux-aarch64/libbrotlienc-1.1.0-h31becfc_1.conda#ad3d3a826b5848d99936e4466ebbaa26 
-https://conda.anaconda.org/conda-forge/linux-aarch64/libgfortran-ng-13.2.0-he9431aa_5.conda#fab7c6a8c84492e18cbe578820e97a56 -https://conda.anaconda.org/conda-forge/linux-aarch64/libpng-1.6.43-h194ca79_0.conda#1123e504d9254dd9494267ab9aba95f0 -https://conda.anaconda.org/conda-forge/linux-aarch64/libsqlite-3.45.3-h194ca79_0.conda#fb35b8afbe9e92467ac7b5608d60b775 -https://conda.anaconda.org/conda-forge/linux-aarch64/libxcb-1.15-h2a766a3_0.conda#eb3d8c8170e3d03f2564ed2024aa00c8 -https://conda.anaconda.org/conda-forge/linux-aarch64/readline-8.2-h8fc344f_1.conda#105eb1e16bf83bfb2eb380a48032b655 -https://conda.anaconda.org/conda-forge/linux-aarch64/tk-8.6.13-h194ca79_0.conda#f75105e0585851f818e0009dd1dde4dc -https://conda.anaconda.org/conda-forge/linux-aarch64/zstd-1.5.5-h4c53e97_0.conda#b74eb9dbb5c3c15cb3cee7cbdf198c75 -https://conda.anaconda.org/conda-forge/linux-aarch64/brotli-bin-1.1.0-h31becfc_1.conda#9e4a13596ab651ea8d77aae023d0ce3f -https://conda.anaconda.org/conda-forge/linux-aarch64/freetype-2.12.1-hf0a5ef3_2.conda#a5ab74c5bd158c3d5532b66d8d83d907 -https://conda.anaconda.org/conda-forge/linux-aarch64/libhiredis-1.0.2-h05efe27_0.tar.bz2#a87f068744fd20334cd41489eb163bee -https://conda.anaconda.org/conda-forge/linux-aarch64/libopenblas-0.3.27-pthreads_h5a5ec62_0.conda#ffecca8f4f31cd50b92c0e6e6bfe4416 -https://conda.anaconda.org/conda-forge/linux-aarch64/libtiff-4.6.0-hf980d43_3.conda#b6f3abf5726ae33094bee238b4eb492f -https://conda.anaconda.org/conda-forge/linux-aarch64/llvm-openmp-18.1.3-h8b0cb96_0.conda#cd4d2b7580dd020814ea34ebbbca8c5e -https://conda.anaconda.org/conda-forge/linux-aarch64/python-3.9.19-h4ac3b42_0_cpython.conda#1501507cd9451472ec8900d587ce872f -https://conda.anaconda.org/conda-forge/linux-aarch64/brotli-1.1.0-h31becfc_1.conda#e41f5862ac746428407f3fd44d2ed01f -https://conda.anaconda.org/conda-forge/linux-aarch64/ccache-4.9.1-h6552966_0.conda#758b202f61f6bbfd2c6adf0fde043276 -https://conda.anaconda.org/conda-forge/noarch/certifi-2024.2.2-pyhd8ed1ab_0.conda#0876280e409658fc6f9e75d035960333 -https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 -https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhd8ed1ab_0.conda#5cd86562580f274031ede6aa6aa24441 -https://conda.anaconda.org/conda-forge/linux-aarch64/cython-3.0.10-py39h387a81e_0.conda#0e917a89f77c978d152099357bd75b22 -https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.0-pyhd8ed1ab_2.conda#8d652ea2ee8eaee02ed8dc820bc794aa -https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_0.conda#15dda3cdbf330abfe9f555d22f66db46 -https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5 -https://conda.anaconda.org/conda-forge/linux-aarch64/kiwisolver-1.4.5-py39had2cf8c_1.conda#ddb99610f7b950fdd5ff2aff19136363 -https://conda.anaconda.org/conda-forge/linux-aarch64/lcms2-2.16-h922389a_0.conda#ffdd8267a04c515e7ce69c727b051414 -https://conda.anaconda.org/conda-forge/linux-aarch64/libblas-3.9.0-22_linuxaarch64_openblas.conda#068ab33f2382cda4dd0b72a715ad33b5 -https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19 -https://conda.anaconda.org/conda-forge/linux-aarch64/openblas-0.3.27-pthreads_h339cbfa_0.conda#cb06c34a3056f59e9e244c20836add8a -https://conda.anaconda.org/conda-forge/linux-aarch64/openjpeg-2.5.2-h0d9d63b_0.conda#fd2898519e839d5ceb778343f39a3176 
-https://conda.anaconda.org/conda-forge/noarch/packaging-24.0-pyhd8ed1ab_0.conda#248f521b64ce055e7feae3105e7abeb8 -https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_0.conda#d3483c8fc2dc2cc3f5cf43e26d60cabf -https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.2-pyhd8ed1ab_0.conda#b9a4dacf97241704529131a0dfc0494f -https://conda.anaconda.org/conda-forge/noarch/setuptools-69.5.1-pyhd8ed1ab_0.conda#7462280d81f639363e6e63c81276bd9e -https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 -https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.4.0-pyhc1e730c_0.conda#b296278eef667c673bf51de6535bad88 -https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 -https://conda.anaconda.org/conda-forge/linux-aarch64/tornado-6.4-py39h7cc1d5f_0.conda#2c06a653ebfa389c18aea2d8f338df3b -https://conda.anaconda.org/conda-forge/linux-aarch64/unicodedata2-15.1.0-py39h898b7ef_0.conda#8c072c9329aeea97a46005625267a851 -https://conda.anaconda.org/conda-forge/noarch/wheel-0.43.0-pyhd8ed1ab_1.conda#0b5293a157c2b5cd513dd1b03d8d3aae -https://conda.anaconda.org/conda-forge/noarch/zipp-3.17.0-pyhd8ed1ab_0.conda#2e4d6bc0b14e10f895fc6791a7d9b26a -https://conda.anaconda.org/conda-forge/linux-aarch64/fonttools-4.51.0-py39h898b7ef_0.conda#7b6a069c66a729454fb4c534ed145dcd -https://conda.anaconda.org/conda-forge/noarch/importlib_resources-6.4.0-pyhd8ed1ab_0.conda#c5d3907ad8bd7bf557521a1833cf7e6d -https://conda.anaconda.org/conda-forge/noarch/joblib-1.4.0-pyhd8ed1ab_0.conda#e0ed1bf13ce3a440e022157bf4764465 -https://conda.anaconda.org/conda-forge/linux-aarch64/libcblas-3.9.0-22_linuxaarch64_openblas.conda#fbe7fe553f2cc78a0311e009b26f180d -https://conda.anaconda.org/conda-forge/linux-aarch64/liblapack-3.9.0-22_linuxaarch64_openblas.conda#8c709d281609792c39b1d5c0241f90f1 -https://conda.anaconda.org/conda-forge/noarch/meson-1.4.0-pyhd8ed1ab_0.conda#52a0660cfa40b45bf254ecc3374cb2e0 -https://conda.anaconda.org/conda-forge/linux-aarch64/pillow-10.3.0-py39h71661b1_0.conda#dae548b7b537d7ef796d1d4c38a55319 -https://conda.anaconda.org/conda-forge/noarch/pip-24.0-pyhd8ed1ab_0.conda#f586ac1e56c8638b64f9c8122a7b8a67 -https://conda.anaconda.org/conda-forge/noarch/pyproject-metadata-0.8.0-pyhd8ed1ab_0.conda#573fe09d7bd0cd4bcc210d8369b5ca47 -https://conda.anaconda.org/conda-forge/noarch/pytest-7.4.4-pyhd8ed1ab_0.conda#a9d145de8c5f064b5fa68fb34725d9f4 -https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0-pyhd8ed1ab_0.conda#2cf4264fffb9e6eff6031c5b6884d61c -https://conda.anaconda.org/conda-forge/noarch/importlib-resources-6.4.0-pyhd8ed1ab_0.conda#dcbadab7a68738a028e195ab68ab2d2e -https://conda.anaconda.org/conda-forge/linux-aarch64/liblapacke-3.9.0-22_linuxaarch64_openblas.conda#5acf669e0be669f30f4b813d2ecda7b8 -https://conda.anaconda.org/conda-forge/noarch/meson-python-0.16.0-pyh0c530f3_0.conda#e16f0dbf502da873be9f9adb0dc52547 -https://conda.anaconda.org/conda-forge/linux-aarch64/numpy-1.26.4-py39h91c28bb_0.conda#d88e195f11a9f27e649aea408b54cb48 -https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.5.0-pyhd8ed1ab_0.conda#d5f595da2daead898ca958ac62f0307b -https://conda.anaconda.org/conda-forge/linux-aarch64/blas-devel-3.9.0-22_linuxaarch64_openblas.conda#a5b77b6c6807661afd716f33e85814b3 -https://conda.anaconda.org/conda-forge/linux-aarch64/contourpy-1.2.1-py39hd16970a_0.conda#66b9718539ecdd38876b0176c315bcad 
-https://conda.anaconda.org/conda-forge/linux-aarch64/scipy-1.13.0-py39h91c28bb_0.conda#2b6f1ed053a61c2447304e4b810fc397 -https://conda.anaconda.org/conda-forge/linux-aarch64/blas-2.122-openblas.conda#65bc48b3bc85f8eeeab54311443a83aa -https://conda.anaconda.org/conda-forge/linux-aarch64/matplotlib-base-3.8.4-py39h8e43113_0.conda#f397ddfe5c551732de61a92106a14cf3 -https://conda.anaconda.org/conda-forge/linux-aarch64/matplotlib-3.8.4-py39ha65689a_0.conda#d501bb96ff505fdd431fd8fdac8efbf9 diff --git a/build_tools/cirrus/update_tracking_issue.sh b/build_tools/cirrus/update_tracking_issue.sh deleted file mode 100644 index 9166210ac0007..0000000000000 --- a/build_tools/cirrus/update_tracking_issue.sh +++ /dev/null @@ -1,22 +0,0 @@ -# Update tracking issue if Cirrus fails nightly job - -if [[ "$CIRRUS_CRON" != "nightly" ]]; then - exit 0 -fi - -# TEST_PASSED is either "true" or "false" -TEST_PASSED="$1" - -python -m venv .venv -source .venv/bin/activate -python -m pip install defusedxml PyGithub - -LINK_TO_RUN="https://cirrus-ci.com/build/$CIRRUS_BUILD_ID" - -python maint_tools/update_tracking_issue.py \ - $BOT_GITHUB_TOKEN \ - $CIRRUS_TASK_NAME \ - $CIRRUS_REPO_FULL_NAME \ - $LINK_TO_RUN \ - --tests-passed $TEST_PASSED \ - --auto-close false diff --git a/build_tools/codespell_ignore_words.txt b/build_tools/codespell_ignore_words.txt index fbe501d04f29f..6b942a2eabe6d 100644 --- a/build_tools/codespell_ignore_words.txt +++ b/build_tools/codespell_ignore_words.txt @@ -1,13 +1,17 @@ +achin aggresive aline ba basf boun bre +bu cach +chanel complies coo copys +datas deine didi feld @@ -17,20 +21,25 @@ fro fwe gool hart +heping hist ines inout ist jaques +lamas linke lod mape +mis mor nd nmae ocur pullrequest +repid ro +ser soler suh suprised @@ -40,6 +49,8 @@ teh thi usal vie +vor wan +whis winn yau diff --git a/build_tools/generate_authors_table.py b/build_tools/generate_authors_table.py index 483dc3739506e..6dcddda40af4d 100644 --- a/build_tools/generate_authors_table.py +++ b/build_tools/generate_authors_table.py @@ -15,9 +15,9 @@ import requests -print("user:", file=sys.stderr) +print("Input user:", file=sys.stderr) user = input() -token = getpass.getpass("access token:\n") +token = getpass.getpass("Input access token:\n") auth = (user, token) LOGO_URL = "https://avatars2.githubusercontent.com/u/365630?v=4" @@ -63,11 +63,13 @@ def get_contributors(): ), (core_devs, contributor_experience_team, comm_team, documentation_team), ): + print(f"Retrieving {team_slug}\n") for page in [1, 2]: # 30 per page reply = get(f"{entry_point}teams/{team_slug}/members?page={page}") lst.extend(reply.json()) # get members of scikit-learn on GitHub + print("Retrieving members\n") members = [] for page in [1, 2, 3]: # 30 per page reply = get(f"{entry_point}members?page={page}") @@ -214,6 +216,7 @@ def generate_list(contributors): documentation_team, ) = get_contributors() + print("Generating rst files") with open( REPO_FOLDER / "doc" / "maintainers.rst", "w+", encoding="utf-8" ) as rst_file: diff --git a/build_tools/get_comment.py b/build_tools/get_comment.py index b357c68f23e3e..48ff14a058c9a 100644 --- a/build_tools/get_comment.py +++ b/build_tools/get_comment.py @@ -55,10 +55,7 @@ def get_step_message(log, start, end, title, message, details): if end not in log: return "" res = ( - "-----------------------------------------------\n" - + f"### {title}\n\n" - + message - + "\n\n" + f"-----------------------------------------------\n### {title}\n\n{message}\n\n" ) if details: res += ( @@ -93,33 +90,31 @@ def 
get_message(log_file, repo, pr_number, sha, run_id, details, versions): message = "" - # black + # ruff check message += get_step_message( log, - start="### Running black ###", - end="Problems detected by black", - title="`black`", + start="### Running the ruff linter ###", + end="Problems detected by ruff check", + title="`ruff check`", message=( - "`black` detected issues. Please run `black .` locally and push " - "the changes. Here you can see the detected issues. Note that " - "running black might also fix some of the issues which might be " - "detected by `ruff`. Note that the installed `black` version is " - f"`black={versions['black']}`." + "`ruff` detected issues. Please run " + "`ruff check --fix --output-format=full` locally, fix the remaining " + "issues, and push the changes. Here you can see the detected issues. Note " + f"that the installed `ruff` version is `ruff={versions['ruff']}`." ), details=details, ) - # ruff + # ruff format message += get_step_message( log, - start="### Running ruff ###", - end="Problems detected by ruff", - title="`ruff`", + start="### Running the ruff formatter ###", + end="Problems detected by ruff format", + title="`ruff format`", message=( - "`ruff` detected issues. Please run " - "`ruff check --fix --output-format=full .` locally, fix the remaining " - "issues, and push the changes. Here you can see the detected issues. Note " - f"that the installed `ruff` version is `ruff={versions['ruff']}`." + "`ruff` detected issues. Please run `ruff format` locally and push " + "the changes. Here you can see the detected issues. Note that the " + f"installed `ruff` version is `ruff={versions['ruff']}`." ), details=details, ) @@ -240,7 +235,7 @@ def get_headers(token): def find_lint_bot_comments(repo, token, pr_number): """Get the comment from the linting bot.""" # repo is in the form of "org/repo" - # API doc: https://docs.github.com/en/rest/issues/comments?apiVersion=2022-11-28#list-issue-comments # noqa + # API doc: https://docs.github.com/en/rest/issues/comments?apiVersion=2022-11-28#list-issue-comments response = requests.get( f"https://api.github.com/repos/{repo}/issues/{pr_number}/comments", headers=get_headers(token), @@ -275,7 +270,7 @@ def create_or_update_comment(comment, message, repo, pr_number, token): # repo is in the form of "org/repo" if comment is not None: print("updating existing comment") - # API doc: https://docs.github.com/en/rest/issues/comments?apiVersion=2022-11-28#update-an-issue-comment # noqa + # API doc: https://docs.github.com/en/rest/issues/comments?apiVersion=2022-11-28#update-an-issue-comment response = requests.patch( f"https://api.github.com/repos/{repo}/issues/comments/{comment['id']}", headers=get_headers(token), @@ -283,7 +278,7 @@ def create_or_update_comment(comment, message, repo, pr_number, token): ) else: print("creating new comment") - # API doc: https://docs.github.com/en/rest/issues/comments?apiVersion=2022-11-28#create-an-issue-comment # noqa + # API doc: https://docs.github.com/en/rest/issues/comments?apiVersion=2022-11-28#create-an-issue-comment response = requests.post( f"https://api.github.com/repos/{repo}/issues/{pr_number}/comments", headers=get_headers(token), diff --git a/build_tools/github/Windows b/build_tools/github/Windows deleted file mode 100644 index a9971aa525581..0000000000000 --- a/build_tools/github/Windows +++ /dev/null @@ -1,13 +0,0 @@ -# Get the Python version of the base image from a build argument -ARG PYTHON_VERSION -FROM winamd64/python:$PYTHON_VERSION-windowsservercore - -ARG 
WHEEL_NAME
-ARG CIBW_TEST_REQUIRES
-
-# Copy and install the Windows wheel
-COPY $WHEEL_NAME $WHEEL_NAME
-RUN pip install $env:WHEEL_NAME
-
-# Install the testing dependencies
-RUN pip install $env:CIBW_TEST_REQUIRES.split(" ")
diff --git a/build_tools/github/build_minimal_windows_image.sh b/build_tools/github/build_minimal_windows_image.sh
index 2995b6906c535..8cc9af937dfd9 100755
--- a/build_tools/github/build_minimal_windows_image.sh
+++ b/build_tools/github/build_minimal_windows_image.sh
@@ -5,21 +5,47 @@ set -x
 
 PYTHON_VERSION=$1
 
-TEMP_FOLDER="$HOME/AppData/Local/Temp"
-WHEEL_PATH=$(ls -d $TEMP_FOLDER/**/*/repaired_wheel/*)
-WHEEL_NAME=$(basename $WHEEL_PATH)
+FREE_THREADED_BUILD="$(python -c"import sysconfig; print(bool(sysconfig.get_config_var('Py_GIL_DISABLED')))")"
 
-cp $WHEEL_PATH $WHEEL_NAME
+if [[ $FREE_THREADED_BUILD == "False" ]]; then
+    # Prepare a minimal Windows environment without any developer runtime libraries
+    # installed to check that the scikit-learn wheel does not implicitly rely on
+    # external DLLs when running the tests.
+    TEMP_FOLDER="$HOME/AppData/Local/Temp"
+    WHEEL_PATH=$(ls -d $TEMP_FOLDER/**/*/repaired_wheel/*)
+    WHEEL_NAME=$(basename $WHEEL_PATH)
 
-# Dot the Python version for identyfing the base Docker image
-PYTHON_VERSION=$(echo ${PYTHON_VERSION:0:1}.${PYTHON_VERSION:1:2})
+    cp $WHEEL_PATH $WHEEL_NAME
 
-if [[ "$CIBW_PRERELEASE_PYTHONS" == "True" ]]; then
-    PYTHON_VERSION="$PYTHON_VERSION-rc"
+    # Dot the Python version for identifying the base Docker image
+    PYTHON_DOCKER_IMAGE_PART=$(echo ${PYTHON_VERSION:0:1}.${PYTHON_VERSION:1:2})
+
+    if [[ "$CIBW_PRERELEASE_PYTHONS" =~ [tT]rue ]]; then
+        PYTHON_DOCKER_IMAGE_PART="${PYTHON_DOCKER_IMAGE_PART}-rc"
+    fi
+
+    # We could have all of the following logic in a Dockerfile but it's a lot
+    # easier to do it in bash rather than figure out how to do it in Powershell
+    # inside the Dockerfile ...
+    DOCKER_IMAGE="winamd64/python:${PYTHON_DOCKER_IMAGE_PART}-windowsservercore"
+    MNT_FOLDER="C:/mnt"
+    CONTAINER_ID=$(docker run -it -v "$(cygpath -w $PWD):$MNT_FOLDER" -d $DOCKER_IMAGE)
+
+    function exec_inside_container() {
+        docker exec $CONTAINER_ID powershell -Command $1
+    }
+
+    exec_inside_container "python -m pip install $MNT_FOLDER/$WHEEL_NAME"
+    exec_inside_container "python -m pip install $CIBW_TEST_REQUIRES"
+
+    # Save container state to scikit-learn/minimal-windows image. On Windows the
+    # container needs to be stopped first.
+    docker stop $CONTAINER_ID
+    docker commit $CONTAINER_ID scikit-learn/minimal-windows
+else
+    # Using a Docker image is too cumbersome in the free-threaded case.
+    # TODO: when pandas has a release with a Windows free-threaded wheel, we
+    # can replace the next line with
+    # python -m pip install $CIBW_TEST_REQUIRES
+    python -m pip install pytest
 fi
-
-# Build a minimal Windows Docker image for testing the wheels
-docker build --build-arg PYTHON_VERSION=$PYTHON_VERSION \
-    --build-arg WHEEL_NAME=$WHEEL_NAME \
-    --build-arg CIBW_TEST_REQUIRES="$CIBW_TEST_REQUIRES" \
-    -f build_tools/github/Windows \
-    -t scikit-learn/minimal-windows .
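The script above commits the provisioned container as `scikit-learn/minimal-windows` but leaves running the tests to a later step. As a minimal sketch of how that image might then be consumed (the real invocation is cibuildwheel's `CIBW_TEST_COMMAND`, which is not part of this diff; the bind mount and pytest arguments below are illustrative assumptions only):

    # Hypothetical follow-up step, not part of this patch: run the wheel's
    # test suite inside the committed minimal image, mounting the repo so the
    # test script is reachable. Mount path and pytest entry point are assumed.
    docker run --rm -v "$(cygpath -w $PWD):C:/mnt" scikit-learn/minimal-windows \
        powershell -Command "python -m pytest --pyargs sklearn"

Because the image was built from a bare `winamd64/python` base with only the wheel and its test requirements installed, any import failure in such a run would point at a DLL the wheel implicitly expects from a developer machine.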
diff --git a/build_tools/cirrus/build_test_arm.sh b/build_tools/github/build_test_arm.sh similarity index 57% rename from build_tools/cirrus/build_test_arm.sh rename to build_tools/github/build_test_arm.sh index 551dc3689e010..db11fdc0e82f0 100755 --- a/build_tools/cirrus/build_test_arm.sh +++ b/build_tools/github/build_test_arm.sh @@ -22,28 +22,10 @@ setup_ccache() { ccache -M 0 } -MAMBAFORGE_URL="https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-aarch64.sh" - -# Install Mambaforge -curl -L --retry 10 $MAMBAFORGE_URL -o mambaforge.sh -MAMBAFORGE_PATH=$HOME/mambaforge -bash ./mambaforge.sh -b -p $MAMBAFORGE_PATH -export PATH=$MAMBAFORGE_PATH/bin:$PATH -mamba init --all --verbose -mamba update --yes mamba -mamba update --yes conda -mamba install "$(get_dep conda-lock min)" -y -conda-lock install --name $CONDA_ENV_NAME $LOCK_FILE -source activate $CONDA_ENV_NAME - setup_ccache python --version -# Set parallelism to $N_CORES + 1 to overlap IO bound tasks with CPU bound tasks on CI -# workers with $N_CORES cores when building the compiled extensions of scikit-learn. -export SKLEARN_BUILD_PARALLEL=$(($N_CORES + 1)) - # Disable the build isolation and build in the tree so that the same folder can be # cached between CI runs. pip install --verbose --no-build-isolation . @@ -51,7 +33,7 @@ pip install --verbose --no-build-isolation . # Report cache usage ccache -s --verbose -mamba list +micromamba list # Changing directory not to have module resolution use scikit-learn source # directory but to the installed package. diff --git a/build_tools/github/check_build_trigger.sh b/build_tools/github/check_build_trigger.sh index 3a38924aa23a7..e6bc77b00e71f 100755 --- a/build_tools/github/check_build_trigger.sh +++ b/build_tools/github/check_build_trigger.sh @@ -5,9 +5,9 @@ set -x COMMIT_MSG=$(git log --no-merges -1 --oneline) -# The commit marker "[cd build]" or "[cd build gh]" will trigger the build when required +# The commit marker "[cd build]" will trigger the build when required if [[ "$GITHUB_EVENT_NAME" == schedule || - "$COMMIT_MSG" =~ \[cd\ build\] || - "$COMMIT_MSG" =~ \[cd\ build\ gh\] ]]; then + "$GITHUB_EVENT_NAME" == workflow_dispatch || + "$COMMIT_MSG" =~ \[cd\ build\] ]]; then echo "build=true" >> $GITHUB_OUTPUT fi diff --git a/build_tools/github/check_wheels.py b/build_tools/github/check_wheels.py index 5579d86c5ce3e..21c9a529b265b 100644 --- a/build_tools/github/check_wheels.py +++ b/build_tools/github/check_wheels.py @@ -16,13 +16,6 @@ # plus one more for the sdist n_wheels += 1 -# arm64 builds from cirrus -cirrus_path = Path.cwd() / "build_tools" / "cirrus" / "arm_wheel.yml" -with cirrus_path.open("r") as f: - cirrus_config = yaml.safe_load(f) - -n_wheels += len(cirrus_config["linux_arm64_wheel_task"]["matrix"]) - dist_files = list(Path("dist").glob("**/*")) n_dist_files = len(dist_files) diff --git a/build_tools/github/create_gpu_environment.sh b/build_tools/github/create_gpu_environment.sh new file mode 100755 index 0000000000000..96a62d7678566 --- /dev/null +++ b/build_tools/github/create_gpu_environment.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +set -e +set -x + +curl -L -O "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh" +bash Miniforge3-$(uname)-$(uname -m).sh -b -p "${HOME}/conda" +source "${HOME}/conda/etc/profile.d/conda.sh" + + +# defines the get_dep and show_installed_libraries functions +source build_tools/shared.sh +conda activate base + +CONDA_ENV_NAME=sklearn 
+LOCK_FILE=build_tools/github/pylatest_conda_forge_cuda_array-api_linux-64_conda.lock +create_conda_environment_from_lock_file $CONDA_ENV_NAME $LOCK_FILE + +conda activate $CONDA_ENV_NAME +conda list diff --git a/build_tools/github/pylatest_conda_forge_cuda_array-api_linux-64_conda.lock b/build_tools/github/pylatest_conda_forge_cuda_array-api_linux-64_conda.lock new file mode 100644 index 0000000000000..868f3f9d863c8 --- /dev/null +++ b/build_tools/github/pylatest_conda_forge_cuda_array-api_linux-64_conda.lock @@ -0,0 +1,249 @@ +# Generated by conda-lock. +# platform: linux-64 +# input_hash: 0c167b26e12c284b769bf4d76bd3e604db266ed21c8f9e11e4bb737419ccdc93 +@EXPLICIT +https://conda.anaconda.org/conda-forge/noarch/cuda-version-11.8-h70ddcb2_3.conda#670f0e1593b8c1d84f57ad5fe5256799 +https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2#0c96522c6bdaed4b1566d11387caaf45 +https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2#34893075a5c9e55cdafac56607368fc6 +https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2#4d59c254e01d9cde7957100457e2d5fb +https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_3.conda#49023d73832ef61042f6a237cb2687e7 +https://conda.anaconda.org/conda-forge/noarch/kernel-headers_linux-64-3.10.0-he073ed8_18.conda#ad8527bf134a90e1c9ed35fa0b64318c +https://conda.anaconda.org/conda-forge/noarch/python_abi-3.13-7_cp313.conda#e84b44e6300f1703cb25d29120c5b1d8 +https://conda.anaconda.org/conda-forge/noarch/tzdata-2025b-h78e105d_0.conda#4222072737ccff51314b5ece9c7d6f5a +https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2025.4.26-hbd8a1cb_0.conda#95db94f75ba080a22eb623590993167b +https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-0.tar.bz2#f766549260d6815b0c52253f1fb1bb29 +https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.43-h712a8e2_4.conda#01f8d123c96816249efd255a31ad7712 +https://conda.anaconda.org/conda-forge/linux-64/libglvnd-1.7.0-ha4b6fd6_2.conda#434ca7e50e40f4918ab701e3facd59a0 +https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-20.1.4-h024ca30_0.conda#4fc395cda27912a7d904b86b5dbf3a4d +https://conda.anaconda.org/conda-forge/noarch/sysroot_linux-64-2.17-h0157908_18.conda#460eba7851277ec1fd80a1a24080787a +https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-3_kmp_llvm.conda#ee5c2118262e30b972bc0b4db8ef0ba5 +https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2#fee5683a3f04bd15cbd8318b096a27ab +https://conda.anaconda.org/conda-forge/linux-64/libegl-1.7.0-ha4b6fd6_2.conda#c151d5eb730e9b7480e6d48c0fc44048 +https://conda.anaconda.org/conda-forge/linux-64/libopengl-1.7.0-ha4b6fd6_2.conda#7df50d44d4a14d6c31a2c54f2cd92157 +https://conda.anaconda.org/conda-forge/linux-64/libgcc-14.2.0-h767d61c_2.conda#ef504d1acbd74b7cc6849ef8af47dd03 +https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.14-hb9d3cd8_0.conda#76df83c2a9035c54df5d04ff81bcc02d +https://conda.anaconda.org/conda-forge/linux-64/aws-c-common-0.10.6-hb9d3cd8_0.conda#d7d4680337a14001b0e043e96529409b +https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.34.5-hb9d3cd8_0.conda#f7f0d6cc2dc986d42ac2689ec88192be +https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.1.0-hb9d3cd8_2.conda#41b599ed2b02abcfdd84302bff174b23 +https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.23-h86f0d12_0.conda#27fe770decaf469a53f3e3a6d593067f 
+https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.0-h5888daf_0.conda#db0bfbe7dd197b68ad5f30333bae6ce0 +https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.6-h2dba641_1.conda#ede4673863426c0883c0063d853bbd85 +https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-14.2.0-h69a702a_2.conda#a2222a6ada71fb478682efe483ce0f92 +https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-14.2.0-hf1ad2bd_2.conda#556a4fdfac7287d349b8f09aba899693 +https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.18-h4ce23a2_1.conda#e796ff8ddc598affdf7c173d6145f087 +https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.1.0-hb9d3cd8_0.conda#9fa334557db9f63da6c9285fd2a48638 +https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.8.1-hb9d3cd8_1.conda#a76fd702c93cd2dfd89eff30a5fd45a8 +https://conda.anaconda.org/conda-forge/linux-64/libntlm-1.8-hb9d3cd8_0.conda#7c7927b404672409d9917d49bff5f2d6 +https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-14.2.0-h8f9b012_2.conda#a78c856b6dc6bf4ea8daeb9beaaa3fb0 +https://conda.anaconda.org/conda-forge/linux-64/libutf8proc-2.9.0-hb9d3cd8_1.conda#1e936bd23d737aac62a18e9a1e7f8b18 +https://conda.anaconda.org/conda-forge/linux-64/libuv-1.50.0-hb9d3cd8_0.conda#771ee65e13bc599b0b62af5359d80169 +https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.5.0-h851e524_0.conda#63f790534398730f59e1b899c3644d4a +https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-hb9d3cd8_2.conda#edb0dca6bc32e4f4789199455a1dbeb8 +https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h2d0b736_3.conda#47e340acb35de30501a76c7c799c41d7 +https://conda.anaconda.org/conda-forge/linux-64/openssl-3.5.0-h7b32b05_1.conda#de356753cfdbffcde5bb1e86e3aa6cd0 +https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-hb9d3cd8_1002.conda#b3c17d95b5a10c6e64a21fa17573e70e +https://conda.anaconda.org/conda-forge/linux-64/xorg-libice-1.1.2-hb9d3cd8_0.conda#fb901ff28063514abb6046c9ec2c4a45 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.12-hb9d3cd8_0.conda#f6ebe2cb3f82ba6c057dde5d9debe4f7 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdmcp-1.1.5-hb9d3cd8_0.conda#8035c64cb77ed555e3f150b7b3972480 +https://conda.anaconda.org/conda-forge/linux-64/aws-c-cal-0.8.1-h1a47875_3.conda#55a8561fdbbbd34f50f57d9be12ed084 +https://conda.anaconda.org/conda-forge/linux-64/aws-c-compression-0.3.0-h4e1184b_5.conda#3f4c1197462a6df2be6dc8241828fe93 +https://conda.anaconda.org/conda-forge/linux-64/aws-c-sdkutils-0.2.1-h4e1184b_4.conda#a5126a90e74ac739b00564a4c7ddcc36 +https://conda.anaconda.org/conda-forge/linux-64/aws-checksums-0.2.2-h4e1184b_4.conda#74e8c3e4df4ceae34aa2959df4b28101 +https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h4bc722e_7.conda#62ee74e96c5ebb0af99386de58cf9553 +https://conda.anaconda.org/conda-forge/linux-64/double-conversion-3.3.1-h5888daf_0.conda#bfd56492d8346d669010eccafe0ba058 +https://conda.anaconda.org/conda-forge/linux-64/expat-2.7.0-h5888daf_0.conda#d6845ae4dea52a2f90178bf1829a21f8 +https://conda.anaconda.org/conda-forge/linux-64/gflags-2.2.2-h5888daf_1005.conda#d411fc29e338efb48c5fd4576d71d881 +https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2#30186d27e2c9fa62b45fb1476b7200e3 +https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h0aef613_1.conda#9344155d33912347b37f0ae6c410a835 +https://conda.anaconda.org/conda-forge/linux-64/libabseil-20240722.0-cxx17_hbbce691_4.conda#488f260ccda0afaf08acb286db439c2f 
+https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.1.0-hb9d3cd8_2.conda#9566f0bd264fbd463002e759b8a82401 +https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.1.0-hb9d3cd8_2.conda#06f70867945ea6a84d35836af780f1de +https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20250104-pl5321h7949ede_0.conda#c277e0a4d549b03ac1e9d6cbbe3d017b +https://conda.anaconda.org/conda-forge/linux-64/libev-4.33-hd590300_2.conda#172bf1cd1ff8629f2b1179945ed45055 +https://conda.anaconda.org/conda-forge/linux-64/libevent-2.1.12-hf998b51_1.conda#a1cfcc585f0c42bf8d5546bb1dfb668d +https://conda.anaconda.org/conda-forge/linux-64/libgfortran-14.2.0-h69a702a_2.conda#fb54c4ea68b460c278d26eea89cfbcc3 +https://conda.anaconda.org/conda-forge/linux-64/libmpdec-4.0.0-h4bc722e_0.conda#aeb98fdeb2e8f25d43ef71fbacbeec80 +https://conda.anaconda.org/conda-forge/linux-64/libpciaccess-0.18-hd590300_0.conda#48f4330bfcd959c3cfb704d424903c82 +https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.47-h943b412_0.conda#55199e2ae2c3651f6f9b2a447b47bdc9 +https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.49.1-hee588c1_2.conda#962d6ac93c30b1dfc54c9cccafd1003e +https://conda.anaconda.org/conda-forge/linux-64/libssh2-1.11.1-hcf80075_0.conda#eecce068c7e4eddeb169591baac20ac4 +https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-14.2.0-h4852527_2.conda#c75da67f045c2627f59e6fcb5f4e3a9b +https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda#40b61aab5c7ba9ff276c41cfffe6b80b +https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.17.0-h8a09558_0.conda#92ed62436b625154323d40d5f2f11dd7 +https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.10.0-h5888daf_1.conda#9de5350a85c4a20c685259b889aa6393 +https://conda.anaconda.org/conda-forge/linux-64/mysql-common-9.2.0-h266115a_0.conda#db22a0962c953e81a2a679ecb1fc6027 +https://conda.anaconda.org/conda-forge/linux-64/ninja-1.12.1-hff21bea_1.conda#2322531904f27501ee19847b87ba7c64 +https://conda.anaconda.org/conda-forge/linux-64/pixman-0.46.0-h29eaf8c_0.conda#d2f1c87d4416d1e7344cf92b1aaee1c4 +https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8c095d6_2.conda#283b96675859b20a825f8fa30f311446 +https://conda.anaconda.org/conda-forge/linux-64/s2n-1.5.11-h072c03f_0.conda#5e8060d52f676a40edef0006a75c718f +https://conda.anaconda.org/conda-forge/linux-64/sleef-3.8-h1b44611_0.conda#aec4dba5d4c2924730088753f6fa164b +https://conda.anaconda.org/conda-forge/linux-64/snappy-1.2.1-h8bd8927_1.conda#3b3e64af585eadfb52bb90b553db5edf +https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda#d453b98d9c83e71da0741bb0ff4d76bc +https://conda.anaconda.org/conda-forge/linux-64/wayland-1.23.1-h3e06ad9_1.conda#a37843723437ba75f42c9270ffe800b1 +https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.7-hb8e6e7a_2.conda#6432cb5d4ac0046c3ac0a8a0f95842f9 +https://conda.anaconda.org/conda-forge/linux-64/aws-c-io-0.15.3-h173a860_6.conda#9a063178f1af0a898526cc24ba7be486 +https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.1.0-hb9d3cd8_2.conda#c63b5e52939e795ba8d26e35d767a843 +https://conda.anaconda.org/conda-forge/linux-64/cudatoolkit-11.8.0-h4ba93d1_13.conda#eb43f5f1f16e2fad2eba22219c3e499b +https://conda.anaconda.org/conda-forge/linux-64/glog-0.7.1-hbabe93e_0.conda#ff862eebdfeb2fd048ae9dc92510baca +https://conda.anaconda.org/conda-forge/linux-64/gmp-6.3.0-hac33072_2.conda#c94a5994ef49749880a8139cf9afcbe1 +https://conda.anaconda.org/conda-forge/linux-64/graphite2-1.3.13-h59595ed_1003.conda#f87c7b7c2cb45f323ffbce941c78ab7c 
+https://conda.anaconda.org/conda-forge/linux-64/icu-75.1-he02047a_0.conda#8b189310083baabfb622af68fd9d3ae3 +https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.3-h659f571_0.conda#3f43953b7d3fb3aaa1d0d0723d91e368 +https://conda.anaconda.org/conda-forge/linux-64/libcrc32c-1.1.2-h9c3ff4c_0.tar.bz2#c965a5aa0d5c1c37ffc62dff36e28400 +https://conda.anaconda.org/conda-forge/linux-64/libdrm-2.4.124-hb9d3cd8_0.conda#8bc89311041d7fcb510238cf0848ccae +https://conda.anaconda.org/conda-forge/linux-64/libfreetype6-2.13.3-h48d6fc4_1.conda#3c255be50a506c50765a93a6644f32fe +https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-14.2.0-h69a702a_2.conda#4056c857af1a99ee50589a941059ec55 +https://conda.anaconda.org/conda-forge/linux-64/libnghttp2-1.64.0-h161d5f1_0.conda#19e57602824042dfd0446292ef90488b +https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.29-pthreads_h94d23a6_0.conda#0a4d0252248ef9a0f88f2ba8b8a08e12 +https://conda.anaconda.org/conda-forge/linux-64/libprotobuf-5.28.2-h5b01275_0.conda#ab0bff36363bec94720275a681af8b83 +https://conda.anaconda.org/conda-forge/linux-64/libre2-11-2024.07.02-hbbce691_2.conda#b2fede24428726dd867611664fb372e8 +https://conda.anaconda.org/conda-forge/linux-64/libthrift-0.21.0-h0e7cc3e_0.conda#dcb95c0a98ba9ff737f7ae482aef7833 +https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.7.0-hd9ff511_4.conda#6c1028898cf3a2032d9af46689e1b81a +https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-9.2.0-he0572af_0.conda#93340b072c393d23c4700a1d40565dca +https://conda.anaconda.org/conda-forge/linux-64/nccl-2.26.5.1-h03a54cd_0.conda#47dc81d35df91d38609df9c93d608b2b +https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.44-hc749103_2.conda#31614c73d7b103ef76faa4d83d261d34 +https://conda.anaconda.org/conda-forge/linux-64/python-3.13.3-hf636f53_101_cp313.conda#10622e12d649154af0bd76bcf33a7c5c +https://conda.anaconda.org/conda-forge/linux-64/qhull-2020.2-h434a139_5.conda#353823361b1d27eb3960efb076dfcaf6 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-0.4.1-hb711507_2.conda#8637c3e5821654d0edf97e2b0404b443 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-keysyms-0.4.1-hb711507_0.conda#ad748ccca349aec3e91743e08b5e2b50 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-renderutil-0.3.10-hb711507_0.conda#0e0cbe0564d03a99afd5fd7b362feecd +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-wm-0.4.2-hb711507_0.conda#608e0ef8256b81d04456e8d211eee3e8 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libsm-1.2.6-he73a12e_0.conda#1c74ff8c35dcadf952a16f752ca5aa49 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.12-h4f16b4b_0.conda#db038ce880f100acc74dba10302b5630 +https://conda.anaconda.org/conda-forge/linux-64/aws-c-event-stream-0.5.0-h7959bf6_11.conda#9b3fb60fe57925a92f399bc3fc42eccf +https://conda.anaconda.org/conda-forge/linux-64/aws-c-http-0.9.2-hefd7a92_4.conda#5ce4df662d32d3123ea8da15571b6f51 +https://conda.anaconda.org/conda-forge/linux-64/brotli-1.1.0-hb9d3cd8_2.conda#98514fe74548d768907ce7a13f680e8f +https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_1.conda#962b9857ee8e7018c22f2776ffa0b2d7 +https://conda.anaconda.org/conda-forge/noarch/cpython-3.13.3-py313hd8ed1ab_101.conda#904a822cbd380adafb9070debf8579a8 +https://conda.anaconda.org/conda-forge/linux-64/cudnn-9.8.0.87-hf36481c_1.conda#988b6d0f8a2660fdee429d3d0f761ed3 +https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhd8ed1ab_1.conda#44600c4667a319d67dbe0681fc0bc833 
+https://conda.anaconda.org/conda-forge/linux-64/cyrus-sasl-2.1.27-h54b06d7_7.conda#dce22f70b4e5a407ce88f2be046f4ceb +https://conda.anaconda.org/conda-forge/linux-64/cython-3.0.12-py313h5dec8f5_0.conda#24a42a0c1cc33743e33572d63d489b54 +https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.2-pyhd8ed1ab_1.conda#a16662747cdeb9abbac74d0057cc976e +https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_1.conda#a71efeae2c160f6789900ba2631a2c90 +https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py313h9800cb9_1.conda#54dd71b3be2ed6ccc50f180347c901db +https://conda.anaconda.org/conda-forge/noarch/filelock-3.18.0-pyhd8ed1ab_0.conda#4547b39256e296bb758166893e909a7c +https://conda.anaconda.org/conda-forge/noarch/fsspec-2025.3.2-pyhd8ed1ab_0.conda#9c40692c3d24c7aaf335f673ac09d308 +https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_1.conda#6837f3eff7dcea42ecd714ce1ac2b108 +https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.7-py313h33d0bda_0.conda#9862d13a5e466273d5a4738cffcb8d6c +https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.17-h717163a_0.conda#000e85703f0fd9594c81710dd5066471 +https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-31_h59b9bed_openblas.conda#728dbebd0f7a20337218beacffd37916 +https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h4637d8d_4.conda#d4529f4dff3057982a7617c7ac58fde3 +https://conda.anaconda.org/conda-forge/linux-64/libcurl-8.13.0-h332b0f4_0.conda#cbdc92ac0d93fe3c796e36ad65c7905c +https://conda.anaconda.org/conda-forge/linux-64/libfreetype-2.13.3-ha770c72_1.conda#51f5be229d83ecd401fb369ab96ae669 +https://conda.anaconda.org/conda-forge/linux-64/libglib-2.84.1-h2ff4ddf_0.conda#0305434da649d4fb48a425e588b79ea6 +https://conda.anaconda.org/conda-forge/linux-64/libglx-1.7.0-ha4b6fd6_2.conda#c8013e438185f33b13814c5c488acd5c +https://conda.anaconda.org/conda-forge/linux-64/libhiredis-1.0.2-h2cc385e_0.tar.bz2#b34907d3a81a3cd8095ee83d174c074a +https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.13.7-h4bc477f_1.conda#ad1f1f8238834cd3c88ceeaee8da444a +https://conda.anaconda.org/conda-forge/linux-64/markupsafe-3.0.2-py313h8060acc_1.conda#21b62c55924f01b6eef6827167b46acb +https://conda.anaconda.org/conda-forge/noarch/meson-1.8.0-pyh29332c3_0.conda#8e25221b702272394b86b0f4d7217f77 +https://conda.anaconda.org/conda-forge/linux-64/mpfr-4.2.1-h90cbb55_3.conda#2eeb50cab6652538eee8fc0bc3340c81 +https://conda.anaconda.org/conda-forge/noarch/mpmath-1.3.0-pyhd8ed1ab_1.conda#3585aa87c43ab15b167b574cd73b057b +https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19 +https://conda.anaconda.org/conda-forge/noarch/networkx-3.4.2-pyh267e887_2.conda#fd40bf7f7f4bc4b647dc8512053d9873 +https://conda.anaconda.org/conda-forge/linux-64/openblas-0.3.29-pthreads_h6ec200e_0.conda#7e4d48870b3258bea920d51b7f495a81 +https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.3-h5fbd93e_0.conda#9e5816bc95d285c115a3ebc2f8563564 +https://conda.anaconda.org/conda-forge/linux-64/orc-2.0.3-h97ab989_1.conda#2f46eae652623114e112df13fae311cf +https://conda.anaconda.org/conda-forge/noarch/packaging-25.0-pyh29332c3_1.conda#58335b26c38bf4a20f399384c33cbcf9 +https://conda.anaconda.org/conda-forge/noarch/pip-25.1.1-pyh145f28c_0.conda#01384ff1639c6330a0924791413b8714 +https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_1.conda#e9dcbce5f45f9ee500e728ae58b605b6 
+https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.2.3-pyhd8ed1ab_1.conda#513d3c262ee49b54a8fec85c5bc99764 +https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2025.2-pyhd8ed1ab_0.conda#88476ae6ebd24f39261e0854ac244f33 +https://conda.anaconda.org/conda-forge/noarch/pytz-2025.2-pyhd8ed1ab_0.conda#bc8e3267d44011051f2eb14d22fb0960 +https://conda.anaconda.org/conda-forge/linux-64/re2-2024.07.02-h9925aae_2.conda#e84ddf12bde691e8ec894b00ea829ddf +https://conda.anaconda.org/conda-forge/noarch/setuptools-80.1.0-pyhff2d567_0.conda#f6f72d0837c79eaec77661be43e8a691 +https://conda.anaconda.org/conda-forge/noarch/six-1.17.0-pyhd8ed1ab_0.conda#a451d576819089b0d672f18768be0f65 +https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.6.0-pyhecae5ae_0.conda#9d64911b31d57ca443e9f1e36b04385f +https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_1.conda#b0dd904de08b7db706167240bf37b164 +https://conda.anaconda.org/conda-forge/noarch/tomli-2.2.1-pyhd8ed1ab_1.conda#ac944244f1fed2eb49bae07193ae8215 +https://conda.anaconda.org/conda-forge/linux-64/tornado-6.4.2-py313h536fd9c_0.conda#5f5cbdd527d2e74e270d8b6255ba714f +https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.13.2-pyh29332c3_0.conda#83fc6ae00127671e301c9f44254c31b8 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-image-0.4.0-hb711507_2.conda#a0901183f08b6c7107aab109733a3c91 +https://conda.anaconda.org/conda-forge/linux-64/xkeyboard-config-2.44-hb9d3cd8_0.conda#7c91bfc90672888259675ad2ad28af9c +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxext-1.3.6-hb9d3cd8_0.conda#febbab7d15033c913d53c7a2c102309d +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxfixes-6.0.1-hb9d3cd8_0.conda#4bdb303603e9821baf5fe5fdff1dc8f8 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.12-hb9d3cd8_0.conda#96d57aba173e878a2089d5638016dc5e +https://conda.anaconda.org/conda-forge/linux-64/aws-c-auth-0.8.0-hb921021_15.conda#c79d50f64cffa5ad51ecc1a81057962f +https://conda.anaconda.org/conda-forge/linux-64/aws-c-mqtt-0.11.0-h11f4f37_12.conda#96c3e0221fa2da97619ee82faa341a73 +https://conda.anaconda.org/conda-forge/linux-64/azure-core-cpp-1.14.0-h5cfcd09_0.conda#0a8838771cc2e985cd295e01ae83baf1 +https://conda.anaconda.org/conda-forge/linux-64/ccache-4.11.3-h80c52d3_0.conda#eb517c6a2b960c3ccb6f1db1005f063a +https://conda.anaconda.org/conda-forge/linux-64/coverage-7.8.0-py313h8060acc_0.conda#375064d30e709bf7c1d4580e70aaea61 +https://conda.anaconda.org/conda-forge/linux-64/dbus-1.13.6-h5008d03_3.tar.bz2#ecfff944ba3960ecb334b9a2663d708d +https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.57.0-py313h8060acc_0.conda#76b3a3367ac578a7cc43f4b7814e7e87 +https://conda.anaconda.org/conda-forge/linux-64/freetype-2.13.3-ha770c72_1.conda#9ccd736d31e0c6e41f54e704e5312811 +https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.6-pyhd8ed1ab_0.conda#446bd6c8cb26050d528881df495ce646 +https://conda.anaconda.org/conda-forge/noarch/joblib-1.5.0-pyhd8ed1ab_0.conda#3d7257f0a61c9aa4ffa3e324a887416b +https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-31_he106b2a_openblas.conda#abb32c727da370c481a1c206f5159ce9 +https://conda.anaconda.org/conda-forge/linux-64/libgl-1.7.0-ha4b6fd6_2.conda#928b8be80851f5d8ffb016f9c81dae7a +https://conda.anaconda.org/conda-forge/linux-64/libgrpc-1.67.1-hc2c308b_0.conda#4606a4647bfe857e3cfe21ca12ac3afb +https://conda.anaconda.org/conda-forge/linux-64/libhwloc-2.11.2-default_h0d58e46_1001.conda#804ca9e91bcaea0824a341d55b1684f2 
+https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-31_h7ac8fdf_openblas.conda#452b98eafe050ecff932f0ec832dd03f +https://conda.anaconda.org/conda-forge/linux-64/libllvm20-20.1.4-he9d0ab4_0.conda#96c33bbd084ef2b2463503fb7f1482ae +https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.9.2-h65c71a3_0.conda#d045b1d878031eb497cab44e6392b1df +https://conda.anaconda.org/conda-forge/linux-64/libxslt-1.1.39-h76b75d6_0.conda#e71f31f8cfb0a91439f2086fc8aa0461 +https://conda.anaconda.org/conda-forge/linux-64/mpc-1.3.1-h24ddda3_1.conda#aa14b9a5196a6d8dd364164b7ce56acf +https://conda.anaconda.org/conda-forge/linux-64/openldap-2.6.9-he970967_0.conda#ca2de8bbdc871bce41dbf59e51324165 +https://conda.anaconda.org/conda-forge/noarch/pyproject-metadata-0.9.1-pyhd8ed1ab_0.conda#22ae7c6ea81e0c8661ef32168dda929b +https://conda.anaconda.org/conda-forge/noarch/pytest-8.3.5-pyhd8ed1ab_0.conda#c3c9316209dec74a705a36797970c6be +https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0.post0-pyhff2d567_1.conda#5ba79d7c71f03c678c8ead841f347d6e +https://conda.anaconda.org/conda-forge/noarch/python-gil-3.13.3-h4df99d1_101.conda#82c2641f2f0f513f7d2d1b847a2588e3 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-cursor-0.1.5-hb9d3cd8_0.conda#eb44b3b6deb1cab08d72cb61686fe64c +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxcomposite-0.4.6-hb9d3cd8_2.conda#d3c295b50f092ab525ffe3c2aa4b7413 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxcursor-1.2.3-hb9d3cd8_0.conda#2ccd714aa2242315acaf0a67faea780b +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdamage-1.1.6-hb9d3cd8_0.conda#b5fcc7172d22516e1f965490e65e33a4 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxi-1.8.2-hb9d3cd8_0.conda#17dcc85db3c7886650b8908b183d6876 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrandr-1.5.4-hb9d3cd8_0.conda#2de7f99d6581a4a7adbff607b5c278ca +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxxf86vm-1.1.6-hb9d3cd8_0.conda#5efa5fa6243a622445fdfd72aee15efa +https://conda.anaconda.org/conda-forge/noarch/_python_abi3_support-1.0-hd8ed1ab_2.conda#aaa2a381ccc56eac91d63b6c1240312f +https://conda.anaconda.org/conda-forge/linux-64/aws-c-s3-0.7.7-hf454442_0.conda#947c82025693bebd557f782bb5d6b469 +https://conda.anaconda.org/conda-forge/linux-64/azure-identity-cpp-1.10.0-h113e628_0.conda#73f73f60854f325a55f1d31459f2ab73 +https://conda.anaconda.org/conda-forge/linux-64/azure-storage-common-cpp-12.8.0-h736e048_1.conda#13de36be8de3ae3f05ba127631599213 +https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.15.0-h7e30c49_1.conda#8f5b0b297b59e1ac160ad4beec99dbee +https://conda.anaconda.org/conda-forge/linux-64/gmpy2-2.2.1-py313h11186cd_0.conda#54d020e0eaacf1e99bfb2410b9aa2e5e +https://conda.anaconda.org/conda-forge/linux-64/libclang-cpp20.1-20.1.4-default_h1df26ce_0.conda#96f8d5b2e94c9ba4fef19f1adf068a15 +https://conda.anaconda.org/conda-forge/linux-64/libclang13-20.1.4-default_he06ed0a_0.conda#2d933632c8004be47deb2be61bf013be +https://conda.anaconda.org/conda-forge/linux-64/libgoogle-cloud-2.32.0-h804f50b_0.conda#3d96df4d6b1c88455e05b94ce8a14a53 +https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.9.0-31_he2f377e_openblas.conda#7e5fff7d0db69be3a266f7e79a3bb0e2 +https://conda.anaconda.org/conda-forge/linux-64/libmagma-2.8.0-h9ddd185_2.conda#8de40c4f75d36bb00a5870f682457f1d +https://conda.anaconda.org/conda-forge/linux-64/libpq-17.4-h27ae623_1.conda#37fba334855ef3b51549308e61ed7a3d 
+https://conda.anaconda.org/conda-forge/noarch/meson-python-0.18.0-pyh70fd9c4_0.conda#576c04b9d9f8e45285fb4d9452c26133 +https://conda.anaconda.org/conda-forge/linux-64/numpy-2.2.5-py313h17eae1a_0.conda#6ceeff9ed72e54e4a2f9a1c88f47bdde +https://conda.anaconda.org/conda-forge/linux-64/pillow-11.1.0-py313h8db990d_0.conda#1e86810c6c3fb6d6aebdba26564eb2e8 +https://conda.anaconda.org/conda-forge/noarch/pytest-cov-6.1.1-pyhd8ed1ab_0.conda#1e35d8f975bc0e984a19819aa91c440a +https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.6.1-pyhd8ed1ab_1.conda#59aad4fb37cabc0bacc73cf344612ddd +https://conda.anaconda.org/conda-forge/linux-64/tbb-2021.13.0-hceb3a55_1.conda#ba7726b8df7b9d34ea80e82b097a4893 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxtst-1.2.5-hb9d3cd8_3.conda#7bbe9a0cc0df0ac5f5a8ad6d6a11af2f +https://conda.anaconda.org/conda-forge/noarch/array-api-strict-2.3.1-pyhd8ed1ab_0.conda#11107d0aeb8c590a34fee0894909816b +https://conda.anaconda.org/conda-forge/linux-64/aws-crt-cpp-0.29.7-hd92328a_7.conda#02b95564257d5c3db9c06beccf711f95 +https://conda.anaconda.org/conda-forge/linux-64/azure-storage-blobs-cpp-12.13.0-h3cf044e_1.conda#7eb66060455c7a47d9dcdbfa9f46579b +https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-31_h1ea3ea9_openblas.conda#ba652ee0576396d4765e567f043c57f9 +https://conda.anaconda.org/conda-forge/linux-64/cairo-1.18.4-h3394656_0.conda#09262e66b19567aff4f592fb53b28760 +https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.3.2-py313h33d0bda_0.conda#5dc81fffe102f63045225007a33d6199 +https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.4.1-py313hc2a895b_0.conda#46dd595e816b278b178e3bef8a6acf71 +https://conda.anaconda.org/conda-forge/linux-64/libgoogle-cloud-storage-2.32.0-h0121fbd_0.conda#877a5ec0431a5af83bf0cd0522bfe661 +https://conda.anaconda.org/conda-forge/linux-64/libmagma_sparse-2.8.0-h9ddd185_0.conda#f4eb3cfeaf9d91e72d5b2b8706bf059f +https://conda.anaconda.org/conda-forge/linux-64/mkl-2024.2.2-ha957f24_16.conda#1459379c79dda834673426504d52b319 +https://conda.anaconda.org/conda-forge/linux-64/pandas-2.2.3-py313ha87cce1_3.conda#6248b529e537b1d4cb5ab3ef7f537795 +https://conda.anaconda.org/conda-forge/linux-64/polars-1.27.1-py39h2a4a510_3.conda#fba08963eaa1f954480045d033d1221e +https://conda.anaconda.org/conda-forge/linux-64/scipy-1.15.2-py313h86fcf2b_0.conda#ca68acd9febc86448eeed68d0c6c8643 +https://conda.anaconda.org/conda-forge/noarch/sympy-1.14.0-pyh2585a3b_105.conda#8c09fac3785696e1c477156192d64b91 +https://conda.anaconda.org/conda-forge/linux-64/aws-sdk-cpp-1.11.458-hc430e4a_4.conda#aeefac461bea1f126653c1285cf5af08 +https://conda.anaconda.org/conda-forge/linux-64/azure-storage-files-datalake-cpp-12.12.0-ha633028_1.conda#7c1980f89dd41b097549782121a73490 +https://conda.anaconda.org/conda-forge/linux-64/blas-2.131-openblas.conda#38b2ec894c69bb4be0e66d2ef7fc60bf +https://conda.anaconda.org/conda-forge/linux-64/cupy-13.4.1-py313h66a2ee2_0.conda#784d6bd149ef2b5d9c733ea3dd4d15ad +https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-11.1.0-h3beb420_0.conda#95e3bb97f9cdc251c0c68640e9c10ed3 +https://conda.anaconda.org/conda-forge/linux-64/libtorch-2.5.1-cuda118_hb34f2e8_303.conda#da799bf557ff6376a1a58f40bddfb293 +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.10.1-py313h129903b_0.conda#4e23b3fabf434b418e0d9c6975a6453f +https://conda.anaconda.org/conda-forge/linux-64/pyamg-5.2.1-py313hf0ab243_1.conda#4c769bf3858f424cb2ecf952175ec600 
+https://conda.anaconda.org/conda-forge/linux-64/libarrow-18.1.0-h44a453e_6_cpu.conda#2cf6d608d6e66506f69797d5c6944c35 +https://conda.anaconda.org/conda-forge/linux-64/pytorch-2.5.1-cuda118_py313h40cdc2d_303.conda#19ad990954a4ed89358d91d0a3e7016d +https://conda.anaconda.org/conda-forge/linux-64/qt6-main-6.9.0-h6441bc3_1.conda#4029a8dcb1d97ea241dbe5abfda1fad6 +https://conda.anaconda.org/conda-forge/linux-64/libarrow-acero-18.1.0-hcb10f89_6_cpu.conda#143f9288b64759a6427563f058c62f2b +https://conda.anaconda.org/conda-forge/linux-64/libparquet-18.1.0-h081d1f1_6_cpu.conda#68788df49ce7480187eb6387f15b2b67 +https://conda.anaconda.org/conda-forge/linux-64/pyarrow-core-18.1.0-py313he5f92c8_0_cpu.conda#5380e12f4468e891911dbbd4248b521a +https://conda.anaconda.org/conda-forge/linux-64/pyside6-6.9.0-py313h5f61773_0.conda#f51f25ec8fcbf777f8b186bb5deeed40 +https://conda.anaconda.org/conda-forge/linux-64/pytorch-gpu-2.5.1-cuda126hf7c78f0_303.conda#afaf760e55725108ae78ed41198c49bb +https://conda.anaconda.org/conda-forge/linux-64/libarrow-dataset-18.1.0-hcb10f89_6_cpu.conda#20ca46a6bc714a6ab189d5b3f46e66d8 +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.10.1-py313h78bf25f_0.conda#d0c80dea550ca97fc0710b2ecef919ba +https://conda.anaconda.org/conda-forge/linux-64/libarrow-substrait-18.1.0-h3ee7192_6_cpu.conda#aa313b3168caf98d00b3753f5ba27650 +https://conda.anaconda.org/conda-forge/linux-64/pyarrow-18.1.0-py313h78bf25f_0.conda#a11d880ceedc33993c6f5c14a80ea9d3 diff --git a/build_tools/github/pylatest_conda_forge_cuda_array-api_linux-64_environment.yml b/build_tools/github/pylatest_conda_forge_cuda_array-api_linux-64_environment.yml new file mode 100644 index 0000000000000..bbfb91d24fd1a --- /dev/null +++ b/build_tools/github/pylatest_conda_forge_cuda_array-api_linux-64_environment.yml @@ -0,0 +1,32 @@ +# DO NOT EDIT: this file is generated from the specification found in the +# following script to centralize the configuration for CI builds: +# build_tools/update_environments_and_lock_files.py +channels: + - conda-forge + - pytorch + - nvidia +dependencies: + - python + - numpy + - blas + - scipy + - cython + - joblib + - threadpoolctl + - matplotlib + - pandas + - pyamg + - pytest + - pytest-xdist + - pillow + - pip + - ninja + - meson-python + - pytest-cov + - coverage + - ccache + - pytorch-gpu + - polars + - pyarrow + - cupy + - array-api-strict diff --git a/build_tools/cirrus/pymin_conda_forge_environment.yml b/build_tools/github/pymin_conda_forge_arm_environment.yml similarity index 93% rename from build_tools/cirrus/pymin_conda_forge_environment.yml rename to build_tools/github/pymin_conda_forge_arm_environment.yml index 684c4636daad4..c65ab4aaecf14 100644 --- a/build_tools/cirrus/pymin_conda_forge_environment.yml +++ b/build_tools/github/pymin_conda_forge_arm_environment.yml @@ -4,7 +4,7 @@ channels: - conda-forge dependencies: - - python=3.9 + - python=3.10 - numpy - blas - scipy @@ -12,7 +12,7 @@ dependencies: - joblib - threadpoolctl - matplotlib - - pytest<8 + - pytest - pytest-xdist - pillow - pip diff --git a/build_tools/github/pymin_conda_forge_arm_linux-aarch64_conda.lock b/build_tools/github/pymin_conda_forge_arm_linux-aarch64_conda.lock new file mode 100644 index 0000000000000..dc7b4ae5c066e --- /dev/null +++ b/build_tools/github/pymin_conda_forge_arm_linux-aarch64_conda.lock @@ -0,0 +1,162 @@ +# Generated by conda-lock. 
+# platform: linux-aarch64 +# input_hash: f12646c755adbf5f02f95c5d07e868bf1570777923e737bc27273eb1a5e40cd7 +@EXPLICIT +https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2#0c96522c6bdaed4b1566d11387caaf45 +https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2#34893075a5c9e55cdafac56607368fc6 +https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2#4d59c254e01d9cde7957100457e2d5fb +https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_3.conda#49023d73832ef61042f6a237cb2687e7 +https://conda.anaconda.org/conda-forge/linux-aarch64/ld_impl_linux-aarch64-2.43-h80caac9_4.conda#80c9ad5e05e91bb6c0967af3880c9742 +https://conda.anaconda.org/conda-forge/linux-aarch64/libglvnd-1.7.0-hd24410f_2.conda#9e115653741810778c9a915a2f8439e7 +https://conda.anaconda.org/conda-forge/linux-aarch64/libgomp-14.2.0-he277a41_2.conda#b11c09d9463daf4cae492d29806b1889 +https://conda.anaconda.org/conda-forge/noarch/python_abi-3.10-7_cp310.conda#44e871cba2b162368476a84b8d040b6c +https://conda.anaconda.org/conda-forge/noarch/tzdata-2025b-h78e105d_0.conda#4222072737ccff51314b5ece9c7d6f5a +https://conda.anaconda.org/conda-forge/linux-aarch64/_openmp_mutex-4.5-2_gnu.tar.bz2#6168d71addc746e8f2b8d57dfd2edcea +https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2025.4.26-hbd8a1cb_0.conda#95db94f75ba080a22eb623590993167b +https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-0.tar.bz2#f766549260d6815b0c52253f1fb1bb29 +https://conda.anaconda.org/conda-forge/linux-aarch64/libegl-1.7.0-hd24410f_2.conda#cf105bce884e4ef8c8ccdca9fe6695e7 +https://conda.anaconda.org/conda-forge/linux-aarch64/libopengl-1.7.0-hd24410f_2.conda#cf9d12bfab305e48d095a4c79002c922 +https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2#fee5683a3f04bd15cbd8318b096a27ab +https://conda.anaconda.org/conda-forge/linux-aarch64/libgcc-14.2.0-he277a41_2.conda#6b4268a60b10f29257b51b9b67ff8d76 +https://conda.anaconda.org/conda-forge/linux-aarch64/alsa-lib-1.2.14-h86ecc28_0.conda#a696b24c1b473ecc4774bcb5a6ac6337 +https://conda.anaconda.org/conda-forge/linux-aarch64/libbrotlicommon-1.1.0-h86ecc28_2.conda#3ee026955c688f551a9999840cff4c67 +https://conda.anaconda.org/conda-forge/linux-aarch64/libdeflate-1.23-he377734_0.conda#308ad7cbe9fd92add59ef3d547a42c17 +https://conda.anaconda.org/conda-forge/linux-aarch64/libexpat-2.7.0-h5ad3122_0.conda#d41a057e7968705dae8dcb7c8ba2c8dd +https://conda.anaconda.org/conda-forge/linux-aarch64/libffi-3.4.6-he21f813_1.conda#15a131f30cae36e9a655ca81fee9a285 +https://conda.anaconda.org/conda-forge/linux-aarch64/libgcc-ng-14.2.0-he9431aa_2.conda#692c2bb75f32cfafb6799cf6d1c5d0e0 +https://conda.anaconda.org/conda-forge/linux-aarch64/libgfortran5-14.2.0-hb6113d0_2.conda#cd754566661513808ef2408c4ab99a2f +https://conda.anaconda.org/conda-forge/linux-aarch64/libiconv-1.18-hc99b53d_1.conda#81541d85a45fbf4d0a29346176f1f21c +https://conda.anaconda.org/conda-forge/linux-aarch64/libjpeg-turbo-3.1.0-h86ecc28_0.conda#a689388210d502364b79e8b19e7fa2cb +https://conda.anaconda.org/conda-forge/linux-aarch64/liblzma-5.8.1-h86ecc28_1.conda#8ced9a547a29f7a71b7f15a4443ad1de +https://conda.anaconda.org/conda-forge/linux-aarch64/libstdcxx-14.2.0-h3f4de04_2.conda#eadee2cda99697e29411c1013c187b92 +https://conda.anaconda.org/conda-forge/linux-aarch64/libwebp-base-1.5.0-h0886dbf_0.conda#95ef4a689b8cc1b7e18b53784d88f96b 
+https://conda.anaconda.org/conda-forge/linux-aarch64/libzlib-1.3.1-h86ecc28_2.conda#08aad7cbe9f5a6b460d0976076b6ae64 +https://conda.anaconda.org/conda-forge/linux-aarch64/ncurses-6.5-ha32ae93_3.conda#182afabe009dc78d8b73100255ee6868 +https://conda.anaconda.org/conda-forge/linux-aarch64/openssl-3.5.0-hd08dc88_1.conda#ee68fdc3a8723e9c58bdd2f10544658f +https://conda.anaconda.org/conda-forge/linux-aarch64/pthread-stubs-0.4-h86ecc28_1002.conda#bb5a90c93e3bac3d5690acf76b4a6386 +https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libice-1.1.2-h86ecc28_0.conda#c8d8ec3e00cd0fd8a231789b91a7c5b7 +https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxau-1.0.12-h86ecc28_0.conda#d5397424399a66d33c80b1f2345a36a6 +https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxdmcp-1.1.5-h57736b2_0.conda#25a5a7b797fe6e084e04ffe2db02fc62 +https://conda.anaconda.org/conda-forge/linux-aarch64/bzip2-1.0.8-h68df207_7.conda#56398c28220513b9ea13d7b450acfb20 +https://conda.anaconda.org/conda-forge/linux-aarch64/double-conversion-3.3.1-h5ad3122_0.conda#399959d889e1a73fc99f12ce480e77e1 +https://conda.anaconda.org/conda-forge/linux-aarch64/expat-2.7.0-h5ad3122_0.conda#c22e14e241ade3d3a74c0409c3d582a2 +https://conda.anaconda.org/conda-forge/linux-aarch64/keyutils-1.6.1-h4e544f5_0.tar.bz2#1f24853e59c68892452ef94ddd8afd4b +https://conda.anaconda.org/conda-forge/linux-aarch64/lerc-4.0.0-hfdc4d58_1.conda#60dceb7e876f4d74a9cbd42bbbc6b9cf +https://conda.anaconda.org/conda-forge/linux-aarch64/libbrotlidec-1.1.0-h86ecc28_2.conda#e64d0f3b59c7c4047446b97a8624a72d +https://conda.anaconda.org/conda-forge/linux-aarch64/libbrotlienc-1.1.0-h86ecc28_2.conda#0e9bd365480c72b25c71a448257b537d +https://conda.anaconda.org/conda-forge/linux-aarch64/libedit-3.1.20250104-pl5321h976ea20_0.conda#fb640d776fc92b682a14e001980825b1 +https://conda.anaconda.org/conda-forge/linux-aarch64/libgfortran-14.2.0-he9431aa_2.conda#d8b9d9dc0c8cd97d375b48e55947ba70 +https://conda.anaconda.org/conda-forge/linux-aarch64/libnsl-2.0.1-h31becfc_0.conda#c14f32510f694e3185704d89967ec422 +https://conda.anaconda.org/conda-forge/linux-aarch64/libntlm-1.4-hf897c2e_1002.tar.bz2#835c7c4137821de5c309f4266a51ba89 +https://conda.anaconda.org/conda-forge/linux-aarch64/libpciaccess-0.18-h31becfc_0.conda#6d48179630f00e8c9ad9e30879ce1e54 +https://conda.anaconda.org/conda-forge/linux-aarch64/libpng-1.6.47-hec79eb8_0.conda#c4b1ba0d7cef5002759d2f156722feee +https://conda.anaconda.org/conda-forge/linux-aarch64/libsqlite-3.49.1-h5eb1b54_2.conda#7c45959e187fd3313f9f1734464baecc +https://conda.anaconda.org/conda-forge/linux-aarch64/libstdcxx-ng-14.2.0-hf1166c9_2.conda#c934c1fddad582fcc385b608eb06a70c +https://conda.anaconda.org/conda-forge/linux-aarch64/libuuid-2.38.1-hb4cce97_0.conda#000e30b09db0b7c775b21695dff30969 +https://conda.anaconda.org/conda-forge/linux-aarch64/libxcb-1.17.0-h262b8f6_0.conda#cd14ee5cca2464a425b1dbfc24d90db2 +https://conda.anaconda.org/conda-forge/linux-aarch64/libxcrypt-4.4.36-h31becfc_1.conda#b4df5d7d4b63579d081fd3a4cf99740e +https://conda.anaconda.org/conda-forge/linux-aarch64/mysql-common-9.2.0-h3f5c77f_0.conda#f9db1ad1a8897483edb3ac321d662e7b +https://conda.anaconda.org/conda-forge/linux-aarch64/ninja-1.12.1-h17cf362_1.conda#885414635e2a65ed06f284f6d569cdff +https://conda.anaconda.org/conda-forge/linux-aarch64/pixman-0.46.0-h86a87f0_0.conda#1328d5bad76f7b31926ccd2a33e0d6ef +https://conda.anaconda.org/conda-forge/linux-aarch64/readline-8.2-h8382b9d_2.conda#c0f08fc2737967edde1a272d4bf41ed9 
+https://conda.anaconda.org/conda-forge/linux-aarch64/tk-8.6.13-h194ca79_0.conda#f75105e0585851f818e0009dd1dde4dc +https://conda.anaconda.org/conda-forge/linux-aarch64/wayland-1.23.1-h698ed42_1.conda#229b00f81a229af79547a7e4776ccf6e +https://conda.anaconda.org/conda-forge/linux-aarch64/zstd-1.5.7-hbcf94c1_2.conda#5be90c5a3e4b43c53e38f50a85e11527 +https://conda.anaconda.org/conda-forge/linux-aarch64/brotli-bin-1.1.0-h86ecc28_2.conda#7d48b185fe1f722f8cda4539bb931f85 +https://conda.anaconda.org/conda-forge/linux-aarch64/graphite2-1.3.13-h2f0025b_1003.conda#f33009add6a08358bc12d114ceec1304 +https://conda.anaconda.org/conda-forge/linux-aarch64/icu-75.1-hf9b3779_0.conda#268203e8b983fddb6412b36f2024e75c +https://conda.anaconda.org/conda-forge/linux-aarch64/krb5-1.21.3-h50a48e9_0.conda#29c10432a2ca1472b53f299ffb2ffa37 +https://conda.anaconda.org/conda-forge/linux-aarch64/libdrm-2.4.124-h86ecc28_0.conda#a8058bcb6b4fa195aaa20452437c7727 +https://conda.anaconda.org/conda-forge/linux-aarch64/libfreetype6-2.13.3-he93130f_1.conda#51eae9012d75b8f7e4b0adfe61a83330 +https://conda.anaconda.org/conda-forge/linux-aarch64/libgfortran-ng-14.2.0-he9431aa_2.conda#0980d7d931474a6a037ae66f1da4d2fe +https://conda.anaconda.org/conda-forge/linux-aarch64/libopenblas-0.3.29-pthreads_h9d3fd7e_0.conda#a99e2bfcb1ad6362544c71281eb617e9 +https://conda.anaconda.org/conda-forge/linux-aarch64/libtiff-4.7.0-h88f7998_4.conda#6edd78ac9bee9a972f25cb6e8c6e21ad +https://conda.anaconda.org/conda-forge/linux-aarch64/mysql-libs-9.2.0-h11569fd_0.conda#72f21962b1205535d810b82f8f0fa342 +https://conda.anaconda.org/conda-forge/linux-aarch64/pcre2-10.44-hf4ec17f_2.conda#ab9d0f9a3c9ce23e4fd2af4edc6fa245 +https://conda.anaconda.org/conda-forge/linux-aarch64/python-3.10.17-h256493d_0_cpython.conda#c496213b6ede3c5a30ce1bf02bebf382 +https://conda.anaconda.org/conda-forge/linux-aarch64/qhull-2020.2-h70be974_5.conda#bb138086d938e2b64f5f364945793ebf +https://conda.anaconda.org/conda-forge/linux-aarch64/xcb-util-0.4.1-h5c728e9_2.conda#b4cf8ba6cff9cdf1249bcfe1314222b0 +https://conda.anaconda.org/conda-forge/linux-aarch64/xcb-util-keysyms-0.4.1-h5c728e9_0.conda#57ca8564599ddf8b633c4ea6afee6f3a +https://conda.anaconda.org/conda-forge/linux-aarch64/xcb-util-renderutil-0.3.10-h5c728e9_0.conda#7beeda4223c5484ef72d89fb66b7e8c1 +https://conda.anaconda.org/conda-forge/linux-aarch64/xcb-util-wm-0.4.2-h5c728e9_0.conda#f14dcda6894722e421da2b7dcffb0b78 +https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libsm-1.2.6-h0808dbd_0.conda#2d1409c50882819cb1af2de82e2b7208 +https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libx11-1.8.12-hca56bd8_0.conda#3df132f0048b9639bc091ef22937c111 +https://conda.anaconda.org/conda-forge/linux-aarch64/brotli-1.1.0-h86ecc28_2.conda#5094acc34eb173f74205c0b55f0dd4a4 +https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_1.conda#962b9857ee8e7018c22f2776ffa0b2d7 +https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhd8ed1ab_1.conda#44600c4667a319d67dbe0681fc0bc833 +https://conda.anaconda.org/conda-forge/linux-aarch64/cyrus-sasl-2.1.27-hf6b2984_7.conda#7a85d417c8acd7a5215c082c5b9219e5 +https://conda.anaconda.org/conda-forge/linux-aarch64/cython-3.0.12-py310hc86cfe9_0.conda#4bd71650f315b643774841272d02911a +https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.2-pyhd8ed1ab_1.conda#a16662747cdeb9abbac74d0057cc976e +https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_1.conda#a71efeae2c160f6789900ba2631a2c90 
+https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_1.conda#6837f3eff7dcea42ecd714ce1ac2b108 +https://conda.anaconda.org/conda-forge/linux-aarch64/kiwisolver-1.4.7-py310h5d7f10c_0.conda#b86d594bf17c9ad7a291593368ae8ba7 +https://conda.anaconda.org/conda-forge/linux-aarch64/lcms2-2.17-hc88f144_0.conda#b87b1abd2542cf65a00ad2e2461a3083 +https://conda.anaconda.org/conda-forge/linux-aarch64/libblas-3.9.0-31_h1a9f1db_openblas.conda#48bd5bf15ccf3e409840be9caafc0ad5 +https://conda.anaconda.org/conda-forge/linux-aarch64/libcups-2.3.3-h405e4a8_4.conda#d42c670b0c96c1795fd859d5e0275a55 +https://conda.anaconda.org/conda-forge/linux-aarch64/libfreetype-2.13.3-h8af1aa0_1.conda#2d4a1c3dcabb80b4a56d5c34bdacea08 +https://conda.anaconda.org/conda-forge/linux-aarch64/libglib-2.84.1-hc486b8e_0.conda#07cb059040220481ab9eda17cb86f644 +https://conda.anaconda.org/conda-forge/linux-aarch64/libglx-1.7.0-hd24410f_2.conda#1d4269e233636148696a67e2d30dad2a +https://conda.anaconda.org/conda-forge/linux-aarch64/libhiredis-1.0.2-h05efe27_0.tar.bz2#a87f068744fd20334cd41489eb163bee +https://conda.anaconda.org/conda-forge/linux-aarch64/libxml2-2.13.7-he060846_1.conda#b461618b5dafbc95c6f9492043cd991a +https://conda.anaconda.org/conda-forge/noarch/meson-1.8.0-pyh29332c3_0.conda#8e25221b702272394b86b0f4d7217f77 +https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19 +https://conda.anaconda.org/conda-forge/linux-aarch64/openblas-0.3.29-pthreads_h3a8cbd8_0.conda#4ec5b6144709ced5e7933977675f61c6 +https://conda.anaconda.org/conda-forge/linux-aarch64/openjpeg-2.5.3-h3f56577_0.conda#04231368e4af50d11184b50e14250993 +https://conda.anaconda.org/conda-forge/noarch/packaging-25.0-pyh29332c3_1.conda#58335b26c38bf4a20f399384c33cbcf9 +https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_1.conda#e9dcbce5f45f9ee500e728ae58b605b6 +https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.2.3-pyhd8ed1ab_1.conda#513d3c262ee49b54a8fec85c5bc99764 +https://conda.anaconda.org/conda-forge/noarch/setuptools-80.1.0-pyhff2d567_0.conda#f6f72d0837c79eaec77661be43e8a691 +https://conda.anaconda.org/conda-forge/noarch/six-1.17.0-pyhd8ed1ab_0.conda#a451d576819089b0d672f18768be0f65 +https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.6.0-pyhecae5ae_0.conda#9d64911b31d57ca443e9f1e36b04385f +https://conda.anaconda.org/conda-forge/noarch/tomli-2.2.1-pyhd8ed1ab_1.conda#ac944244f1fed2eb49bae07193ae8215 +https://conda.anaconda.org/conda-forge/linux-aarch64/tornado-6.4.2-py310h78583b1_0.conda#68a2bd5dcbb6feac96dee39f4b49fe0f +https://conda.anaconda.org/conda-forge/linux-aarch64/unicodedata2-16.0.0-py310ha766c32_0.conda#2936ce19a675e162962f396c7b40b905 +https://conda.anaconda.org/conda-forge/noarch/wheel-0.45.1-pyhd8ed1ab_1.conda#75cb7132eb58d97896e173ef12ac9986 +https://conda.anaconda.org/conda-forge/linux-aarch64/xcb-util-image-0.4.0-h5c728e9_2.conda#b82e5c78dbbfa931980e8bfe83bce913 +https://conda.anaconda.org/conda-forge/linux-aarch64/xkeyboard-config-2.44-h86ecc28_0.conda#4d91bf5ccb5b31be8e070fda2ed13c50 +https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxext-1.3.6-h57736b2_0.conda#bd1e86dd8aa3afd78a4bfdb4ef918165 +https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxfixes-6.0.1-h57736b2_0.conda#78f8715c002cc66991d7c11e3cf66039 +https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxrender-0.9.12-h86ecc28_0.conda#ae2c2dd0e2d38d249887727db2af960e 
+https://conda.anaconda.org/conda-forge/linux-aarch64/ccache-4.11.3-h4889ad1_0.conda#e0b9e519da2bf0fb8c48381daf87a194 +https://conda.anaconda.org/conda-forge/linux-aarch64/dbus-1.13.6-h12b9eeb_3.tar.bz2#f3d63805602166bac09386741e00935e +https://conda.anaconda.org/conda-forge/linux-aarch64/fonttools-4.57.0-py310heeae437_0.conda#548b750f1b3ec57d07b0014f8081e9c2 +https://conda.anaconda.org/conda-forge/linux-aarch64/freetype-2.13.3-h8af1aa0_1.conda#71c4cbe1b384a8e7b56993394a435343 +https://conda.anaconda.org/conda-forge/noarch/joblib-1.5.0-pyhd8ed1ab_0.conda#3d7257f0a61c9aa4ffa3e324a887416b +https://conda.anaconda.org/conda-forge/linux-aarch64/libcblas-3.9.0-31_hab92f65_openblas.conda#6b81dbae56a519f1ec2f25e0ee2f4334 +https://conda.anaconda.org/conda-forge/linux-aarch64/libgl-1.7.0-hd24410f_2.conda#0d00176464ebb25af83d40736a2cd3bb +https://conda.anaconda.org/conda-forge/linux-aarch64/liblapack-3.9.0-31_h411afd4_openblas.conda#41dbff5eb805a75c120a7b7a1c744dc2 +https://conda.anaconda.org/conda-forge/linux-aarch64/libllvm20-20.1.4-h07bd352_0.conda#a83f31777ec098202198145883d86ffb +https://conda.anaconda.org/conda-forge/linux-aarch64/libxkbcommon-1.9.2-hbab7b08_0.conda#7b47a2ccfb81b4be6be320b365e1cf33 +https://conda.anaconda.org/conda-forge/linux-aarch64/libxslt-1.1.39-h1cc9640_0.conda#13e1d3f9188e85c6d59a98651aced002 +https://conda.anaconda.org/conda-forge/linux-aarch64/openldap-2.6.9-h30c48ee_0.conda#c07822a5de65ce9797b9afa257faa917 +https://conda.anaconda.org/conda-forge/noarch/pip-25.1.1-pyh8b19718_0.conda#32d0781ace05105cc99af55d36cbec7c +https://conda.anaconda.org/conda-forge/noarch/pyproject-metadata-0.9.1-pyhd8ed1ab_0.conda#22ae7c6ea81e0c8661ef32168dda929b +https://conda.anaconda.org/conda-forge/noarch/pytest-8.3.5-pyhd8ed1ab_0.conda#c3c9316209dec74a705a36797970c6be +https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0.post0-pyhff2d567_1.conda#5ba79d7c71f03c678c8ead841f347d6e +https://conda.anaconda.org/conda-forge/linux-aarch64/xcb-util-cursor-0.1.5-h86ecc28_0.conda#d6bb2038d26fa118d5cbc2761116f3e5 +https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxcomposite-0.4.6-h86ecc28_2.conda#86051eee0766c3542be24844a9c3cf36 +https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxcursor-1.2.3-h86ecc28_0.conda#f2054759c2203d12d0007005e1f1296d +https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxdamage-1.1.6-h86ecc28_0.conda#d5773c4e4d64428d7ddaa01f6f845dc7 +https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxi-1.8.2-h57736b2_0.conda#eeee3bdb31c6acde2b81ad1b8c287087 +https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxrandr-1.5.4-h86ecc28_0.conda#dd3e74283a082381aa3860312e3c721e +https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxxf86vm-1.1.6-h86ecc28_0.conda#d745faa2d7c15092652e40a22bb261ed +https://conda.anaconda.org/conda-forge/linux-aarch64/fontconfig-2.15.0-h8dda3cd_1.conda#112b71b6af28b47c624bcbeefeea685b +https://conda.anaconda.org/conda-forge/linux-aarch64/libclang-cpp20.1-20.1.4-default_h7d4303a_0.conda#d71665eccdb65183c72e149424ec3928 +https://conda.anaconda.org/conda-forge/linux-aarch64/libclang13-20.1.4-default_h9e36cb9_0.conda#6d587caa650694fa5f6d04fda1bcfee2 +https://conda.anaconda.org/conda-forge/linux-aarch64/liblapacke-3.9.0-31_hc659ca5_openblas.conda#256bb281d78e5b8927ff13a1cde9f6f5 +https://conda.anaconda.org/conda-forge/linux-aarch64/libpq-17.4-hf590da8_1.conda#10fdc78be541c9017e2144f86d092aa2 
+https://conda.anaconda.org/conda-forge/noarch/meson-python-0.18.0-pyh70fd9c4_0.conda#576c04b9d9f8e45285fb4d9452c26133 +https://conda.anaconda.org/conda-forge/linux-aarch64/numpy-2.2.5-py310h6e5608f_0.conda#5c521c566cbcf058769c613dee3a18d6 +https://conda.anaconda.org/conda-forge/linux-aarch64/pillow-11.1.0-py310h34c99de_0.conda#c4fa80647a708505d65573c2353bc216 +https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.6.1-pyhd8ed1ab_1.conda#59aad4fb37cabc0bacc73cf344612ddd +https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxtst-1.2.5-h57736b2_3.conda#c05698071b5c8e0da82a282085845860 +https://conda.anaconda.org/conda-forge/linux-aarch64/blas-devel-3.9.0-31_h9678261_openblas.conda#a2cc143d7e25e52a915cb320e5b0d592 +https://conda.anaconda.org/conda-forge/linux-aarch64/cairo-1.18.4-h83712da_0.conda#cd55953a67ec727db5dc32b167201aa6 +https://conda.anaconda.org/conda-forge/linux-aarch64/contourpy-1.3.2-py310hf54e67a_0.conda#779694434d1f0a67c5260db76b7b7907 +https://conda.anaconda.org/conda-forge/linux-aarch64/scipy-1.15.2-py310hf37559f_0.conda#5c9b72f10d2118d943a5eaaf2f396891 +https://conda.anaconda.org/conda-forge/linux-aarch64/blas-2.131-openblas.conda#51c5f346e1ebee750f76066490059df9 +https://conda.anaconda.org/conda-forge/linux-aarch64/harfbuzz-11.1.0-h405b6a2_0.conda#6fd48c127b76a95ed3858c47fa9db7b0 +https://conda.anaconda.org/conda-forge/linux-aarch64/matplotlib-base-3.10.1-py310h2cc5e2d_0.conda#5652e355346f4823f6b4bfdd4860359d +https://conda.anaconda.org/conda-forge/linux-aarch64/qt6-main-6.9.0-ha483c8b_1.conda#fb32973c68de1f23a7e4de3651442b15 +https://conda.anaconda.org/conda-forge/linux-aarch64/pyside6-6.9.0-py310hee8ad4f_0.conda#68f556281ac23f1780381f00de99d66d +https://conda.anaconda.org/conda-forge/linux-aarch64/matplotlib-3.10.1-py310hbbe02a8_0.conda#c6aa0ea00ec104d0ad260c2ed2bb5582 diff --git a/build_tools/github/repair_windows_wheels.sh b/build_tools/github/repair_windows_wheels.sh index cdd0c0c79d8c4..8f51a34d4039b 100755 --- a/build_tools/github/repair_windows_wheels.sh +++ b/build_tools/github/repair_windows_wheels.sh @@ -8,6 +8,7 @@ DEST_DIR=$2 # By default, the Windows wheels are not repaired. # In this case, we need to vendor VCRUNTIME140.dll +pip install wheel wheel unpack "$WHEEL" WHEEL_DIRNAME=$(ls -d scikit_learn-*) python build_tools/github/vendor.py "$WHEEL_DIRNAME" diff --git a/build_tools/github/test_windows_wheels.sh b/build_tools/github/test_windows_wheels.sh index 07954a7a91970..c96ec4ad89d3e 100755 --- a/build_tools/github/test_windows_wheels.sh +++ b/build_tools/github/test_windows_wheels.sh @@ -4,12 +4,27 @@ set -e set -x PYTHON_VERSION=$1 +PROJECT_DIR=$2 -docker container run \ - --rm scikit-learn/minimal-windows \ - powershell -Command "python -c 'import sklearn; sklearn.show_versions()'" +python $PROJECT_DIR/build_tools/wheels/check_license.py -docker container run \ - -e SKLEARN_SKIP_NETWORK_TESTS=1 \ - --rm scikit-learn/minimal-windows \ - powershell -Command "pytest --pyargs sklearn" +FREE_THREADED_BUILD="$(python -c"import sysconfig; print(bool(sysconfig.get_config_var('Py_GIL_DISABLED')))")" + +if [[ $FREE_THREADED_BUILD == "False" ]]; then + # Run the tests for the scikit-learn wheel in a minimal Windows environment + # without any developer runtime libraries installed to ensure that it does not + # implicitly rely on the presence of the DLLs of such runtime libraries. 
+    docker container run \
+        --rm scikit-learn/minimal-windows \
+        powershell -Command "python -c 'import sklearn; sklearn.show_versions()'"
+
+    docker container run \
+        -e SKLEARN_SKIP_NETWORK_TESTS=1 \
+        --rm scikit-learn/minimal-windows \
+        powershell -Command "pytest --pyargs sklearn"
+else
+    # It is too cumbersome to use a Docker image in the free-threaded case
+    export PYTHON_GIL=0
+    python -c "import sklearn; sklearn.show_versions()"
+    pytest --pyargs sklearn
+fi
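A note on the free-threaded branch above: sysconfig.get_config_var("Py_GIL_DISABLED") returns 1 on free-threaded CPython builds, 0 on regular builds that define the variable, and None on older interpreters, so the bool(...) wrapper prints "True" only for free-threaded builds. A minimal standalone sketch of the same detection pattern, assuming bash with a suitable python on the PATH (the comments and surrounding commands are illustrative, not taken verbatim from the diff):

    FREE_THREADED_BUILD="$(python -c "import sysconfig; print(bool(sysconfig.get_config_var('Py_GIL_DISABLED')))")"
    if [[ $FREE_THREADED_BUILD == "True" ]]; then
        # PYTHON_GIL=0 keeps the GIL disabled even if an imported extension
        # module does not declare free-threading support (which would
        # otherwise re-enable the GIL at import time).
        export PYTHON_GIL=0
    fi
    python -c "import sklearn; sklearn.show_versions()"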
diff --git a/build_tools/github/upload_anaconda.sh b/build_tools/github/upload_anaconda.sh
index 5054b32a53c61..b53f27b75e72b 100755
--- a/build_tools/github/upload_anaconda.sh
+++ b/build_tools/github/upload_anaconda.sh
@@ -3,8 +3,8 @@
 set -e
 set -x

-# Note: build_wheels.sh has the same branch (only for NumPy 2.0 transition)
-if [[ "$GITHUB_EVENT_NAME" == "schedule" || "$CIRRUS_CRON" == "nightly" ]]; then
+if [[ "$GITHUB_EVENT_NAME" == "schedule" \
+      || "$GITHUB_EVENT_NAME" == "workflow_dispatch" ]]; then
     ANACONDA_ORG="scientific-python-nightly-wheels"
     ANACONDA_TOKEN="$SCIKIT_LEARN_NIGHTLY_UPLOAD_TOKEN"
 else
@@ -12,11 +12,9 @@ else
     ANACONDA_TOKEN="$SCIKIT_LEARN_STAGING_UPLOAD_TOKEN"
 fi

-# Install Python 3.8 because of a bug with Python 3.9
 export PATH=$CONDA/bin:$PATH
-conda create -n upload -y python=3.8
+conda create -n upload -y anaconda-client
 source activate upload
-conda install -y anaconda-client

 # Force a replacement if the remote file already exists
 anaconda -t $ANACONDA_TOKEN upload --force -u $ANACONDA_ORG $ARTIFACTS_PATH/*
diff --git a/build_tools/linting.sh b/build_tools/linting.sh
index aefabfae7b3f5..34b37530e10ff 100755
--- a/build_tools/linting.sh
+++ b/build_tools/linting.sh
@@ -10,26 +10,25 @@ set -o pipefail

 global_status=0

-echo -e "### Running black ###\n"
-black --check --diff .
+echo -e "### Running the ruff linter ###\n"
+ruff check --output-format=full
 status=$?
-
 if [[ $status -eq 0 ]]
 then
-    echo -e "No problem detected by black\n"
+    echo -e "No problem detected by the ruff linter\n"
 else
-    echo -e "Problems detected by black, please run black and commit the result\n"
+    echo -e "Problems detected by ruff check, please fix them\n"
     global_status=1
 fi

-echo -e "### Running ruff ###\n"
-ruff check --output-format=full .
+echo -e "### Running the ruff formatter ###\n"
+ruff format --diff
 status=$?
 if [[ $status -eq 0 ]]
 then
-    echo -e "No problem detected by ruff\n"
+    echo -e "No problem detected by the ruff formatter\n"
 else
-    echo -e "Problems detected by ruff, please fix them\n"
+    echo -e "Problems detected by ruff format, please run ruff format and commit the result\n"
     global_status=1
 fi

@@ -89,16 +88,15 @@ else
 fi

 # Check for joblib.delayed and joblib.Parallel imports
-# TODO(1.7): remove ":!sklearn/utils/_joblib.py"
 echo -e "### Checking for joblib imports ###\n"
 joblib_status=0
-joblib_delayed_import="$(git grep -l -A 10 -E "joblib import.+delayed" -- "*.py" ":!sklearn/utils/_joblib.py" ":!sklearn/utils/parallel.py")"
+joblib_delayed_import="$(git grep -l -A 10 -E "joblib import.+delayed" -- "*.py" ":!sklearn/utils/parallel.py")"
 if [ ! -z "$joblib_delayed_import" ]; then
     echo "Use from sklearn.utils.parallel import delayed instead of joblib delayed.
The following files contain imports to joblib.delayed:"
     echo "$joblib_delayed_import"
     joblib_status=1
 fi
-joblib_Parallel_import="$(git grep -l -A 10 -E "joblib import.+Parallel" -- "*.py" ":!sklearn/utils/_joblib.py" ":!sklearn/utils/parallel.py")"
+joblib_Parallel_import="$(git grep -l -A 10 -E "joblib import.+Parallel" -- "*.py" ":!sklearn/utils/parallel.py")"
 if [ ! -z "$joblib_Parallel_import" ]; then
     echo "Use from sklearn.utils.parallel import Parallel instead of joblib Parallel.
The following files contain imports to joblib.Parallel:"
     echo "$joblib_Parallel_import"
diff --git a/build_tools/shared.sh b/build_tools/shared.sh
index 4866c149d506f..3c6f238385506 100644
--- a/build_tools/shared.sh
+++ b/build_tools/shared.sh
@@ -29,7 +29,23 @@ show_installed_libraries(){
 activate_environment() {
     if [[ "$DISTRIB" =~ ^conda.* ]]; then
         source activate $VIRTUALENV
-    elif [[ "$DISTRIB" == "ubuntu" || "$DISTRIB" == "debian-32" || "$DISTRIB" == "pip-nogil" ]]; then
+    elif [[ "$DISTRIB" == "ubuntu" || "$DISTRIB" == "debian-32" ]]; then
        source $VIRTUALENV/bin/activate
    fi
 }
+
+create_conda_environment_from_lock_file() {
+    ENV_NAME=$1
+    LOCK_FILE=$2
+    # Because we are using lock-files with the "explicit" format, conda can
+    # install them directly, provided the lock-file does not contain pip-solved
+    # packages. For more details, see
+    # https://conda.github.io/conda-lock/output/#explicit-lockfile
+    lock_file_has_pip_packages=$(grep -q files.pythonhosted.org $LOCK_FILE && echo "true" || echo "false")
+    if [[ "$lock_file_has_pip_packages" == "false" ]]; then
+        conda create --name $ENV_NAME --file $LOCK_FILE
+    else
+        python -m pip install "$(get_dep conda-lock min)"
+        conda-lock install --name $ENV_NAME $LOCK_FILE
+    fi
+}
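For context on the helper above: an "explicit" lock-file is a plain list of resolved package URLs (the @EXPLICIT lists shown earlier in this diff), which conda can install directly; a files.pythonhosted.org line indicates a pip-solved package that conda cannot install itself, hence the conda-lock fallback. A hedged usage sketch, with an illustrative environment name and lock-file path:

    source build_tools/shared.sh
    # Installs straight from the URL list when possible, otherwise falls
    # back to conda-lock for lock-files that also pin pip packages.
    create_conda_environment_from_lock_file \
        testvenv build_tools/azure/pylatest_conda_forge_mkl_linux-64_conda.lock
    source activate testvenv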
diff --git a/build_tools/update_environments_and_lock_files.py b/build_tools/update_environments_and_lock_files.py
index 86da119ec4547..0edf62b5a0d7b 100644
--- a/build_tools/update_environments_and_lock_files.py
+++ b/build_tools/update_environments_and_lock_files.py
@@ -7,7 +7,7 @@
 - make sure that the latest versions of all the dependencies are used in the
   CI. There is a scheduled workflow that does this, see
   .github/workflows/update-lock-files.yml. It is still useful to run this
-  script when when the automated PR fails and for example some packages need to
+  script when the automated PR fails and for example some packages need to
   be pinned. You can add the pins to this script, run it, and open a PR with
   the changes.
 - bump minimum dependencies in sklearn/_min_dependencies.py. Running this
@@ -26,6 +26,7 @@
 with pip. To run this script you need:
+- conda
 - conda-lock. The version should match the one used in the CI in
   sklearn/_min_dependencies.py
 - pip-tools
@@ -82,12 +83,7 @@

 docstring_test_dependencies = ["sphinx", "numpydoc"]

-default_package_constraints = {
-    # TODO: somehow pytest 8 does not seem to work with meson editable
-    # install. Exit code is 5, i.e. no test collected
-    # This would be fixed by https://github.com/mesonbuild/meson-python/pull/569
-    "pytest": "<8",
-}
+default_package_constraints = {}


 def remove_from(alist, to_remove):
@@ -95,13 +91,30 @@ def remove_from(alist, to_remove):


 build_metadata_list = [
+    {
+        "name": "pylatest_conda_forge_cuda_array-api_linux-64",
+        "type": "conda",
+        "tag": "cuda",
+        "folder": "build_tools/github",
+        "platform": "linux-64",
+        "channels": ["conda-forge", "pytorch", "nvidia"],
+        "conda_dependencies": common_dependencies
+        + [
+            "ccache",
+            "pytorch-gpu",
+            "polars",
+            "pyarrow",
+            "cupy",
+            "array-api-strict",
+        ],
+    },
     {
         "name": "pylatest_conda_forge_mkl_linux-64",
         "type": "conda",
         "tag": "main-ci",
         "folder": "build_tools/azure",
         "platform": "linux-64",
-        "channel": "conda-forge",
+        "channels": ["conda-forge"],
         "conda_dependencies": common_dependencies
         + [
             "ccache",
@@ -109,12 +122,11 @@
             "pytorch-cpu",
             "polars",
             "pyarrow",
-            "array-api-compat",
             "array-api-strict",
+            "scipy-doctest",
         ],
         "package_constraints": {
             "blas": "[build=mkl]",
-            "pytorch": "1.13",
         },
     },
     {
@@ -123,7 +135,7 @@
         "tag": "main-ci",
         "folder": "build_tools/azure",
         "platform": "osx-64",
-        "channel": "conda-forge",
+        "channels": ["conda-forge"],
         "conda_dependencies": common_dependencies
         + [
             "ccache",
@@ -140,9 +152,9 @@
         "tag": "main-ci",
         "folder": "build_tools/azure",
         "platform": "osx-64",
-        "channel": "defaults",
+        "channels": ["defaults"],
         "conda_dependencies": remove_from(
-            common_dependencies, ["cython", "threadpoolctl"]
+            common_dependencies, ["cython", "threadpoolctl", "meson-python"]
         )
         + ["ccache"],
         "package_constraints": {
@@ -152,35 +164,32 @@
             # channel.
             "scipy": "<1.12",
         },
-        # TODO: put cython and threadpoolctl back to conda dependencies when required
-        # version is available on the main channel
-        "pip_dependencies": ["cython", "threadpoolctl"],
+        # TODO: put cython, threadpoolctl and meson-python back to conda
+        # dependencies when required version is available on the main channel
+        "pip_dependencies": ["cython", "threadpoolctl", "meson-python", "meson"],
     },
     {
-        "name": "pymin_conda_defaults_openblas",
+        "name": "pymin_conda_forge_openblas_min_dependencies",
         "type": "conda",
         "tag": "main-ci",
         "folder": "build_tools/azure",
         "platform": "linux-64",
-        "channel": "defaults",
-        "conda_dependencies": remove_from(
-            common_dependencies,
-            ["pandas", "threadpoolctl", "pip", "ninja", "meson-python"],
-        )
-        + ["ccache"],
+        "channels": ["conda-forge"],
+        "conda_dependencies": common_dependencies + ["ccache", "polars"],
         "package_constraints": {
-            "python": "3.9",
+            "python": "3.10",
             "blas": "[build=openblas]",
-            "numpy": "1.21",  # the min version is not available on the defaults channel
-            "scipy": "1.7",  # the min version has some low level crashes
+            "numpy": "min",
+            "scipy": "min",
             "matplotlib": "min",
             "cython": "min",
             "joblib": "min",
             "threadpoolctl": "min",
+            "meson-python": "min",
+            "pandas": "min",
+            "polars": "min",
+            "pyamg": "min",
         },
-        # TODO: put pip dependencies back to conda dependencies when required
-        # version is available on the defaults channel.
- "pip_dependencies": ["threadpoolctl"], }, { "name": "pymin_conda_forge_openblas_ubuntu_2204", @@ -188,14 +197,14 @@ def remove_from(alist, to_remove): "tag": "main-ci", "folder": "build_tools/azure", "platform": "linux-64", - "channel": "conda-forge", + "channels": ["conda-forge"], "conda_dependencies": ( - common_dependencies_without_coverage + remove_from(common_dependencies_without_coverage, ["matplotlib"]) + docstring_test_dependencies + ["ccache"] ), "package_constraints": { - "python": "3.9", + "python": "3.10", "blas": "[build=openblas]", }, }, @@ -205,16 +214,18 @@ def remove_from(alist, to_remove): "tag": "main-ci", "folder": "build_tools/azure", "platform": "linux-64", - "channel": "defaults", + "channels": ["defaults"], "conda_dependencies": ["python", "ccache"], "pip_dependencies": ( remove_from(common_dependencies, ["python", "blas", "pip"]) + docstring_test_dependencies + # Test with some optional dependencies + ["lightgbm", "scikit-image"] + # Test array API on CPU without PyTorch + + ["array-api-strict"] + # doctests dependencies + + ["scipy-doctest"] ), - "package_constraints": { - "python": "3.9", - }, }, { "name": "pylatest_pip_scipy_dev", @@ -222,7 +233,7 @@ def remove_from(alist, to_remove): "tag": "scipy-dev", "folder": "build_tools/azure", "platform": "linux-64", - "channel": "defaults", + "channels": ["defaults"], "conda_dependencies": ["python", "ccache"], "pip_dependencies": ( remove_from( @@ -251,23 +262,29 @@ def remove_from(alist, to_remove): ), }, { - "name": "pypy3", + "name": "pylatest_free_threaded", "type": "conda", - "tag": "pypy", + "tag": "free-threaded", "folder": "build_tools/azure", "platform": "linux-64", - "channel": "conda-forge", - "conda_dependencies": ( - ["pypy", "python"] - + remove_from( - common_dependencies_without_coverage, ["python", "pandas", "pillow"] - ) - + ["ccache"] - ), - "package_constraints": { - "blas": "[build=openblas]", - "python": "3.9", - }, + "channels": ["conda-forge"], + "conda_dependencies": [ + "python-freethreading", + "numpy", + # TODO add cython and scipy when there are conda-forge packages for + # them and remove dev version install in + # build_tools/azure/install.sh. Note that for now conda-lock does + # not deal with free-threaded wheels correctly, see + # https://github.com/conda/conda-lock/issues/754. 
+ "joblib", + "threadpoolctl", + "pytest", + "pytest-xdist", + "ninja", + "meson-python", + "ccache", + "pip", + ], }, { "name": "pymin_conda_forge_mkl", @@ -275,14 +292,14 @@ def remove_from(alist, to_remove): "tag": "main-ci", "folder": "build_tools/azure", "platform": "win-64", - "channel": "conda-forge", + "channels": ["conda-forge"], "conda_dependencies": remove_from(common_dependencies, ["pandas", "pyamg"]) + [ "wheel", "pip", ], "package_constraints": { - "python": "3.9", + "python": "3.10", "blas": "[build=mkl]", }, }, @@ -292,7 +309,7 @@ def remove_from(alist, to_remove): "tag": "main-ci", "folder": "build_tools/circle", "platform": "linux-64", - "channel": "conda-forge", + "channels": ["conda-forge"], "conda_dependencies": common_dependencies_without_coverage + [ "scikit-image", @@ -307,10 +324,17 @@ def remove_from(alist, to_remove): "plotly", "polars", "pooch", + "sphinx-remove-toctrees", + "sphinx-design", + "pydata-sphinx-theme", + "towncrier", + ], + "pip_dependencies": [ + "sphinxext-opengraph", + "sphinxcontrib-sass", ], - "pip_dependencies": ["sphinxext-opengraph"], "package_constraints": { - "python": "3.9", + "python": "3.10", "numpy": "min", "scipy": "min", "matplotlib": "min", @@ -325,6 +349,13 @@ def remove_from(alist, to_remove): "sphinxext-opengraph": "min", "plotly": "min", "polars": "min", + "pooch": "min", + "pyamg": "min", + "sphinx-design": "min", + "sphinxcontrib-sass": "min", + "sphinx-remove-toctrees": "min", + "pydata-sphinx-theme": "min", + "towncrier": "min", }, }, { @@ -333,7 +364,7 @@ def remove_from(alist, to_remove): "tag": "main-ci", "folder": "build_tools/circle", "platform": "linux-64", - "channel": "conda-forge", + "channels": ["conda-forge"], "conda_dependencies": common_dependencies_without_coverage + [ "scikit-image", @@ -349,29 +380,37 @@ def remove_from(alist, to_remove): "polars", "pooch", "sphinxext-opengraph", + "sphinx-remove-toctrees", + "sphinx-design", + "pydata-sphinx-theme", + "towncrier", + ], + "pip_dependencies": [ + "jupyterlite-sphinx", + "jupyterlite-pyodide-kernel", + "sphinxcontrib-sass", ], - "pip_dependencies": ["jupyterlite-sphinx", "jupyterlite-pyodide-kernel"], "package_constraints": { - "python": "3.9", + "python": "3.10", }, }, { - "name": "pymin_conda_forge", + "name": "pymin_conda_forge_arm", "type": "conda", - "tag": "arm", - "folder": "build_tools/cirrus", + "tag": "main-ci", + "folder": "build_tools/github", "platform": "linux-aarch64", - "channel": "conda-forge", + "channels": ["conda-forge"], "conda_dependencies": remove_from( common_dependencies_without_coverage, ["pandas", "pyamg"] ) + ["pip", "ccache"], "package_constraints": { - "python": "3.9", + "python": "3.10", }, }, { - "name": "debian_atlas_32bit", + "name": "debian_32bit", "type": "pip", "tag": "main-ci", "folder": "build_tools/azure", @@ -384,16 +423,9 @@ def remove_from(alist, to_remove): "ninja", "meson-python", ], - "package_constraints": { - "joblib": "min", - "threadpoolctl": "3.1.0", - "pytest": "min", - "pytest-cov": "min", - # no pytest-xdist because it causes issue on 32bit - "cython": "min", - }, - # same Python version as in debian-32 build - "python_version": "3.9.2", + # Python version from the python3 APT package in the debian-32 docker + # image. 
+ "python_version": "3.12.5", }, { "name": "ubuntu_atlas", @@ -426,7 +458,7 @@ def execute_command(command_list): ) out, err = proc.communicate() - out, err = out.decode(), err.decode() + out, err = out.decode(errors="replace"), err.decode(errors="replace") if proc.returncode != 0: command_str = " ".join(command_list) @@ -478,7 +510,9 @@ def get_conda_environment_content(build_metadata): # following script to centralize the configuration for CI builds: # build_tools/update_environments_and_lock_files.py channels: - - {{ build_metadata['channel'] }} + {% for channel in build_metadata['channels'] %} + - {{ channel }} + {% endfor %} dependencies: {% for conda_dep in build_metadata['conda_dependencies'] %} - {{ conda_dep | get_package_with_constraint(build_metadata) }} @@ -609,9 +643,9 @@ def write_pip_lock_file(build_metadata): json_output = execute_command(["conda", "info", "--json"]) conda_info = json.loads(json_output) - environment_folder = [ + environment_folder = next( each for each in conda_info["envs"] if each.endswith(environment_name) - ][0] + ) environment_path = Path(environment_folder) pip_compile_path = environment_path / "bin" / "pip-compile" @@ -726,6 +760,7 @@ def main(select_build, skip_build, select_tag, verbose, very_verbose): filtered_conda_build_metadata_list = [ each for each in filtered_build_metadata_list if each["type"] == "conda" ] + if filtered_conda_build_metadata_list: logger.info("# Writing conda environments") write_all_conda_environments(filtered_conda_build_metadata_list) diff --git a/build_tools/wheels/LICENSE_linux.txt b/build_tools/wheels/LICENSE_linux.txt new file mode 100644 index 0000000000000..057656fcc789d --- /dev/null +++ b/build_tools/wheels/LICENSE_linux.txt @@ -0,0 +1,80 @@ +This binary distribution of scikit-learn also bundles the following software: + +---- + +Name: GCC runtime library +Files: scikit_learn.libs/libgomp*.so* +Availability: https://gcc.gnu.org/git/?p=gcc.git;a=tree;f=libgomp + +GCC RUNTIME LIBRARY EXCEPTION + +Version 3.1, 31 March 2009 + +Copyright (C) 2009 Free Software Foundation, Inc. + +Everyone is permitted to copy and distribute verbatim copies of this +license document, but changing it is not allowed. + +This GCC Runtime Library Exception ("Exception") is an additional +permission under section 7 of the GNU General Public License, version +3 ("GPLv3"). It applies to a given file (the "Runtime Library") that +bears a notice placed by the copyright holder of the file stating that +the file is governed by GPLv3 along with this Exception. + +When you use GCC to compile a program, GCC may combine portions of +certain GCC header files and runtime libraries with the compiled +program. The purpose of this Exception is to allow compilation of +non-GPL (including proprietary) programs to use, in this way, the +header files and runtime libraries covered by this Exception. + +0. Definitions. + +A file is an "Independent Module" if it either requires the Runtime +Library for execution after a Compilation Process, or makes use of an +interface provided by the Runtime Library, but is not otherwise based +on the Runtime Library. + +"GCC" means a version of the GNU Compiler Collection, with or without +modifications, governed by version 3 (or a specified later version) of +the GNU General Public License (GPL) with the option of using any +subsequent versions published by the FSF. 
+ +"GPL-compatible Software" is software whose conditions of propagation, +modification and use would permit combination with GCC in accord with +the license of GCC. + +"Target Code" refers to output from any compiler for a real or virtual +target processor architecture, in executable form or suitable for +input to an assembler, loader, linker and/or execution +phase. Notwithstanding that, Target Code does not include data in any +format that is used as a compiler intermediate representation, or used +for producing a compiler intermediate representation. + +The "Compilation Process" transforms code entirely represented in +non-intermediate languages designed for human-written code, and/or in +Java Virtual Machine byte code, into Target Code. Thus, for example, +use of source code generators and preprocessors need not be considered +part of the Compilation Process, since the Compilation Process can be +understood as starting with the output of the generators or +preprocessors. + +A Compilation Process is "Eligible" if it is done using GCC, alone or +with other GPL-compatible software, or if it is done without using any +work based on GCC. For example, using non-GPL-compatible Software to +optimize any GCC intermediate representations would not qualify as an +Eligible Compilation Process. + +1. Grant of Additional Permission. + +You have permission to propagate a work of Target Code formed by +combining the Runtime Library with Independent Modules, even if such +propagation would otherwise violate the terms of GPLv3, provided that +all Target Code was generated by Eligible Compilation Processes. You +may then convey such a combination under terms of your choice, +consistent with the licensing of the Independent Modules. + +2. No Weakening of GCC Copyleft. + +The availability of this Exception does not imply any general +presumption that third-party software is unaffected by the copyleft +requirements of the license of GCC. diff --git a/build_tools/wheels/LICENSE_macos.txt b/build_tools/wheels/LICENSE_macos.txt new file mode 100644 index 0000000000000..61a523f47663c --- /dev/null +++ b/build_tools/wheels/LICENSE_macos.txt @@ -0,0 +1,286 @@ +This binary distribution of scikit-learn also bundles the following software: + +---- + +Name: libomp runtime library +Files: sklearn/.dylibs/libomp.dylib +Availability: https://github.com/llvm/llvm-project + +============================================================================== +The LLVM Project is under the Apache License v2.0 with LLVM Exceptions: +============================================================================== + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +---- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. 
+ +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. + +============================================================================== +Software from third parties included in the LLVM Project: +============================================================================== +The LLVM Project contains third party software which is under different license +terms. All such code will be identified clearly using at least one of two +mechanisms: +1) It will be in a separate directory tree with its own `LICENSE.txt` or + `LICENSE` file at the top containing the specific license and restrictions + which apply to that software, or +2) It will contain specific license and restriction terms at the top of every + file. + +============================================================================== +Legacy LLVM License (https://llvm.org/docs/DeveloperPolicy.html#legacy): +============================================================================== +University of Illinois/NCSA +Open Source License + +Copyright (c) 2003-2019 University of Illinois at Urbana-Champaign. +All rights reserved. + +Developed by: + + LLVM Team + + University of Illinois at Urbana-Champaign + + http://llvm.org + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal with +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimers. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimers in the + documentation and/or other materials provided with the distribution. + + * Neither the names of the LLVM Team, University of Illinois at + Urbana-Champaign, nor the names of its contributors may be used to + endorse or promote products derived from this Software without specific + prior written permission. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE +SOFTWARE. 
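A minimal sketch of how one could list the vendored binaries that the bundled
notices above actually cover in an installed wheel (not part of this patch;
it assumes the sklearn/.dylibs and sklearn\.libs layouts named in the
LICENSE files, and prints nothing for a from-source install):

    from pathlib import Path

    import sklearn

    # Wheels vendor shared libraries next to the package: libomp.dylib under
    # sklearn/.dylibs on macOS and the MSVC runtime DLLs under sklearn\.libs
    # on Windows. Globbing a missing directory yields nothing, so this loop
    # is a no-op on other installs.
    pkg_root = Path(sklearn.__file__).parent
    for pattern in (".dylibs/*.dylib", ".libs/*.dll"):
        for lib in sorted(pkg_root.glob(pattern)):
            print(lib.relative_to(pkg_root))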
diff --git a/build_tools/wheels/LICENSE_windows.txt b/build_tools/wheels/LICENSE_windows.txt
new file mode 100644
index 0000000000000..9e98ad8defac2
--- /dev/null
+++ b/build_tools/wheels/LICENSE_windows.txt
@@ -0,0 +1,25 @@
+This binary distribution of scikit-learn also bundles the following software:
+
+----
+
+Name: Microsoft Visual C++ Runtime Files
+Files: sklearn\.libs\*.dll
+Availability: https://learn.microsoft.com/en-us/visualstudio/releases/2015/2015-redistribution-vs
+
+Subject to the License Terms for the software, you may copy and distribute with your
+program any of the files within the following folder and its subfolders except as noted
+below. You may not modify these files.
+
+C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\redist
+
+You may not distribute the contents of the following folders:
+
+C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\redist\debug_nonredist
+C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\redist\onecore\debug_nonredist
+
+Subject to the License Terms for the software, you may copy and distribute the following
+files with your program in your program’s application local folder or by deploying them
+into the Global Assembly Cache (GAC):
+
+VC\atlmfc\lib\mfcmifc80.dll
+VC\atlmfc\lib\amd64\mfcmifc80.dll
diff --git a/build_tools/wheels/build_wheels.sh b/build_tools/wheels/build_wheels.sh
index d2df4e3936829..02b05bc8a2795 100755
--- a/build_tools/wheels/build_wheels.sh
+++ b/build_tools/wheels/build_wheels.sh
@@ -38,8 +38,8 @@ if [[ $(uname) == "Darwin" ]]; then
         OPENMP_URL="https://anaconda.org/conda-forge/llvm-openmp/11.1.0/download/osx-64/llvm-openmp-11.1.0-hda6cdc1_1.tar.bz2"
     fi

-    sudo conda create -n build $OPENMP_URL
-    PREFIX="$CONDA_HOME/envs/build"
+    conda create -n build $OPENMP_URL
+    PREFIX="$HOME/miniconda3/envs/build"

     export CC=/usr/bin/clang
     export CXX=/usr/bin/clang++
@@ -49,14 +49,11 @@ if [[ $(uname) == "Darwin" ]]; then
     export LDFLAGS="$LDFLAGS -Wl,-rpath,$PREFIX/lib -L$PREFIX/lib -lomp"
 fi

-
-if [[ "$GITHUB_EVENT_NAME" == "schedule" || "$CIRRUS_CRON" == "nightly" ]]; then
-    # Nightly build: See also `../github/upload_anaconda.sh` (same branching).
-    # To help with NumPy 2.0 transition, ensure that we use the NumPy 2.0
-    # nightlies. This lives on the edge and opts-in to all pre-releases.
-    # That could be an issue, in which case no-build-isolation and a targeted
-    # NumPy install may be necessary, instead.
- export CIBW_BUILD_FRONTEND='pip; args: --pre --extra-index-url "https://pypi.anaconda.org/scientific-python-nightly-wheels/simple"' +if [[ "$CIBW_FREE_THREADED_SUPPORT" =~ [tT]rue ]]; then + # Numpy, scipy, Cython only have free-threaded wheels on scientific-python-nightly-wheels + # TODO: remove this after CPython 3.13 is released (scheduled October 2024) + # and our dependencies have free-threaded wheels on PyPI + export CIBW_BUILD_FRONTEND='pip; args: --pre --extra-index-url "https://pypi.anaconda.org/scientific-python-nightly-wheels/simple" --only-binary :all:' fi # The version of the built dependencies are specified diff --git a/build_tools/wheels/check_license.py b/build_tools/wheels/check_license.py new file mode 100644 index 0000000000000..00fe4169be65d --- /dev/null +++ b/build_tools/wheels/check_license.py @@ -0,0 +1,30 @@ +"""Checks the bundled license is installed with the wheel.""" + +import platform +import site +from itertools import chain +from pathlib import Path + +site_packages = site.getsitepackages() + +site_packages_path = (Path(p) for p in site_packages) + +try: + distinfo_path = next( + chain( + s + for site_package in site_packages_path + for s in site_package.glob("scikit_learn-*.dist-info") + ) + ) +except StopIteration as e: + raise RuntimeError("Unable to find scikit-learn's dist-info") from e + +license_text = (distinfo_path / "COPYING").read_text() + +assert "Copyright (c)" in license_text + +assert ( + "This binary distribution of scikit-learn also bundles the following software" + in license_text +), f"Unable to find bundled license for {platform.system()}" diff --git a/build_tools/wheels/cibw_before_build.sh b/build_tools/wheels/cibw_before_build.sh new file mode 100755 index 0000000000000..4e4558db5a5bc --- /dev/null +++ b/build_tools/wheels/cibw_before_build.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +set -euxo pipefail + +PROJECT_DIR="$1" +LICENSE_FILE="$PROJECT_DIR/COPYING" + +echo "" >>"$LICENSE_FILE" +echo "----" >>"$LICENSE_FILE" +echo "" >>"$LICENSE_FILE" + +if [[ $RUNNER_OS == "Linux" ]]; then + cat $PROJECT_DIR/build_tools/wheels/LICENSE_linux.txt >>"$LICENSE_FILE" +elif [[ $RUNNER_OS == "macOS" ]]; then + cat $PROJECT_DIR/build_tools/wheels/LICENSE_macos.txt >>"$LICENSE_FILE" +elif [[ $RUNNER_OS == "Windows" ]]; then + cat $PROJECT_DIR/build_tools/wheels/LICENSE_windows.txt >>"$LICENSE_FILE" +fi diff --git a/build_tools/wheels/test_wheels.sh b/build_tools/wheels/test_wheels.sh index e8cdf4b3ea8a2..1d6ee19bda8a8 100755 --- a/build_tools/wheels/test_wheels.sh +++ b/build_tools/wheels/test_wheels.sh @@ -3,9 +3,21 @@ set -e set -x +PROJECT_DIR="$1" + +python $PROJECT_DIR/build_tools/wheels/check_license.py + python -c "import joblib; print(f'Number of cores (physical): \ {joblib.cpu_count()} ({joblib.cpu_count(only_physical_cores=True)})')" +FREE_THREADED_BUILD="$(python -c"import sysconfig; print(bool(sysconfig.get_config_var('Py_GIL_DISABLED')))")" +if [[ $FREE_THREADED_BUILD == "True" ]]; then + # TODO: delete when importing numpy no longer enables the GIL + # setting to zero ensures the GIL is disabled while running the + # tests under free-threaded python + export PYTHON_GIL=0 +fi + # Test that there are no links to system libraries in the # threadpoolctl output section of the show_versions output: python -c "import sklearn; sklearn.show_versions()" diff --git a/doc/Makefile b/doc/Makefile index 44f02585f6205..1419bac49316d 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -2,7 +2,7 @@ # # You can set these variables from the command line. 
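+# Note: `?=` below assigns a default only when the variable is not already
+# defined, so an environment override such as `SPHINXOPTS="-T -j4" make html`
+# takes effect, whereas a plain `=` would ignore the value from the environment.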
-SPHINXOPTS = -T
+SPHINXOPTS ?= -T
 SPHINXBUILD ?= sphinx-build
 PAPER =
 BUILDDIR = _build
@@ -47,9 +47,17 @@ help:

 clean:
 	-rm -rf $(BUILDDIR)/*
+	@echo "Removed $(BUILDDIR)/*"
 	-rm -rf auto_examples/
+	@echo "Removed auto_examples/"
 	-rm -rf generated/*
+	@echo "Removed generated/"
 	-rm -rf modules/generated/
+	@echo "Removed modules/generated/"
+	-rm -rf css/styles/
+	@echo "Removed css/styles/"
+	-rm -rf api/*.rst
+	@echo "Removed api/*.rst"

 # Default to SPHINX_NUMJOBS=1 for full documentation build. Using
 # SPHINX_NUMJOBS!=1 may actually slow down the build, or cause weird issues in
 # https://github.com/scikit-learn/scikit-learn/pull/25809
 html: SPHINX_NUMJOBS ?= 1
 html:
+	@echo $(ALLSPHINXOPTS)
 	# These two lines make the build a bit more lengthy, and the
 	# the embedding of images more robust
 	rm -rf $(BUILDDIR)/html/_images
diff --git a/doc/about.rst b/doc/about.rst
index e7083569fd128..4db39f9709e73 100644
--- a/doc/about.rst
+++ b/doc/about.rst
@@ -1,26 +1,28 @@
 .. _about:

+========
 About us
 ========

 History
--------
+=======

 This project was started in 2007 as a Google Summer of Code project by
-David Cournapeau. Later that year, Matthieu Brucher started work on
-this project as part of his thesis.
+David Cournapeau. Later that year, Matthieu Brucher started working on this project
+as part of his thesis.

 In 2010 Fabian Pedregosa, Gael Varoquaux, Alexandre Gramfort and Vincent
 Michel of INRIA took leadership of the project and made the first public
 release, February the 1st 2010. Since then, several releases have appeared
-following a ~ 3-month cycle, and a thriving international community has
-been leading the development.
+following an approximately 3-month cycle, and a thriving international
+community has been leading the development. As a result, INRIA holds the
+copyright over the work done by people who were employed by INRIA at the
+time of the contribution.

 Governance
-----------
+==========

-The decision making process and governance structure of scikit-learn is laid
-out in the :ref:`governance document `.
+The decision-making process and governance structure of scikit-learn, including roles and responsibilities, is laid out in the :ref:`governance document `.

 .. The "author" anchors below is there to ensure that old html links (in the
    form of "about.html#author" still work)

 .. _authors:

 The people behind scikit-learn
--------------------------------
+==============================

 Scikit-learn is a community project, developed by a large group of
-people, all across the world. A few teams, listed below, have central
-roles, however a more complete list of contributors can be found `on
+people all across the world. A few core contributor teams, listed below, have
+central roles; however, a more complete list of contributors can be found `on
 github `__.

+Active Core Contributors
+------------------------
+
 Maintainers Team
 ................

@@ -44,14 +49,16 @@ consolidating scikit-learn's development and maintenance:

 .. include:: maintainers.rst

-Please do not email the authors directly to ask for assistance or report issues.
-Instead, please see `What's the best way to ask questions about scikit-learn
-`_
-in the FAQ.
+.. note::
+
+  Please do not email the authors directly to ask for assistance or report issues.
+  Instead, please see `What's the best way to ask questions about scikit-learn
+  `_
+  in the FAQ.

 .. seealso::

-  :ref:`How you can contribute to the project `
+  How you can :ref:`contribute to the project `.

 Documentation Team
 ..................
@@ -77,9 +84,11 @@ The following people help with :ref:`communication around scikit-learn

 .. include:: communication_team.rst

+Emeritus Core Contributors
+--------------------------

-Emeritus Core Developers
-------------------------
+Emeritus Maintainers Team
+.........................

 The following people have been active contributors in the past, but are no
 longer active in the project:
@@ -87,7 +96,7 @@ longer active in the project:

 .. include:: maintainers_emeritus.rst

 Emeritus Communication Team
----------------------------
+...........................

 The following people have been active in the communication team in the
 past, but no longer have communication responsibilities:
@@ -95,7 +104,7 @@ past, but no longer have communication responsibilities:

 .. include:: communication_team_emeritus.rst

 Emeritus Contributor Experience Team
------------------------------------
+....................................

 The following people have been active in the contributor experience team in the
 past:
@@ -105,7 +114,7 @@ past:

 .. _citing-scikit-learn:

 Citing scikit-learn
--------------------
+===================

 If you use scikit-learn in a scientific publication, we would appreciate
 citations to the following paper:
@@ -150,469 +159,339 @@ Bibtex entry::
   }

 Artwork
--------
+=======

 High quality PNG and SVG logos are available in the `doc/logos/
 `_
 source directory.

 .. image:: images/scikit-learn-logo-notext.png
-  :align: center
+   :align: center

 Funding
--------
-Scikit-Learn is a community driven project, however institutional and private
+=======
+
+Scikit-learn is a community-driven project; however, institutional and private
 grants help to assure its sustainability.

 The project would like to thank the following funders.

...................................

+.. div:: sk-text-image-grid-small

-.. raw:: html

+  .. div:: text-box

-
-
+ `:probabl. `_ employs Adrin Jalali, Arturo Amor, + François Goupil, Guillaume Lemaitre, Jérémie du Boisberranger, Loïc Estève, + Olivier Grisel, and Stefanie Senger. -`:probabl. `_ funds Adrin Jalali, Arturo Amor, -François Goupil, Guillaume Lemaitre, Jérémie du Boisberranger, Olivier Grisel, and -Stefanie Senger. - -.. raw:: html - -
- -
- -.. image:: images/probabl.png - :width: 75pt - :align: center - :target: https://probabl.ai - -.. raw:: html + .. div:: image-box -
-
+ .. image:: images/probabl.png + :target: https://probabl.ai .......... -.. raw:: html - -
-
- -The `Members `_ of -the `Scikit-Learn Consortium at Inria Foundation -`_ help at maintaining and -improving the project through their financial support. - -.. raw:: html - -
- .. |chanel| image:: images/chanel.png - :width: 55pt - :target: https://www.chanel.com + :target: https://www.chanel.com .. |axa| image:: images/axa.png - :width: 40pt - :target: https://www.axa.fr/ + :target: https://www.axa.fr/ .. |bnp| image:: images/bnp.png - :width: 120pt - :target: https://www.bnpparibascardif.com/ + :target: https://www.bnpparibascardif.com/ .. |dataiku| image:: images/dataiku.png - :width: 55pt - :target: https://www.dataiku.com/ - -.. |hf| image:: images/huggingface_logo-noborder.png - :width: 55pt - :target: https://huggingface.co + :target: https://www.dataiku.com/ .. |nvidia| image:: images/nvidia.png - :width: 55pt - :target: https://www.nvidia.com + :target: https://www.nvidia.com .. |inria| image:: images/inria-logo.jpg - :width: 75pt - :target: https://www.inria.fr - + :target: https://www.inria.fr .. raw:: html -
- -.. table:: - :class: sk-sponsor-table - - +----------+-----------+ - | |chanel| | - +----------+-----------+ - | | - +----------+-----------+ - | |axa| | |bnp| | - +----------+-----------+ - | | - +----------+-----------+ - | |nvidia| | |hf| | - +----------+-----------+ - | | - +----------+-----------+ - | |dataiku| | - +----------+-----------+ - | | - +----------+-----------+ - | |inria| | - +----------+-----------+ + -.......... +.. div:: sk-text-image-grid-small -.. raw:: html + .. div:: text-box -
-
+    The `Members `_ of
+    the `Scikit-learn Consortium at Inria Foundation
+    `_ help maintain and
+    improve the project through their financial support.
-`NVidia `_ funds Tim Head since 2022
-and is part of the scikit-learn consortium at Inria.
+  .. div:: image-box
-.. raw:: html
+    .. table::
+      :class: image-subtable
-
+ +----------+-----------+ + | |chanel| | + +----------+-----------+ + | |axa| | |bnp| | + +----------+-----------+ + | |nvidia| | + +----------+-----------+ + | |dataiku| | + +----------+-----------+ + | |inria| | + +----------+-----------+ -
+.......... -.. image:: images/nvidia.png - :width: 55pt - :align: center - :target: https://nvidia.com +.. div:: sk-text-image-grid-small -.. raw:: html + .. div:: text-box -
-
+ `NVidia `_ funds Tim Head since 2022 + and is part of the scikit-learn consortium at Inria. -.......... + .. div:: image-box -.. raw:: html + .. image:: images/nvidia.png + :target: https://nvidia.com -
-
+.......... -`Microsoft `_ funds Andreas Müller since 2020. +.. div:: sk-text-image-grid-small -.. raw:: html + .. div:: text-box -
+ `Microsoft `_ funds Andreas Müller since 2020. -
+ .. div:: image-box -.. image:: images/microsoft.png - :width: 100pt - :align: center - :target: https://www.microsoft.com/ + .. image:: images/microsoft.png + :target: https://microsoft.com -.. raw:: html +........... -
-
+.. div:: sk-text-image-grid-small -........... + .. div:: text-box -.. raw:: html + `Quansight Labs `_ funds Lucy Liu since 2022. -
-
+ .. div:: image-box -`Quansight Labs `_ funds Lucy Liu since 2022. + .. image:: images/quansight-labs.png + :target: https://labs.quansight.org -.. raw:: html +........... -
+.. |czi| image:: images/czi.png + :target: https://chanzuckerberg.com -
+.. |wellcome| image:: images/wellcome-trust.png + :target: https://wellcome.org/ -.. image:: images/quansight-labs.png - :width: 100pt - :align: center - :target: https://labs.quansight.org +.. div:: sk-text-image-grid-small -.. raw:: html + .. div:: text-box -
-
+ `The Chan-Zuckerberg Initiative `_ and + `Wellcome Trust `_ fund scikit-learn through the + `Essential Open Source Software for Science (EOSS) `_ + cycle 6. -Past Sponsors -............. + It supports Lucy Liu and diversity & inclusion initiatives that will + be announced in the future. -.. raw:: html + .. div:: image-box -
-
+ .. table:: + :class: image-subtable -`Quansight Labs `_ funded Meekail Zain in 2022 and 2023 and, -funded Thomas J. Fan from 2021 to 2023. + +----------+----------------+ + | |czi| | |wellcome| | + +----------+----------------+ -.. raw:: html +........... -
+.. div:: sk-text-image-grid-small -
+ .. div:: text-box -.. image:: images/quansight-labs.png - :width: 100pt - :align: center - :target: https://labs.quansight.org + `Tidelift `_ supports the project via their service + agreement. -.. raw:: html + .. div:: image-box -
-
+ .. image:: images/Tidelift-logo-on-light.svg + :target: https://tidelift.com/ ........... -.. raw:: html -
-
+Past Sponsors +------------- -`Columbia University `_ funded Andreas Müller -(2016-2020). +.. div:: sk-text-image-grid-small -.. raw:: html + .. div:: text-box -
+ `Quansight Labs `_ funded Meekail Zain in 2022 and 2023, + and funded Thomas J. Fan from 2021 to 2023. -
+ .. div:: image-box -.. image:: images/columbia.png - :width: 50pt - :align: center - :target: https://www.columbia.edu/ + .. image:: images/quansight-labs.png + :target: https://labs.quansight.org -.. raw:: html +........... -
-
+.. div:: sk-text-image-grid-small -........ + .. div:: text-box -.. raw:: html + `Columbia University `_ funded Andreas Müller + (2016-2020). -
-
+ .. div:: image-box -`The University of Sydney `_ funded Joel Nothman -(2017-2021). + .. image:: images/columbia.png + :target: https://columbia.edu -.. raw:: html +........ -
+.. div:: sk-text-image-grid-small -
+ .. div:: text-box -.. image:: images/sydney-primary.jpeg - :width: 100pt - :align: center - :target: https://sydney.edu.au/ + `The University of Sydney `_ funded Joel Nothman + (2017-2021). -.. raw:: html + .. div:: image-box -
-
+ .. image:: images/sydney-primary.jpeg + :target: https://sydney.edu.au/ ........... -.. raw:: html - -
-
- -Andreas Müller received a grant to improve scikit-learn from the -`Alfred P. Sloan Foundation `_ . -This grant supported the position of Nicolas Hug and Thomas J. Fan. - -.. raw:: html - -
+.. div:: sk-text-image-grid-small -
+ .. div:: text-box -.. image:: images/sloan_banner.png - :width: 100pt - :align: center - :target: https://sloan.org/ + Andreas Müller received a grant to improve scikit-learn from the + `Alfred P. Sloan Foundation `_ . + This grant supported the position of Nicolas Hug and Thomas J. Fan. -.. raw:: html + .. div:: image-box -
-
+ .. image:: images/sloan_banner.png + :target: https://sloan.org/ ............. -.. raw:: html +.. div:: sk-text-image-grid-small -
-
+ .. div:: text-box -`INRIA `_ actively supports this project. It has -provided funding for Fabian Pedregosa (2010-2012), Jaques Grobler -(2012-2013) and Olivier Grisel (2013-2017) to work on this project -full-time. It also hosts coding sprints and other events. + `INRIA `_ actively supports this project. It has + provided funding for Fabian Pedregosa (2010-2012), Jaques Grobler + (2012-2013) and Olivier Grisel (2013-2017) to work on this project + full-time. It also hosts coding sprints and other events. -.. raw:: html - -
+ .. div:: image-box -
- -.. image:: images/inria-logo.jpg - :width: 100pt - :align: center - :target: https://www.inria.fr - -.. raw:: html - -
-
+ .. image:: images/inria-logo.jpg + :target: https://www.inria.fr ..................... -.. raw:: html +.. div:: sk-text-image-grid-small -
-
+ .. div:: text-box -`Paris-Saclay Center for Data Science -`_ -funded one year for a developer to work on the project full-time -(2014-2015), 50% of the time of Guillaume Lemaitre (2016-2017) and 50% of the -time of Joris van den Bossche (2017-2018). + `Paris-Saclay Center for Data Science `_ + funded one year for a developer to work on the project full-time (2014-2015), 50% + of the time of Guillaume Lemaitre (2016-2017) and 50% of the time of Joris van den + Bossche (2017-2018). -.. raw:: html - -
-
+ .. div:: image-box -.. image:: images/cds-logo.png - :width: 100pt - :align: center - :target: http://www.datascience-paris-saclay.fr/ - -.. raw:: html - -
-
+ .. image:: images/cds-logo.png + :target: http://www.datascience-paris-saclay.fr/ .......................... -.. raw:: html - -
-
+.. div:: sk-text-image-grid-small -`NYU Moore-Sloan Data Science Environment `_ -funded Andreas Mueller (2014-2016) to work on this project. The Moore-Sloan -Data Science Environment also funds several students to work on the project -part-time. + .. div:: text-box -.. raw:: html - -
-
+ `NYU Moore-Sloan Data Science Environment `_ + funded Andreas Mueller (2014-2016) to work on this project. The Moore-Sloan + Data Science Environment also funds several students to work on the project + part-time. -.. image:: images/nyu_short_color.png - :width: 100pt - :align: center - :target: https://cds.nyu.edu/mooresloan/ - -.. raw:: html + .. div:: image-box -
-
+ .. image:: images/nyu_short_color.png + :target: https://cds.nyu.edu/mooresloan/ ........................ -.. raw:: html - -
-
+.. div:: sk-text-image-grid-small -`Télécom Paristech `_ funded Manoj Kumar -(2014), Tom Dupré la Tour (2015), Raghav RV (2015-2017), Thierry Guillemot -(2016-2017) and Albert Thomas (2017) to work on scikit-learn. + .. div:: text-box -.. raw:: html + `Télécom Paristech `_ funded Manoj Kumar + (2014), Tom Dupré la Tour (2015), Raghav RV (2015-2017), Thierry Guillemot + (2016-2017) and Albert Thomas (2017) to work on scikit-learn. -
-
+ .. div:: image-box -.. image:: images/telecom.png - :width: 50pt - :align: center - :target: https://www.telecom-paristech.fr/ - -.. raw:: html - -
-
+ .. image:: images/telecom.png + :target: https://www.telecom-paristech.fr/ ..................... -.. raw:: html - -
-
- -`The Labex DigiCosme `_ funded Nicolas Goix -(2015-2016), Tom Dupré la Tour (2015-2016 and 2017-2018), Mathurin Massias -(2018-2019) to work part time on scikit-learn during their PhDs. It also -funded a scikit-learn coding sprint in 2015. - -.. raw:: html +.. div:: sk-text-image-grid-small -
-
+ .. div:: text-box -.. image:: images/digicosme.png - :width: 100pt - :align: center - :target: https://digicosme.lri.fr + `The Labex DigiCosme `_ funded Nicolas Goix + (2015-2016), Tom Dupré la Tour (2015-2016 and 2017-2018), Mathurin Massias + (2018-2019) to work part time on scikit-learn during their PhDs. It also + funded a scikit-learn coding sprint in 2015. -.. raw:: html + .. div:: image-box -
-
+ .. image:: images/digicosme.png + :target: https://digicosme.lri.fr ..................... -.. raw:: html - -
-
- -`The Chan-Zuckerberg Initiative `_ funded Nicolas -Hug to work full-time on scikit-learn in 2020. - -.. raw:: html +.. div:: sk-text-image-grid-small -
-
+ .. div:: text-box -.. image:: images/czi_logo.svg - :width: 100pt - :align: center - :target: https://chanzuckerberg.com + `The Chan-Zuckerberg Initiative `_ funded Nicolas + Hug to work full-time on scikit-learn in 2020. -.. raw:: html + .. div:: image-box -
-
+ .. image:: images/czi.png + :target: https://chanzuckerberg.com ...................... @@ -623,9 +502,9 @@ program. - 2007 - David Cournapeau - 2011 - `Vlad Niculae`_ -- 2012 - `Vlad Niculae`_, Immanuel Bayer. +- 2012 - `Vlad Niculae`_, Immanuel Bayer - 2013 - Kemal Eren, Nicolas Trésegnie -- 2014 - Hamzeh Alsalhi, Issam Laradji, Maheshakya Wijewardena, Manoj Kumar. +- 2014 - Hamzeh Alsalhi, Issam Laradji, Maheshakya Wijewardena, Manoj Kumar - 2015 - `Raghav RV `_, Wei Xue - 2016 - `Nelson Liu `_, `YenChen Lin `_ @@ -644,86 +523,110 @@ The following organizations funded the scikit-learn consortium at Inria in the past: .. |msn| image:: images/microsoft.png - :width: 100pt - :target: https://www.microsoft.com/ + :target: https://www.microsoft.com/ .. |bcg| image:: images/bcg.png - :width: 100pt - :target: https://www.bcg.com/beyond-consulting/bcg-gamma/default.aspx + :target: https://www.bcg.com/beyond-consulting/bcg-gamma/default.aspx .. |fujitsu| image:: images/fujitsu.png - :width: 100pt - :target: https://www.fujitsu.com/global/ + :target: https://www.fujitsu.com/global/ .. |aphp| image:: images/logo_APHP_text.png - :width: 150pt - :target: https://aphp.fr/ + :target: https://aphp.fr/ +.. |hf| image:: images/huggingface_logo-noborder.png + :target: https://huggingface.co -|bcg| |msn| |fujitsu| |aphp| +.. raw:: html + -Sprints -------- +.. grid:: 2 2 4 4 + :class-row: image-subgrid + :gutter: 1 -The International 2019 Paris sprint was kindly hosted by `AXA `_. -Also some participants could attend thanks to the support of the `Alfred P. -Sloan Foundation `_, the `Python Software -Foundation `_ (PSF) and the `DATAIA Institute -`_. + .. grid-item:: + :class: sd-text-center + :child-align: center -..................... + |msn| + + .. grid-item:: + :class: sd-text-center + :child-align: center + + |bcg| + + .. grid-item:: + :class: sd-text-center + :child-align: center -The 2013 International Paris Sprint was made possible thanks to the support of -`Télécom Paristech `_, `tinyclues -`_, the `French Python Association -`_ and the `Fonds de la Recherche Scientifique -`_. + |fujitsu| -.............. + .. grid-item:: + :class: sd-text-center + :child-align: center -The 2011 International Granada sprint was made possible thanks to the support -of the `PSF `_ and `tinyclues -`_. + |aphp| + + .. grid-item:: + :class: sd-text-center + :child-align: center + + |hf| + +Coding Sprints +============== + +The scikit-learn project has a long history of `open source coding sprints +`_ with over 50 sprint +events from 2010 to present day. There are scores of sponsors who contributed +to costs which include venue, food, travel, developer time and more. See +`scikit-learn sprints `_ for a full +list of events. Donating to the project -....................... +======================= If you are interested in donating to the project or to one of our code-sprints, please donate via the `NumFOCUS Donations Page `_. -.. raw :: html - - -
+.. raw:: html -All donations will be handled by `NumFOCUS -`_, a non-profit-organization which is -managed by a board of `Scipy community members -`_. NumFOCUS's mission is to foster -scientific computing software, in particular in Python. As a fiscal home -of scikit-learn, it ensures that money is available when needed to keep -the project funded and available while in compliance with tax regulations. +

+ + Help us, donate! + +

-The received donations for the scikit-learn project mostly will go towards -covering travel-expenses for code sprints, as well as towards the organization -budget of the project [#f1]_. +All donations will be handled by `NumFOCUS `_, a non-profit +organization which is managed by a board of `Scipy community members +`_. NumFOCUS's mission is to foster scientific +computing software, in particular in Python. As a fiscal home of scikit-learn, it +ensures that money is available when needed to keep the project funded and available +while in compliance with tax regulations. +The received donations for the scikit-learn project mostly will go towards covering +travel-expenses for code sprints, as well as towards the organization budget of the +project [#f1]_. .. rubric:: Notes .. [#f1] Regarding the organization budget, in particular, we might use some of - the donated funds to pay for other project expenses such as DNS, - hosting or continuous integration services. + the donated funds to pay for other project expenses such as DNS, + hosting or continuous integration services. + Infrastructure support ----------------------- +====================== -- We would also like to thank `Microsoft Azure - `_, `Cirrus Cl `_, - `CircleCl `_ for free CPU time on their Continuous - Integration servers, and `Anaconda Inc. `_ for the - storage they provide for our staging and nightly builds. +We would also like to thank `Microsoft Azure `_, +`CircleCl `_ for free CPU +time on their Continuous Integration servers, and `Anaconda Inc. `_ +for the storage they provide for our staging and nightly builds. diff --git a/doc/api/deprecated.rst.template b/doc/api/deprecated.rst.template new file mode 100644 index 0000000000000..a48f0180f76ed --- /dev/null +++ b/doc/api/deprecated.rst.template @@ -0,0 +1,24 @@ +:html_theme.sidebar_secondary.remove: + +.. _api_depr_ref: + +Recently Deprecated +=================== + +.. currentmodule:: sklearn + +{% for ver, objs in DEPRECATED_API_REFERENCE %} +.. _api_depr_ref-{{ ver|replace(".", "-") }}: + +.. rubric:: To be removed in {{ ver }} + +.. autosummary:: + :nosignatures: + :toctree: ../modules/generated/ + :template: base.rst + +{% for obj in objs %} + {{ obj }} +{%- endfor %} + +{% endfor %} diff --git a/doc/api/index.rst.template b/doc/api/index.rst.template new file mode 100644 index 0000000000000..b0a3698775a94 --- /dev/null +++ b/doc/api/index.rst.template @@ -0,0 +1,77 @@ +:html_theme.sidebar_secondary.remove: + +.. _api_ref: + +============= +API Reference +============= + +This is the class and function reference of scikit-learn. Please refer to the +:ref:`full user guide ` for further details, as the raw specifications of +classes and functions may not be enough to give full guidelines on their use. For +reference on concepts repeated across the API, see :ref:`glossary`. + +.. toctree:: + :maxdepth: 2 + :hidden: + +{% for module, _ in API_REFERENCE %} + {{ module }} +{%- endfor %} +{%- if DEPRECATED_API_REFERENCE %} + deprecated +{%- endif %} + +.. list-table:: + :header-rows: 1 + :class: apisearch-table + + * - Object + - Description + +{% for module, module_info in API_REFERENCE %} +{% for section in module_info["sections"] %} +{% for obj in section["autosummary"] %} +{% set parts = obj.rsplit(".", 1) %} +{% if parts|length > 1 %} +{% set full_module = module + "." + parts[0] %} +{% else %} +{% set full_module = module %} +{% endif %} + * - :obj:`~{{ module }}.{{ obj }}` + + - .. div:: sk-apisearch-desc + + .. currentmodule:: {{ full_module }} + + .. 
autoshortsummary:: {{ module }}.{{ obj }} + + .. div:: caption + + :mod:`{{ full_module }}` +{% endfor %} +{% endfor %} +{% endfor %} + +{% for ver, objs in DEPRECATED_API_REFERENCE %} +{% for obj in objs %} +{% set parts = obj.rsplit(".", 1) %} +{% if parts|length > 1 %} +{% set full_module = "sklearn." + parts[0] %} +{% else %} +{% set full_module = "sklearn" %} +{% endif %} + * - :obj:`~sklearn.{{ obj }}` + + - .. div:: sk-apisearch-desc + + .. currentmodule:: {{ full_module }} + + .. autoshortsummary:: sklearn.{{ obj }} + + .. div:: caption + + :mod:`{{ full_module }}` + :bdg-ref-danger-line:`Deprecated in version {{ ver }} ` +{% endfor %} +{% endfor %} diff --git a/doc/api/module.rst.template b/doc/api/module.rst.template new file mode 100644 index 0000000000000..1980f27aad158 --- /dev/null +++ b/doc/api/module.rst.template @@ -0,0 +1,46 @@ +:html_theme.sidebar_secondary.remove: + +{% if module == "sklearn" -%} +{%- set module_hook = "sklearn" -%} +{%- elif module.startswith("sklearn.") -%} +{%- set module_hook = module[8:] -%} +{%- else -%} +{%- set module_hook = None -%} +{%- endif -%} + +{% if module_hook %} +.. _{{ module_hook }}_ref: +{% endif %} + +{{ module }} +{{ "=" * module|length }} + +.. automodule:: {{ module }} + +{% if module_info["description"] %} +{{ module_info["description"] }} +{% endif %} + +{% for section in module_info["sections"] %} +{% if section["title"] and module_hook %} +.. _{{ module_hook }}_ref-{{ section["title"]|lower|replace(" ", "-") }}: +{% endif %} + +{% if section["title"] %} +{{ section["title"] }} +{{ "-" * section["title"]|length }} +{% endif %} + +{% if section["description"] %} +{{ section["description"] }} +{% endif %} + +.. autosummary:: + :nosignatures: + :toctree: ../modules/generated/ + :template: base.rst + +{% for obj in section["autosummary"] %} + {{ obj }} +{%- endfor %} +{% endfor %} diff --git a/doc/api_reference.py b/doc/api_reference.py new file mode 100644 index 0000000000000..c90b115746415 --- /dev/null +++ b/doc/api_reference.py @@ -0,0 +1,1352 @@ +"""Configuration for the API reference documentation.""" + + +def _get_guide(*refs, is_developer=False): + """Get the rst to refer to user/developer guide. + + `refs` is several references that can be used in the :ref:`...` directive. + """ + if len(refs) == 1: + ref_desc = f":ref:`{refs[0]}` section" + elif len(refs) == 2: + ref_desc = f":ref:`{refs[0]}` and :ref:`{refs[1]}` sections" + else: + ref_desc = ", ".join(f":ref:`{ref}`" for ref in refs[:-1]) + ref_desc += f", and :ref:`{refs[-1]}` sections" + + guide_name = "Developer" if is_developer else "User" + return f"**{guide_name} guide.** See the {ref_desc} for further details." + + +def _get_submodule(module_name, submodule_name): + """Get the submodule docstring and automatically add the hook. + + `module_name` is e.g. `sklearn.feature_extraction`, and `submodule_name` is e.g. + `image`, so we get the docstring and hook for `sklearn.feature_extraction.image` + submodule. `module_name` is used to reset the current module because autosummary + automatically changes the current module. + """ + lines = [ + f".. automodule:: {module_name}.{submodule_name}", + f".. currentmodule:: {module_name}", + ] + return "\n\n".join(lines) + + +""" +CONFIGURING API_REFERENCE +========================= + +API_REFERENCE maps each module name to a dictionary that consists of the following +components: + +short_summary (required) + The text to be printed on the index page; it has nothing to do the API reference + page of each module. 
+description (required, `None` if not needed)
+    The additional description for the module to be placed under the module
+    docstring, before the sections start.
+sections (required)
+    A list of sections, each of which consists of:
+    - title (required, `None` if not needed): the section title; it should
+      commonly not be `None`, except for the first section of a module,
+    - description (optional): an additional description for the section,
+    - autosummary (required): an autosummary block, assuming the current module
+      is the module being documented.
+
+Essentially, the rendered page would look like the following:
+
+|---------------------------------------------------------------------------------|
+|     {{ module_name }}                                                            |
+|     =================                                                            |
+|     {{ module_docstring }}                                                       |
+|     {{ description }}                                                            |
+|                                                                                  |
+|     {{ section_title_1 }}   <-------------- Optional if one wants the first      |
+|     ---------------------                   section to directly follow           |
+|     {{ section_description_1 }}             without a second-level heading.      |
+|     {{ section_autosummary_1 }}                                                  |
+|                                                                                  |
+|     {{ section_title_2 }}                                                        |
+|     ---------------------                                                        |
+|     {{ section_description_2 }}                                                  |
+|     {{ section_autosummary_2 }}                                                  |
+|                                                                                  |
+|     More sections...                                                             |
+|---------------------------------------------------------------------------------|
+
+Hooks will be automatically generated for each module and each section. For a module,
+e.g., `sklearn.feature_extraction`, the hook would be `feature_extraction_ref`; for a
+section, e.g., "From text" under `sklearn.feature_extraction`, the hook would be
+`feature_extraction_ref-from-text`. However, note that a better way is to refer to
+these using the :mod: directive, e.g., :mod:`sklearn.feature_extraction` for the
+module and :mod:`sklearn.feature_extraction.text` for the section. The hook only
+becomes useful when a section does not correspond to a particular submodule, e.g.,
+the "Loaders" section under `sklearn.datasets`.
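+
+As a concrete example of the description helpers above,
+`_get_guide("calibration")` renders as:
+
+    **User guide.** See the :ref:`calibration` section for further details.
+
+and `_get_submodule("sklearn.feature_extraction", "image")` renders as:
+
+    .. automodule:: sklearn.feature_extraction.image
+
+    .. currentmodule:: sklearn.feature_extraction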
+""" + +API_REFERENCE = { + "sklearn": { + "short_summary": "Settings and information tools.", + "description": None, + "sections": [ + { + "title": None, + "autosummary": [ + "config_context", + "get_config", + "set_config", + "show_versions", + ], + }, + ], + }, + "sklearn.base": { + "short_summary": "Base classes and utility functions.", + "description": None, + "sections": [ + { + "title": None, + "autosummary": [ + "BaseEstimator", + "BiclusterMixin", + "ClassNamePrefixFeaturesOutMixin", + "ClassifierMixin", + "ClusterMixin", + "DensityMixin", + "MetaEstimatorMixin", + "OneToOneFeatureMixin", + "OutlierMixin", + "RegressorMixin", + "TransformerMixin", + "clone", + "is_classifier", + "is_clusterer", + "is_regressor", + "is_outlier_detector", + ], + } + ], + }, + "sklearn.calibration": { + "short_summary": "Probability calibration.", + "description": _get_guide("calibration"), + "sections": [ + { + "title": None, + "autosummary": ["CalibratedClassifierCV", "calibration_curve"], + }, + { + "title": "Visualization", + "autosummary": ["CalibrationDisplay"], + }, + ], + }, + "sklearn.cluster": { + "short_summary": "Clustering.", + "description": _get_guide("clustering", "biclustering"), + "sections": [ + { + "title": None, + "autosummary": [ + "AffinityPropagation", + "AgglomerativeClustering", + "Birch", + "BisectingKMeans", + "DBSCAN", + "FeatureAgglomeration", + "HDBSCAN", + "KMeans", + "MeanShift", + "MiniBatchKMeans", + "OPTICS", + "SpectralBiclustering", + "SpectralClustering", + "SpectralCoclustering", + "affinity_propagation", + "cluster_optics_dbscan", + "cluster_optics_xi", + "compute_optics_graph", + "dbscan", + "estimate_bandwidth", + "k_means", + "kmeans_plusplus", + "mean_shift", + "spectral_clustering", + "ward_tree", + ], + }, + ], + }, + "sklearn.compose": { + "short_summary": "Composite estimators.", + "description": _get_guide("combining_estimators"), + "sections": [ + { + "title": None, + "autosummary": [ + "ColumnTransformer", + "TransformedTargetRegressor", + "make_column_selector", + "make_column_transformer", + ], + }, + ], + }, + "sklearn.covariance": { + "short_summary": "Covariance estimation.", + "description": _get_guide("covariance"), + "sections": [ + { + "title": None, + "autosummary": [ + "EllipticEnvelope", + "EmpiricalCovariance", + "GraphicalLasso", + "GraphicalLassoCV", + "LedoitWolf", + "MinCovDet", + "OAS", + "ShrunkCovariance", + "empirical_covariance", + "graphical_lasso", + "ledoit_wolf", + "ledoit_wolf_shrinkage", + "oas", + "shrunk_covariance", + ], + }, + ], + }, + "sklearn.cross_decomposition": { + "short_summary": "Cross decomposition.", + "description": _get_guide("cross_decomposition"), + "sections": [ + { + "title": None, + "autosummary": ["CCA", "PLSCanonical", "PLSRegression", "PLSSVD"], + }, + ], + }, + "sklearn.datasets": { + "short_summary": "Datasets.", + "description": _get_guide("datasets"), + "sections": [ + { + "title": "Loaders", + "autosummary": [ + "clear_data_home", + "dump_svmlight_file", + "fetch_20newsgroups", + "fetch_20newsgroups_vectorized", + "fetch_california_housing", + "fetch_covtype", + "fetch_file", + "fetch_kddcup99", + "fetch_lfw_pairs", + "fetch_lfw_people", + "fetch_olivetti_faces", + "fetch_openml", + "fetch_rcv1", + "fetch_species_distributions", + "get_data_home", + "load_breast_cancer", + "load_diabetes", + "load_digits", + "load_files", + "load_iris", + "load_linnerud", + "load_sample_image", + "load_sample_images", + "load_svmlight_file", + "load_svmlight_files", + "load_wine", + ], + }, + { + "title": 
"Sample generators", + "autosummary": [ + "make_biclusters", + "make_blobs", + "make_checkerboard", + "make_circles", + "make_classification", + "make_friedman1", + "make_friedman2", + "make_friedman3", + "make_gaussian_quantiles", + "make_hastie_10_2", + "make_low_rank_matrix", + "make_moons", + "make_multilabel_classification", + "make_regression", + "make_s_curve", + "make_sparse_coded_signal", + "make_sparse_spd_matrix", + "make_sparse_uncorrelated", + "make_spd_matrix", + "make_swiss_roll", + ], + }, + ], + }, + "sklearn.decomposition": { + "short_summary": "Matrix decomposition.", + "description": _get_guide("decompositions"), + "sections": [ + { + "title": None, + "autosummary": [ + "DictionaryLearning", + "FactorAnalysis", + "FastICA", + "IncrementalPCA", + "KernelPCA", + "LatentDirichletAllocation", + "MiniBatchDictionaryLearning", + "MiniBatchNMF", + "MiniBatchSparsePCA", + "NMF", + "PCA", + "SparseCoder", + "SparsePCA", + "TruncatedSVD", + "dict_learning", + "dict_learning_online", + "fastica", + "non_negative_factorization", + "sparse_encode", + ], + }, + ], + }, + "sklearn.discriminant_analysis": { + "short_summary": "Discriminant analysis.", + "description": _get_guide("lda_qda"), + "sections": [ + { + "title": None, + "autosummary": [ + "LinearDiscriminantAnalysis", + "QuadraticDiscriminantAnalysis", + ], + }, + ], + }, + "sklearn.dummy": { + "short_summary": "Dummy estimators.", + "description": _get_guide("model_evaluation"), + "sections": [ + { + "title": None, + "autosummary": ["DummyClassifier", "DummyRegressor"], + }, + ], + }, + "sklearn.ensemble": { + "short_summary": "Ensemble methods.", + "description": _get_guide("ensemble"), + "sections": [ + { + "title": None, + "autosummary": [ + "AdaBoostClassifier", + "AdaBoostRegressor", + "BaggingClassifier", + "BaggingRegressor", + "ExtraTreesClassifier", + "ExtraTreesRegressor", + "GradientBoostingClassifier", + "GradientBoostingRegressor", + "HistGradientBoostingClassifier", + "HistGradientBoostingRegressor", + "IsolationForest", + "RandomForestClassifier", + "RandomForestRegressor", + "RandomTreesEmbedding", + "StackingClassifier", + "StackingRegressor", + "VotingClassifier", + "VotingRegressor", + ], + }, + ], + }, + "sklearn.exceptions": { + "short_summary": "Exceptions and warnings.", + "description": None, + "sections": [ + { + "title": None, + "autosummary": [ + "ConvergenceWarning", + "DataConversionWarning", + "DataDimensionalityWarning", + "EfficiencyWarning", + "FitFailedWarning", + "InconsistentVersionWarning", + "NotFittedError", + "UndefinedMetricWarning", + "EstimatorCheckFailedWarning", + ], + }, + ], + }, + "sklearn.experimental": { + "short_summary": "Experimental tools.", + "description": None, + "sections": [ + { + "title": None, + "autosummary": ["enable_halving_search_cv", "enable_iterative_imputer"], + }, + ], + }, + "sklearn.feature_extraction": { + "short_summary": "Feature extraction.", + "description": _get_guide("feature_extraction"), + "sections": [ + { + "title": None, + "autosummary": ["DictVectorizer", "FeatureHasher"], + }, + { + "title": "From images", + "description": _get_submodule("sklearn.feature_extraction", "image"), + "autosummary": [ + "image.PatchExtractor", + "image.extract_patches_2d", + "image.grid_to_graph", + "image.img_to_graph", + "image.reconstruct_from_patches_2d", + ], + }, + { + "title": "From text", + "description": _get_submodule("sklearn.feature_extraction", "text"), + "autosummary": [ + "text.CountVectorizer", + "text.HashingVectorizer", + "text.TfidfTransformer", 
+ "text.TfidfVectorizer", + ], + }, + ], + }, + "sklearn.feature_selection": { + "short_summary": "Feature selection.", + "description": _get_guide("feature_selection"), + "sections": [ + { + "title": None, + "autosummary": [ + "GenericUnivariateSelect", + "RFE", + "RFECV", + "SelectFdr", + "SelectFpr", + "SelectFromModel", + "SelectFwe", + "SelectKBest", + "SelectPercentile", + "SelectorMixin", + "SequentialFeatureSelector", + "VarianceThreshold", + "chi2", + "f_classif", + "f_regression", + "mutual_info_classif", + "mutual_info_regression", + "r_regression", + ], + }, + ], + }, + "sklearn.frozen": { + "short_summary": "Frozen estimators.", + "description": None, + "sections": [ + { + "title": None, + "autosummary": ["FrozenEstimator"], + }, + ], + }, + "sklearn.gaussian_process": { + "short_summary": "Gaussian processes.", + "description": _get_guide("gaussian_process"), + "sections": [ + { + "title": None, + "autosummary": [ + "GaussianProcessClassifier", + "GaussianProcessRegressor", + ], + }, + { + "title": "Kernels", + "description": _get_submodule("sklearn.gaussian_process", "kernels"), + "autosummary": [ + "kernels.CompoundKernel", + "kernels.ConstantKernel", + "kernels.DotProduct", + "kernels.ExpSineSquared", + "kernels.Exponentiation", + "kernels.Hyperparameter", + "kernels.Kernel", + "kernels.Matern", + "kernels.PairwiseKernel", + "kernels.Product", + "kernels.RBF", + "kernels.RationalQuadratic", + "kernels.Sum", + "kernels.WhiteKernel", + ], + }, + ], + }, + "sklearn.impute": { + "short_summary": "Imputation.", + "description": _get_guide("impute"), + "sections": [ + { + "title": None, + "autosummary": [ + "IterativeImputer", + "KNNImputer", + "MissingIndicator", + "SimpleImputer", + ], + }, + ], + }, + "sklearn.inspection": { + "short_summary": "Inspection.", + "description": _get_guide("inspection"), + "sections": [ + { + "title": None, + "autosummary": ["partial_dependence", "permutation_importance"], + }, + { + "title": "Plotting", + "autosummary": ["DecisionBoundaryDisplay", "PartialDependenceDisplay"], + }, + ], + }, + "sklearn.isotonic": { + "short_summary": "Isotonic regression.", + "description": _get_guide("isotonic"), + "sections": [ + { + "title": None, + "autosummary": [ + "IsotonicRegression", + "check_increasing", + "isotonic_regression", + ], + }, + ], + }, + "sklearn.kernel_approximation": { + "short_summary": "Kernel approximation.", + "description": _get_guide("kernel_approximation"), + "sections": [ + { + "title": None, + "autosummary": [ + "AdditiveChi2Sampler", + "Nystroem", + "PolynomialCountSketch", + "RBFSampler", + "SkewedChi2Sampler", + ], + }, + ], + }, + "sklearn.kernel_ridge": { + "short_summary": "Kernel ridge regression.", + "description": _get_guide("kernel_ridge"), + "sections": [ + { + "title": None, + "autosummary": ["KernelRidge"], + }, + ], + }, + "sklearn.linear_model": { + "short_summary": "Generalized linear models.", + "description": ( + _get_guide("linear_model") + + "\n\nThe following subsections are only rough guidelines: the same " + "estimator can fall into multiple categories, depending on its parameters." 
+        ),
+        "sections": [
+            {
+                "title": "Linear classifiers",
+                "autosummary": [
+                    "LogisticRegression",
+                    "LogisticRegressionCV",
+                    "PassiveAggressiveClassifier",
+                    "Perceptron",
+                    "RidgeClassifier",
+                    "RidgeClassifierCV",
+                    "SGDClassifier",
+                    "SGDOneClassSVM",
+                ],
+            },
+            {
+                "title": "Classical linear regressors",
+                "autosummary": ["LinearRegression", "Ridge", "RidgeCV", "SGDRegressor"],
+            },
+            {
+                "title": "Regressors with variable selection",
+                "description": (
+                    "The following estimators have built-in variable selection fitting "
+                    "procedures, but any estimator using an L1 or elastic-net penalty "
+                    "also performs variable selection: typically "
+                    ":class:`~linear_model.SGDRegressor` or "
+                    ":class:`~sklearn.linear_model.SGDClassifier` with an appropriate "
+                    "penalty."
+                ),
+                "autosummary": [
+                    "ElasticNet",
+                    "ElasticNetCV",
+                    "Lars",
+                    "LarsCV",
+                    "Lasso",
+                    "LassoCV",
+                    "LassoLars",
+                    "LassoLarsCV",
+                    "LassoLarsIC",
+                    "OrthogonalMatchingPursuit",
+                    "OrthogonalMatchingPursuitCV",
+                ],
+            },
+            {
+                "title": "Bayesian regressors",
+                "autosummary": ["ARDRegression", "BayesianRidge"],
+            },
+            {
+                "title": "Multi-task linear regressors with variable selection",
+                "description": (
+                    "These estimators fit multiple regression problems (or tasks)"
+                    " jointly, while inducing sparse coefficients. While the inferred"
+                    " coefficients may differ between the tasks, they are constrained"
+                    " to agree on the features that are selected (non-zero"
+                    " coefficients)."
+                ),
+                "autosummary": [
+                    "MultiTaskElasticNet",
+                    "MultiTaskElasticNetCV",
+                    "MultiTaskLasso",
+                    "MultiTaskLassoCV",
+                ],
+            },
+            {
+                "title": "Outlier-robust regressors",
+                "description": (
+                    "Any estimator using the Huber loss would also be robust to "
+                    "outliers, e.g., :class:`~linear_model.SGDRegressor` with "
+                    "``loss='huber'``."
+                ),
+                "autosummary": [
+                    "HuberRegressor",
+                    "QuantileRegressor",
+                    "RANSACRegressor",
+                    "TheilSenRegressor",
+                ],
+            },
+            {
+                "title": "Generalized linear models (GLM) for regression",
+                "description": (
+                    "These models allow for response variables to have error "
+                    "distributions other than a normal distribution."
+ ), + "autosummary": [ + "GammaRegressor", + "PoissonRegressor", + "TweedieRegressor", + ], + }, + { + "title": "Miscellaneous", + "autosummary": [ + "PassiveAggressiveRegressor", + "enet_path", + "lars_path", + "lars_path_gram", + "lasso_path", + "orthogonal_mp", + "orthogonal_mp_gram", + "ridge_regression", + ], + }, + ], + }, + "sklearn.manifold": { + "short_summary": "Manifold learning.", + "description": _get_guide("manifold"), + "sections": [ + { + "title": None, + "autosummary": [ + "Isomap", + "LocallyLinearEmbedding", + "MDS", + "SpectralEmbedding", + "TSNE", + "locally_linear_embedding", + "smacof", + "spectral_embedding", + "trustworthiness", + ], + }, + ], + }, + "sklearn.metrics": { + "short_summary": "Metrics.", + "description": _get_guide("model_evaluation", "metrics"), + "sections": [ + { + "title": "Model selection interface", + "description": _get_guide("scoring_parameter"), + "autosummary": [ + "check_scoring", + "get_scorer", + "get_scorer_names", + "make_scorer", + ], + }, + { + "title": "Classification metrics", + "description": _get_guide("classification_metrics"), + "autosummary": [ + "accuracy_score", + "auc", + "average_precision_score", + "balanced_accuracy_score", + "brier_score_loss", + "class_likelihood_ratios", + "classification_report", + "cohen_kappa_score", + "confusion_matrix", + "d2_log_loss_score", + "dcg_score", + "det_curve", + "f1_score", + "fbeta_score", + "hamming_loss", + "hinge_loss", + "jaccard_score", + "log_loss", + "matthews_corrcoef", + "multilabel_confusion_matrix", + "ndcg_score", + "precision_recall_curve", + "precision_recall_fscore_support", + "precision_score", + "recall_score", + "roc_auc_score", + "roc_curve", + "top_k_accuracy_score", + "zero_one_loss", + ], + }, + { + "title": "Regression metrics", + "description": _get_guide("regression_metrics"), + "autosummary": [ + "d2_absolute_error_score", + "d2_pinball_score", + "d2_tweedie_score", + "explained_variance_score", + "max_error", + "mean_absolute_error", + "mean_absolute_percentage_error", + "mean_gamma_deviance", + "mean_pinball_loss", + "mean_poisson_deviance", + "mean_squared_error", + "mean_squared_log_error", + "mean_tweedie_deviance", + "median_absolute_error", + "r2_score", + "root_mean_squared_error", + "root_mean_squared_log_error", + ], + }, + { + "title": "Multilabel ranking metrics", + "description": _get_guide("multilabel_ranking_metrics"), + "autosummary": [ + "coverage_error", + "label_ranking_average_precision_score", + "label_ranking_loss", + ], + }, + { + "title": "Clustering metrics", + "description": ( + _get_submodule("sklearn.metrics", "cluster") + + "\n\n" + + _get_guide("clustering_evaluation") + ), + "autosummary": [ + "adjusted_mutual_info_score", + "adjusted_rand_score", + "calinski_harabasz_score", + "cluster.contingency_matrix", + "cluster.pair_confusion_matrix", + "completeness_score", + "davies_bouldin_score", + "fowlkes_mallows_score", + "homogeneity_completeness_v_measure", + "homogeneity_score", + "mutual_info_score", + "normalized_mutual_info_score", + "rand_score", + "silhouette_samples", + "silhouette_score", + "v_measure_score", + ], + }, + { + "title": "Biclustering metrics", + "description": _get_guide("biclustering_evaluation"), + "autosummary": ["consensus_score"], + }, + { + "title": "Distance metrics", + "autosummary": ["DistanceMetric"], + }, + { + "title": "Pairwise metrics", + "description": ( + _get_submodule("sklearn.metrics", "pairwise") + + "\n\n" + + _get_guide("metrics") + ), + "autosummary": [ + 
"pairwise.additive_chi2_kernel", + "pairwise.chi2_kernel", + "pairwise.cosine_distances", + "pairwise.cosine_similarity", + "pairwise.distance_metrics", + "pairwise.euclidean_distances", + "pairwise.haversine_distances", + "pairwise.kernel_metrics", + "pairwise.laplacian_kernel", + "pairwise.linear_kernel", + "pairwise.manhattan_distances", + "pairwise.nan_euclidean_distances", + "pairwise.paired_cosine_distances", + "pairwise.paired_distances", + "pairwise.paired_euclidean_distances", + "pairwise.paired_manhattan_distances", + "pairwise.pairwise_kernels", + "pairwise.polynomial_kernel", + "pairwise.rbf_kernel", + "pairwise.sigmoid_kernel", + "pairwise_distances", + "pairwise_distances_argmin", + "pairwise_distances_argmin_min", + "pairwise_distances_chunked", + ], + }, + { + "title": "Plotting", + "description": _get_guide("visualizations"), + "autosummary": [ + "ConfusionMatrixDisplay", + "DetCurveDisplay", + "PrecisionRecallDisplay", + "PredictionErrorDisplay", + "RocCurveDisplay", + ], + }, + ], + }, + "sklearn.mixture": { + "short_summary": "Gaussian mixture models.", + "description": _get_guide("mixture"), + "sections": [ + { + "title": None, + "autosummary": ["BayesianGaussianMixture", "GaussianMixture"], + }, + ], + }, + "sklearn.model_selection": { + "short_summary": "Model selection.", + "description": _get_guide("cross_validation", "grid_search", "learning_curve"), + "sections": [ + { + "title": "Splitters", + "autosummary": [ + "GroupKFold", + "GroupShuffleSplit", + "KFold", + "LeaveOneGroupOut", + "LeaveOneOut", + "LeavePGroupsOut", + "LeavePOut", + "PredefinedSplit", + "RepeatedKFold", + "RepeatedStratifiedKFold", + "ShuffleSplit", + "StratifiedGroupKFold", + "StratifiedKFold", + "StratifiedShuffleSplit", + "TimeSeriesSplit", + "check_cv", + "train_test_split", + ], + }, + { + "title": "Hyper-parameter optimizers", + "autosummary": [ + "GridSearchCV", + "HalvingGridSearchCV", + "HalvingRandomSearchCV", + "ParameterGrid", + "ParameterSampler", + "RandomizedSearchCV", + ], + }, + { + "title": "Post-fit model tuning", + "autosummary": [ + "FixedThresholdClassifier", + "TunedThresholdClassifierCV", + ], + }, + { + "title": "Model validation", + "autosummary": [ + "cross_val_predict", + "cross_val_score", + "cross_validate", + "learning_curve", + "permutation_test_score", + "validation_curve", + ], + }, + { + "title": "Visualization", + "autosummary": ["LearningCurveDisplay", "ValidationCurveDisplay"], + }, + ], + }, + "sklearn.multiclass": { + "short_summary": "Multiclass classification.", + "description": _get_guide("multiclass_classification"), + "sections": [ + { + "title": None, + "autosummary": [ + "OneVsOneClassifier", + "OneVsRestClassifier", + "OutputCodeClassifier", + ], + }, + ], + }, + "sklearn.multioutput": { + "short_summary": "Multioutput regression and classification.", + "description": _get_guide( + "multilabel_classification", + "multiclass_multioutput_classification", + "multioutput_regression", + ), + "sections": [ + { + "title": None, + "autosummary": [ + "ClassifierChain", + "MultiOutputClassifier", + "MultiOutputRegressor", + "RegressorChain", + ], + }, + ], + }, + "sklearn.naive_bayes": { + "short_summary": "Naive Bayes.", + "description": _get_guide("naive_bayes"), + "sections": [ + { + "title": None, + "autosummary": [ + "BernoulliNB", + "CategoricalNB", + "ComplementNB", + "GaussianNB", + "MultinomialNB", + ], + }, + ], + }, + "sklearn.neighbors": { + "short_summary": "Nearest neighbors.", + "description": _get_guide("neighbors"), + "sections": [ + { + 
"title": None, + "autosummary": [ + "BallTree", + "KDTree", + "KNeighborsClassifier", + "KNeighborsRegressor", + "KNeighborsTransformer", + "KernelDensity", + "LocalOutlierFactor", + "NearestCentroid", + "NearestNeighbors", + "NeighborhoodComponentsAnalysis", + "RadiusNeighborsClassifier", + "RadiusNeighborsRegressor", + "RadiusNeighborsTransformer", + "kneighbors_graph", + "radius_neighbors_graph", + "sort_graph_by_row_values", + ], + }, + ], + }, + "sklearn.neural_network": { + "short_summary": "Neural network models.", + "description": _get_guide( + "neural_networks_supervised", "neural_networks_unsupervised" + ), + "sections": [ + { + "title": None, + "autosummary": ["BernoulliRBM", "MLPClassifier", "MLPRegressor"], + }, + ], + }, + "sklearn.pipeline": { + "short_summary": "Pipeline.", + "description": _get_guide("combining_estimators"), + "sections": [ + { + "title": None, + "autosummary": [ + "FeatureUnion", + "Pipeline", + "make_pipeline", + "make_union", + ], + }, + ], + }, + "sklearn.preprocessing": { + "short_summary": "Preprocessing and normalization.", + "description": _get_guide("preprocessing"), + "sections": [ + { + "title": None, + "autosummary": [ + "Binarizer", + "FunctionTransformer", + "KBinsDiscretizer", + "KernelCenterer", + "LabelBinarizer", + "LabelEncoder", + "MaxAbsScaler", + "MinMaxScaler", + "MultiLabelBinarizer", + "Normalizer", + "OneHotEncoder", + "OrdinalEncoder", + "PolynomialFeatures", + "PowerTransformer", + "QuantileTransformer", + "RobustScaler", + "SplineTransformer", + "StandardScaler", + "TargetEncoder", + "add_dummy_feature", + "binarize", + "label_binarize", + "maxabs_scale", + "minmax_scale", + "normalize", + "power_transform", + "quantile_transform", + "robust_scale", + "scale", + ], + }, + ], + }, + "sklearn.random_projection": { + "short_summary": "Random projection.", + "description": _get_guide("random_projection"), + "sections": [ + { + "title": None, + "autosummary": [ + "GaussianRandomProjection", + "SparseRandomProjection", + "johnson_lindenstrauss_min_dim", + ], + }, + ], + }, + "sklearn.semi_supervised": { + "short_summary": "Semi-supervised learning.", + "description": _get_guide("semi_supervised"), + "sections": [ + { + "title": None, + "autosummary": [ + "LabelPropagation", + "LabelSpreading", + "SelfTrainingClassifier", + ], + }, + ], + }, + "sklearn.svm": { + "short_summary": "Support vector machines.", + "description": _get_guide("svm"), + "sections": [ + { + "title": None, + "autosummary": [ + "LinearSVC", + "LinearSVR", + "NuSVC", + "NuSVR", + "OneClassSVM", + "SVC", + "SVR", + "l1_min_c", + ], + }, + ], + }, + "sklearn.tree": { + "short_summary": "Decision trees.", + "description": _get_guide("tree"), + "sections": [ + { + "title": None, + "autosummary": [ + "DecisionTreeClassifier", + "DecisionTreeRegressor", + "ExtraTreeClassifier", + "ExtraTreeRegressor", + ], + }, + { + "title": "Exporting", + "autosummary": ["export_graphviz", "export_text"], + }, + { + "title": "Plotting", + "autosummary": ["plot_tree"], + }, + ], + }, + "sklearn.utils": { + "short_summary": "Utilities.", + "description": _get_guide("developers-utils", is_developer=True), + "sections": [ + { + "title": None, + "autosummary": [ + "Bunch", + "_safe_indexing", + "as_float_array", + "assert_all_finite", + "deprecated", + "estimator_html_repr", + "gen_batches", + "gen_even_slices", + "indexable", + "murmurhash3_32", + "resample", + "safe_mask", + "safe_sqr", + "shuffle", + "Tags", + "InputTags", + "TargetTags", + "ClassifierTags", + "RegressorTags", + 
"TransformerTags", + "get_tags", + ], + }, + { + "title": "Input and parameter validation", + "description": _get_submodule("sklearn.utils", "validation"), + "autosummary": [ + "check_X_y", + "check_array", + "check_consistent_length", + "check_random_state", + "check_scalar", + "validation.check_is_fitted", + "validation.check_memory", + "validation.check_symmetric", + "validation.column_or_1d", + "validation.has_fit_parameter", + "validation.validate_data", + ], + }, + { + "title": "Meta-estimators", + "description": _get_submodule("sklearn.utils", "metaestimators"), + "autosummary": ["metaestimators.available_if"], + }, + { + "title": "Weight handling based on class labels", + "description": _get_submodule("sklearn.utils", "class_weight"), + "autosummary": [ + "class_weight.compute_class_weight", + "class_weight.compute_sample_weight", + ], + }, + { + "title": "Dealing with multiclass target in classifiers", + "description": _get_submodule("sklearn.utils", "multiclass"), + "autosummary": [ + "multiclass.is_multilabel", + "multiclass.type_of_target", + "multiclass.unique_labels", + ], + }, + { + "title": "Optimal mathematical operations", + "description": _get_submodule("sklearn.utils", "extmath"), + "autosummary": [ + "extmath.density", + "extmath.fast_logdet", + "extmath.randomized_range_finder", + "extmath.randomized_svd", + "extmath.safe_sparse_dot", + "extmath.weighted_mode", + ], + }, + { + "title": "Working with sparse matrices and arrays", + "description": _get_submodule("sklearn.utils", "sparsefuncs"), + "autosummary": [ + "sparsefuncs.incr_mean_variance_axis", + "sparsefuncs.inplace_column_scale", + "sparsefuncs.inplace_csr_column_scale", + "sparsefuncs.inplace_row_scale", + "sparsefuncs.inplace_swap_column", + "sparsefuncs.inplace_swap_row", + "sparsefuncs.mean_variance_axis", + ], + }, + { + "title": None, + "description": _get_submodule("sklearn.utils", "sparsefuncs_fast"), + "autosummary": [ + "sparsefuncs_fast.inplace_csr_row_normalize_l1", + "sparsefuncs_fast.inplace_csr_row_normalize_l2", + ], + }, + { + "title": "Working with graphs", + "description": _get_submodule("sklearn.utils", "graph"), + "autosummary": ["graph.single_source_shortest_path_length"], + }, + { + "title": "Random sampling", + "description": _get_submodule("sklearn.utils", "random"), + "autosummary": ["random.sample_without_replacement"], + }, + { + "title": "Auxiliary functions that operate on arrays", + "description": _get_submodule("sklearn.utils", "arrayfuncs"), + "autosummary": ["arrayfuncs.min_pos"], + }, + { + "title": "Metadata routing", + "description": ( + _get_submodule("sklearn.utils", "metadata_routing") + + "\n\n" + + _get_guide("metadata_routing") + ), + "autosummary": [ + "metadata_routing.MetadataRequest", + "metadata_routing.MetadataRouter", + "metadata_routing.MethodMapping", + "metadata_routing.get_routing_for_object", + "metadata_routing.process_routing", + ], + }, + { + "title": "Discovering scikit-learn objects", + "description": _get_submodule("sklearn.utils", "discovery"), + "autosummary": [ + "discovery.all_displays", + "discovery.all_estimators", + "discovery.all_functions", + ], + }, + { + "title": "API compatibility checkers", + "description": _get_submodule("sklearn.utils", "estimator_checks"), + "autosummary": [ + "estimator_checks.check_estimator", + "estimator_checks.parametrize_with_checks", + "estimator_checks.estimator_checks_generator", + ], + }, + { + "title": "Parallel computing", + "description": _get_submodule("sklearn.utils", "parallel"), + "autosummary": [ + 
"parallel.Parallel", + "parallel.delayed", + ], + }, + ], + }, +} + + +""" +CONFIGURING DEPRECATED_API_REFERENCE +==================================== + +DEPRECATED_API_REFERENCE maps each deprecation target version to a corresponding +autosummary block. It will be placed at the bottom of the API index page under the +"Recently deprecated" section. Essentially, the rendered section would look like the +following: + +|------------------------------------------| +| To be removed in {{ version_1 }} | +| -------------------------------- | +| {{ autosummary_1 }} | +| | +| To be removed in {{ version_2 }} | +| -------------------------------- | +| {{ autosummary_2 }} | +| | +| More versions... | +|------------------------------------------| + +Note that the autosummary here assumes that the current module is `sklearn`, i.e., if +`sklearn.utils.Memory` is deprecated, one should put `utils.Memory` in the "entries" +slot of the autosummary block. + +Example: + +DEPRECATED_API_REFERENCE = { + "0.24": [ + "model_selection.fit_grid_point", + "utils.safe_indexing", + ], +} +""" + +DEPRECATED_API_REFERENCE = {} # type: ignore[var-annotated] diff --git a/doc/common_pitfalls.rst b/doc/common_pitfalls.rst index 41eb16665a612..129f9b3990fd5 100644 --- a/doc/common_pitfalls.rst +++ b/doc/common_pitfalls.rst @@ -1,9 +1,3 @@ -.. Places parent toc into the sidebar - -:parenttoc: True - -.. include:: includes/big_toc_css.rst - .. _common_pitfalls: ========================================= @@ -166,7 +160,7 @@ much higher than expected accuracy score:: >>> from sklearn.model_selection import train_test_split >>> from sklearn.feature_selection import SelectKBest - >>> from sklearn.ensemble import GradientBoostingClassifier + >>> from sklearn.ensemble import HistGradientBoostingClassifier >>> from sklearn.metrics import accuracy_score >>> # Incorrect preprocessing: the entire data is transformed @@ -174,9 +168,9 @@ much higher than expected accuracy score:: >>> X_train, X_test, y_train, y_test = train_test_split( ... X_selected, y, random_state=42) - >>> gbc = GradientBoostingClassifier(random_state=1) + >>> gbc = HistGradientBoostingClassifier(random_state=1) >>> gbc.fit(X_train, y_train) - GradientBoostingClassifier(random_state=1) + HistGradientBoostingClassifier(random_state=1) >>> y_pred = gbc.predict(X_test) >>> accuracy_score(y_test, y_pred) @@ -195,14 +189,14 @@ data, close to chance:: >>> select = SelectKBest(k=25) >>> X_train_selected = select.fit_transform(X_train, y_train) - >>> gbc = GradientBoostingClassifier(random_state=1) + >>> gbc = HistGradientBoostingClassifier(random_state=1) >>> gbc.fit(X_train_selected, y_train) - GradientBoostingClassifier(random_state=1) + HistGradientBoostingClassifier(random_state=1) >>> X_test_selected = select.transform(X_test) >>> y_pred = gbc.predict(X_test_selected) >>> accuracy_score(y_test, y_pred) - 0.46 + 0.5 Here again, we recommend using a :class:`~sklearn.pipeline.Pipeline` to chain together the feature selection and model estimators. The pipeline ensures @@ -213,15 +207,15 @@ is used only for calculating the accuracy score:: >>> X_train, X_test, y_train, y_test = train_test_split( ... X, y, random_state=42) >>> pipeline = make_pipeline(SelectKBest(k=25), - ... GradientBoostingClassifier(random_state=1)) + ... 
HistGradientBoostingClassifier(random_state=1)) >>> pipeline.fit(X_train, y_train) Pipeline(steps=[('selectkbest', SelectKBest(k=25)), - ('gradientboostingclassifier', - GradientBoostingClassifier(random_state=1))]) + ('histgradientboostingclassifier', + HistGradientBoostingClassifier(random_state=1))]) >>> y_pred = pipeline.predict(X_test) >>> accuracy_score(y_test, y_pred) - 0.46 + 0.5 The pipeline can also be fed into a cross-validation function such as :func:`~sklearn.model_selection.cross_val_score`. @@ -231,7 +225,7 @@ method is used during fitting and predicting:: >>> from sklearn.model_selection import cross_val_score >>> scores = cross_val_score(pipeline, X, y) >>> print(f"Mean accuracy: {scores.mean():.2f}+/-{scores.std():.2f}") - Mean accuracy: 0.46+/-0.07 + Mean accuracy: 0.43+/-0.05 .. _randomness: @@ -398,7 +392,7 @@ each case**: be the same across all folds. - Since `rf_inst` was passed a `RandomState` instance, each call to `fit` starts from a different RNG. As a result, the random subset of features - will be different for each folds. + will be different for each fold. While having a constant estimator RNG across folds isn't inherently wrong, we usually want CV results that are robust w.r.t. the estimator's randomness. As @@ -414,43 +408,40 @@ it will allow the estimator RNG to vary for each fold. illustration purpose: what matters is what we pass to the :class:`~sklearn.ensemble.RandomForestClassifier` estimator. -|details-start| -**Cloning** -|details-split| +.. dropdown:: Cloning -Another subtle side effect of passing `RandomState` instances is how -:func:`~sklearn.base.clone` will work:: + Another subtle side effect of passing `RandomState` instances is how + :func:`~sklearn.base.clone` will work:: - >>> from sklearn import clone - >>> from sklearn.ensemble import RandomForestClassifier - >>> import numpy as np + >>> from sklearn import clone + >>> from sklearn.ensemble import RandomForestClassifier + >>> import numpy as np + + >>> rng = np.random.RandomState(0) + >>> a = RandomForestClassifier(random_state=rng) + >>> b = clone(a) + + Since a `RandomState` instance was passed to `a`, `a` and `b` are not clones + in the strict sense, but rather clones in the statistical sense: `a` and `b` + will still be different models, even when calling `fit(X, y)` on the same + data. Moreover, `a` and `b` will influence each other since they share the + same internal RNG: calling `a.fit` will consume `b`'s RNG, and calling + `b.fit` will consume `a`'s RNG, since they are the same. This bit is true for + any estimators that share a `random_state` parameter; it is not specific to + clones. + + If an integer were passed, `a` and `b` would be exact clones and they would not + influence each other. + + .. warning:: + Even though :func:`~sklearn.base.clone` is rarely used in user code, it is + called pervasively throughout scikit-learn codebase: in particular, most + meta-estimators that accept non-fitted estimators call + :func:`~sklearn.base.clone` internally + (:class:`~sklearn.model_selection.GridSearchCV`, + :class:`~sklearn.ensemble.StackingClassifier`, + :class:`~sklearn.calibration.CalibratedClassifierCV`, etc.). - >>> rng = np.random.RandomState(0) - >>> a = RandomForestClassifier(random_state=rng) - >>> b = clone(a) - -Since a `RandomState` instance was passed to `a`, `a` and `b` are not clones -in the strict sense, but rather clones in the statistical sense: `a` and `b` -will still be different models, even when calling `fit(X, y)` on the same -data. 
Moreover, `a` and `b` will influence each-other since they share the -same internal RNG: calling `a.fit` will consume `b`'s RNG, and calling -`b.fit` will consume `a`'s RNG, since they are the same. This bit is true for -any estimators that share a `random_state` parameter; it is not specific to -clones. - -If an integer were passed, `a` and `b` would be exact clones and they would not -influence each other. - -.. warning:: - Even though :func:`~sklearn.base.clone` is rarely used in user code, it is - called pervasively throughout scikit-learn codebase: in particular, most - meta-estimators that accept non-fitted estimators call - :func:`~sklearn.base.clone` internally - (:class:`~sklearn.model_selection.GridSearchCV`, - :class:`~sklearn.ensemble.StackingClassifier`, - :class:`~sklearn.calibration.CalibratedClassifierCV`, etc.). - -|details-end| CV splitters ............ @@ -558,10 +549,10 @@ When we evaluate a randomized estimator performance by cross-validation, we want to make sure that the estimator can yield accurate predictions for new data, but we also want to make sure that the estimator is robust w.r.t. its random initialization. For example, we would like the random weights -initialization of a :class:`~sklearn.linear_model.SGDClassifier` to be +initialization of an :class:`~sklearn.linear_model.SGDClassifier` to be consistently good across all folds: otherwise, when we train that estimator on new data, we might get unlucky and the random initialization may lead to -bad performance. Similarly, we want a random forest to be robust w.r.t the +bad performance. Similarly, we want a random forest to be robust w.r.t. the set of randomly selected features that each tree will be using. For these reasons, it is preferable to evaluate the cross-validation diff --git a/doc/communication_team.rst b/doc/communication_team.rst index 30e4f1169cfc9..fb9666f0b42f7 100644 --- a/doc/communication_team.rst +++ b/doc/communication_team.rst @@ -7,7 +7,7 @@
-    <p>Lauren Burke</p>
+    <p>Lauren Burke-McCarthy</p>
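The `common_pitfalls.rst` hunks above describe how two estimators that share a
`RandomState` instance are clones only in the statistical sense and consume each
other's RNG. A minimal sketch of that behaviour, not part of the diff itself; the
data shape and estimator settings are arbitrary::

    import numpy as np

    from sklearn import clone
    from sklearn.ensemble import RandomForestClassifier

    rng = np.random.RandomState(0)
    X = rng.rand(100, 5)
    y = (X[:, 0] > 0.5).astype(int)

    # Shared RandomState instance: `a` and `b` are statistical clones, and
    # fitting one advances the RNG stream that the other will draw from.
    shared_rng = np.random.RandomState(42)
    a = RandomForestClassifier(n_estimators=10, random_state=shared_rng)
    b = clone(a)
    a.fit(X, y)  # consumes part of the shared RNG stream ...
    b.fit(X, y)  # ... so `b` starts from a different RNG state than `a` did

    # Integer seed: `c` and `d` are exact clones with no shared mutable state.
    c = RandomForestClassifier(n_estimators=10, random_state=42)
    d = clone(c)

With an integer seed, fitting `c` and `d` on the same data yields identical
models, which matches the strict-clone behaviour described in the passage above.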
diff --git a/doc/computing.rst b/doc/computing.rst index 6732b754918b0..9f166432006b2 100644 --- a/doc/computing.rst +++ b/doc/computing.rst @@ -1,13 +1,7 @@ -.. Places parent toc into the sidebar - -:parenttoc: True - ============================ Computing with scikit-learn ============================ -.. include:: includes/big_toc_css.rst - .. toctree:: :maxdepth: 2 diff --git a/doc/computing/computational_performance.rst b/doc/computing/computational_performance.rst index d6864689502c2..4af79206dae1c 100644 --- a/doc/computing/computational_performance.rst +++ b/doc/computing/computational_performance.rst @@ -1,7 +1,3 @@ -.. Places parent toc into the sidebar - -:parenttoc: True - .. _computational_performance: .. currentmodule:: sklearn @@ -19,9 +15,9 @@ scikit-learn estimators in different contexts and provide some tips and tricks for overcoming performance bottlenecks. Prediction latency is measured as the elapsed time necessary to make a -prediction (e.g. in micro-seconds). Latency is often viewed as a distribution +prediction (e.g. in microseconds). Latency is often viewed as a distribution and operations engineers often focus on the latency at a given percentile of -this distribution (e.g. the 90 percentile). +this distribution (e.g. the 90th percentile). Prediction throughput is defined as the number of predictions the software can deliver in a given amount of time (e.g. in predictions per second). @@ -34,7 +30,7 @@ to take into account the same exact properties of the data as more complex ones. Prediction Latency ------------------ -One of the most straight-forward concerns one may have when using/choosing a +One of the most straightforward concerns one may have when using/choosing a machine learning toolkit is the latency at which predictions can be made in a production environment. @@ -356,7 +352,7 @@ feature selection components in a pipeline once we know which features to keep from a previous run. Finally, it can help reduce processing time and I/O usage upstream in the data access and feature extraction layers by not collecting and building features that are discarded by the model. For instance -if the raw data come from a database, it can make it possible to write simpler +if the raw data come from a database, it is possible to write simpler and faster queries or reduce I/O usage by making the queries return lighter records. At the moment, reshaping needs to be performed manually in scikit-learn. diff --git a/doc/computing/parallelism.rst b/doc/computing/parallelism.rst index 53cef5603c5be..d2ff106aec3be 100644 --- a/doc/computing/parallelism.rst +++ b/doc/computing/parallelism.rst @@ -1,7 +1,3 @@ -.. Places parent toc into the sidebar - -:parenttoc: True - Parallelism, resource management, and configuration =================================================== @@ -76,7 +72,7 @@ In practice, whether parallelism is helpful at improving runtime depends on many factors. It is usually a good idea to experiment rather than assuming that increasing the number of workers is always a good thing. In some cases it can be highly detrimental to performance to run multiple copies of some -estimators or functions in parallel (see oversubscription below). +estimators or functions in parallel (see :ref:`oversubscription` below). Lower-level parallelism with OpenMP ................................... @@ -107,7 +103,7 @@ such as MKL, OpenBLAS or BLIS. 
You can control the exact number of threads used by BLAS for each library using environment variables, namely: -- ``MKL_NUM_THREADS`` sets the number of thread MKL uses, +- ``MKL_NUM_THREADS`` sets the number of threads MKL uses, - ``OPENBLAS_NUM_THREADS`` sets the number of threads OpenBLAS uses - ``BLIS_NUM_THREADS`` sets the number of threads BLIS uses @@ -126,11 +122,13 @@ for different values of `OMP_NUM_THREADS`: distributed on pypi.org (i.e. the ones installed via ``pip install``) and on the conda-forge channel (i.e. the ones installed via ``conda install --channel conda-forge``) are linked with OpenBLAS, while - NumPy and SciPy packages packages shipped on the ``defaults`` conda + NumPy and SciPy packages shipped on the ``defaults`` conda channel from Anaconda.org (i.e. the ones installed via ``conda install``) are linked by default with MKL. +.. _oversubscription: + Oversubscription: spawning too many threads ........................................... @@ -231,19 +229,17 @@ state of the aforementioned singletons. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Controls the seeding of the random number generator used in tests that rely on -the `global_random_seed`` fixture. +the `global_random_seed` fixture. All tests that use this fixture accept the contract that they should deterministically pass for any seed value from 0 to 99 included. -If the `SKLEARN_TESTS_GLOBAL_RANDOM_SEED` environment variable is set to -`"any"` (which should be the case on nightly builds on the CI), the fixture -will choose an arbitrary seed in the above range (based on the BUILD_NUMBER or -the current day) and all fixtured tests will run for that specific seed. The -goal is to ensure that, over time, our CI will run all tests with different -seeds while keeping the test duration of a single run of the full test suite -limited. This will check that the assertions of tests written to use this -fixture are not dependent on a specific seed value. +In nightly CI builds, the `SKLEARN_TESTS_GLOBAL_RANDOM_SEED` environment +variable is drawn randomly in the above range and all fixtured tests will run +for that specific seed. The goal is to ensure that, over time, our CI will run +all tests with different seeds while keeping the test duration of a single run +of the full test suite limited. This will check that the assertions of tests +written to use this fixture are not dependent on a specific seed value. The range of admissible seed values is limited to [0, 99] because it is often not possible to write a test that can work for any possible seed and we want to @@ -254,8 +250,6 @@ Valid values for `SKLEARN_TESTS_GLOBAL_RANDOM_SEED`: - `SKLEARN_TESTS_GLOBAL_RANDOM_SEED="42"`: run tests with a fixed seed of 42 - `SKLEARN_TESTS_GLOBAL_RANDOM_SEED="40-42"`: run the tests with all seeds between 40 and 42 included -- `SKLEARN_TESTS_GLOBAL_RANDOM_SEED="any"`: run the tests with an arbitrary - seed selected between 0 and 99 included - `SKLEARN_TESTS_GLOBAL_RANDOM_SEED="all"`: run the tests with all seeds between 0 and 99 included. This can take a long time: only use for individual tests, not the full test suite! @@ -304,7 +298,7 @@ segfaults. When this environment variable is set to a non zero value, the debug symbols will be included in the compiled C extensions. Only debug symbols for POSIX -systems is configured. +systems are configured. `SKLEARN_PAIRWISE_DIST_CHUNK_SIZE` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -333,7 +327,7 @@ you can set `SKLEARN_WARNINGS_AS_ERRORS=1`. By default, warnings are not turned into errors. 
This is the case if `SKLEARN_WARNINGS_AS_ERRORS` is unset, or `SKLEARN_WARNINGS_AS_ERRORS=0`. -This environment variable use specific warning filters to ignore some warnings, +This environment variable uses specific warning filters to ignore some warnings, since sometimes warnings originate from third-party libraries and there is not much we can do about it. You can see the warning filters in the `_get_warnings_filters_info_list` function in `sklearn/utils/_testing.py`. diff --git a/doc/computing/scaling_strategies.rst b/doc/computing/scaling_strategies.rst index 143643131b0e8..286a1e79d0a8c 100644 --- a/doc/computing/scaling_strategies.rst +++ b/doc/computing/scaling_strategies.rst @@ -1,7 +1,3 @@ -.. Places parent toc into the sidebar - -:parenttoc: True - .. _scaling_strategies: Strategies to scale computationally: bigger data diff --git a/doc/conf.py b/doc/conf.py index 9d77fc68d0f71..1113d4b2c100a 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -10,13 +10,14 @@ # All configuration values have a default; values that are commented out # serve to show the default. +import json import os import re import sys import warnings from datetime import datetime -from io import StringIO from pathlib import Path +from urllib.request import urlopen from sklearn.externals._packaging.version import parse from sklearn.utils._testing import turn_warnings_into_errors @@ -25,13 +26,18 @@ # directory, add these directories to sys.path here. If the directory # is relative to the documentation root, use os.path.abspath to make it # absolute, like shown here. +sys.path.insert(0, os.path.abspath(".")) sys.path.insert(0, os.path.abspath("sphinxext")) +import jinja2 import sphinx_gallery from github_link import make_linkcode_resolve +from sphinx.util.logging import getLogger from sphinx_gallery.notebook import add_code_cell, add_markdown_cell from sphinx_gallery.sorting import ExampleTitleSortKey +logger = getLogger(__name__) + try: # Configure plotly to integrate its output into the HTML pages generated by # sphinx-gallery. @@ -56,14 +62,20 @@ "sphinx.ext.intersphinx", "sphinx.ext.imgconverter", "sphinx_gallery.gen_gallery", - "sphinx_issues", - "add_toctree_functions", "sphinx-prompt", "sphinx_copybutton", "sphinxext.opengraph", - "doi_role", - "allow_nan_estimators", "matplotlib.sphinxext.plot_directive", + "sphinxcontrib.sass", + "sphinx_remove_toctrees", + "sphinx_design", + # See sphinxext/ + "allow_nan_estimators", + "autoshortsummary", + "doi_role", + "dropdown_anchors", + "override_pst_pagetoc", + "sphinx_issues", ] # Specify how to identify the prompt when copying code snippets @@ -96,8 +108,12 @@ plot_html_show_formats = False plot_html_show_source_link = False -# this is needed for some reason... -# see https://github.com/numpy/numpydoc/issues/69 +# We do not need the table of class members because `sphinxext/override_pst_pagetoc.py` +# will show them in the secondary sidebar +numpydoc_show_class_members = False +numpydoc_show_inherited_class_members = False + +# We want in-page toc of class members instead of a separate page for each entry numpydoc_class_members_toctree = False @@ -111,8 +127,6 @@ extensions.append("sphinx.ext.mathjax") mathjax_path = "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml.js" -autodoc_default_options = {"members": True, "inherited-members": True} - # Add any paths that contain templates here, relative to this directory. templates_path = ["templates"] @@ -123,10 +137,10 @@ source_suffix = ".rst" # The encoding of source files. 
-# source_encoding = 'utf-8' +source_encoding = "utf-8" # The main toctree document. -root_doc = "contents" +root_doc = "index" # General information about the project. project = "scikit-learn" @@ -160,7 +174,13 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ["_build", "templates", "includes", "themes"] +exclude_patterns = [ + "_build", + "templates", + "includes", + "**/sg_execution_times.rst", + "whats_new/upcoming_changes", +] # The reST default role (used for this markup: `text`) to use for all # documents. @@ -177,9 +197,6 @@ # output. They are ignored by default. # show_authors = False -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = "sphinx" - # A list of ignored prefixes for module index sorting. # modindex_common_prefix = [] @@ -188,21 +205,103 @@ # The theme to use for HTML and HTML Help pages. Major themes that come with # Sphinx are currently 'default' and 'sphinxdoc'. -html_theme = "scikit-learn-modern" +html_theme = "pydata_sphinx_theme" + +# This config option is used to generate the canonical links in the header +# of every page. The canonical link is needed to prevent search engines from +# returning results pointing to old scikit-learn versions. +html_baseurl = "https://scikit-learn.org/stable/" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. html_theme_options = { - "legacy_google_analytics": True, - "analytics": True, - "mathjax_path": mathjax_path, - "link_to_live_contributing_page": not parsed_version.is_devrelease, + # -- General configuration ------------------------------------------------ + "sidebar_includehidden": True, + "use_edit_page_button": True, + "external_links": [], + "icon_links_label": "Icon Links", + "icon_links": [ + { + "name": "GitHub", + "url": "https://github.com/scikit-learn/scikit-learn", + "icon": "fa-brands fa-square-github", + "type": "fontawesome", + }, + ], + "analytics": { + "plausible_analytics_domain": "scikit-learn.org", + "plausible_analytics_url": "https://views.scientific-python.org/js/script.js", + }, + # If "prev-next" is included in article_footer_items, then setting show_prev_next + # to True would repeat prev and next links. See + # https://github.com/pydata/pydata-sphinx-theme/blob/b731dc230bc26a3d1d1bb039c56c977a9b3d25d8/src/pydata_sphinx_theme/theme/pydata_sphinx_theme/layout.html#L118-L129 + "show_prev_next": False, + "search_bar_text": "Search the docs ...", + "navigation_with_keys": False, + "collapse_navigation": False, + "navigation_depth": 2, + "show_nav_level": 1, + "show_toc_level": 1, + "navbar_align": "left", + "header_links_before_dropdown": 5, + "header_dropdown_text": "More", + # The switcher requires a JSON file with the list of documentation versions, which + # is generated by the script `build_tools/circle/list_versions.py` and placed under + # the `js/` static directory; it will then be copied to the `_static` directory in + # the built documentation + "switcher": { + "json_url": "https://scikit-learn.org/dev/_static/versions.json", + "version_match": release, + }, + # check_switcher may be set to False if docbuild pipeline fails. 
See + # https://pydata-sphinx-theme.readthedocs.io/en/stable/user_guide/version-dropdown.html#configure-switcher-json-url + "check_switcher": True, + "pygments_light_style": "tango", + "pygments_dark_style": "monokai", + "logo": { + "alt_text": "scikit-learn homepage", + "image_relative": "logos/scikit-learn-logo-small.png", + "image_light": "logos/scikit-learn-logo-small.png", + "image_dark": "logos/scikit-learn-logo-small.png", + }, + "surface_warnings": True, + # -- Template placement in theme layouts ---------------------------------- + "navbar_start": ["navbar-logo"], + # Note that the alignment of navbar_center is controlled by navbar_align + "navbar_center": ["navbar-nav"], + "navbar_end": ["theme-switcher", "navbar-icon-links", "version-switcher"], + # navbar_persistent is persistent right (even when on mobiles) + "navbar_persistent": ["search-button"], + "article_header_start": ["breadcrumbs"], + "article_header_end": [], + "article_footer_items": ["prev-next"], + "content_footer_items": [], + # Use html_sidebars that map page patterns to list of sidebar templates + "primary_sidebar_end": [], + "footer_start": ["copyright"], + "footer_center": [], + "footer_end": [], + # When specified as a dictionary, the keys should follow glob-style patterns, as in + # https://www.sphinx-doc.org/en/master/usage/configuration.html#confval-exclude_patterns + # In particular, "**" specifies the default for all pages + # Use :html_theme.sidebar_secondary.remove: for file-wide removal + "secondary_sidebar_items": { + "**": [ + "page-toc", + "sourcelink", + # Sphinx-Gallery-specific sidebar components + # https://sphinx-gallery.github.io/stable/advanced.html#using-sphinx-gallery-sidebar-components + "sg_download_links", + "sg_launcher_links", + ], + }, + "show_version_warning_banner": True, + "announcement": None, } # Add any paths that contain custom themes here, relative to this directory. -html_theme_path = ["themes"] - +# html_theme_path = ["themes"] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". @@ -211,10 +310,6 @@ # A shorter title for the navigation bar. Default is the same as html_title. html_short_title = "scikit-learn" -# The name of an image file (relative to this directory) to place at the top -# of the sidebar. -html_logo = "logos/scikit-learn-logo-small.png" - # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. @@ -223,19 +318,76 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ["images"] +html_static_path = ["images", "css", "js"] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. # html_last_updated_fmt = '%b %d, %Y' # Custom sidebar templates, maps document names to template names. -# html_sidebars = {} +# Workaround for removing the left sidebar on pages without TOC +# A better solution would be to follow the merge of: +# https://github.com/pydata/pydata-sphinx-theme/pull/1682 +html_sidebars = { + "install": [], + "getting_started": [], + "glossary": [], + "faq": [], + "support": [], + "related_projects": [], + "roadmap": [], + "governance": [], + "about": [], +} # Additional templates that should be rendered to pages, maps page names to # template names. 
html_additional_pages = {"index": "index.html"} +# Additional files to copy +# html_extra_path = [] + +# Additional JS files +html_js_files = [ + "scripts/dropdown.js", + "scripts/version-switcher.js", + "scripts/sg_plotly_resize.js", +] + +# Compile scss files into css files using sphinxcontrib-sass +sass_src_dir, sass_out_dir = "scss", "css/styles" +sass_targets = { + f"{file.stem}.scss": f"{file.stem}.css" + for file in Path(sass_src_dir).glob("*.scss") +} + +# Additional CSS files, should be subset of the values of `sass_targets` +html_css_files = ["styles/colors.css", "styles/custom.css"] + + +def add_js_css_files(app, pagename, templatename, context, doctree): + """Load additional JS and CSS files only for certain pages. + + Note that `html_js_files` and `html_css_files` are included in all pages and + should be used for the ones that are used by multiple pages. All page-specific + JS and CSS files should be added here instead. + """ + if pagename == "api/index": + # External: jQuery and DataTables + app.add_js_file("https://code.jquery.com/jquery-3.7.0.js") + app.add_js_file("https://cdn.datatables.net/2.0.0/js/dataTables.min.js") + app.add_css_file( + "https://cdn.datatables.net/2.0.0/css/dataTables.dataTables.min.css" + ) + # Internal: API search initialization and styling + app.add_js_file("scripts/api-search.js") + app.add_css_file("styles/api-search.css") + elif pagename == "index": + app.add_css_file("styles/index.css") + elif pagename.startswith("modules/generated/"): + app.add_css_file("styles/api.css") + + # If false, no module index is generated. html_domain_indices = False @@ -285,6 +437,10 @@ # redirects dictionary maps from old links to new links redirects = { "documentation": "index", + "contents": "index", + "preface": "index", + "modules/classes": "api/index", + "tutorial/machine_learning_map/index": "machine_learning_map", "auto_examples/feature_selection/plot_permutation_test_for_classification": ( "auto_examples/model_selection/plot_permutation_tests_for_classification" ), @@ -292,8 +448,17 @@ "auto_examples/linear_model/plot_bayesian_ridge": ( "auto_examples/linear_model/plot_ard" ), - "auto_examples/model_selection/grid_search_text_feature_extraction.py": ( - "auto_examples/model_selection/plot_grid_search_text_feature_extraction.py" + "auto_examples/model_selection/grid_search_text_feature_extraction": ( + "auto_examples/model_selection/plot_grid_search_text_feature_extraction" + ), + "auto_examples/model_selection/plot_validation_curve": ( + "auto_examples/model_selection/plot_train_error_vs_test_error" + ), + "auto_examples/datasets/plot_digits_last_image": ( + "auto_examples/exercises/plot_digits_classification_exercises" + ), + "auto_examples/datasets/plot_random_dataset": ( + "auto_examples/classification/plot_classifier_comparison" ), "auto_examples/miscellaneous/plot_changed_only_pprint_parameter": ( "auto_examples/miscellaneous/plot_estimator_representation" @@ -301,46 +466,57 @@ "auto_examples/decomposition/plot_beta_divergence": ( "auto_examples/applications/plot_topics_extraction_with_nmf_lda" ), + "auto_examples/svm/plot_svm_nonlinear": "auto_examples/svm/plot_svm_kernels", "auto_examples/ensemble/plot_adaboost_hastie_10_2": ( "auto_examples/ensemble/plot_adaboost_multiclass" ), "auto_examples/decomposition/plot_pca_3d": ( "auto_examples/decomposition/plot_pca_iris" ), - "auto_examples/exercises/plot_cv_digits.py": ( - "auto_examples/model_selection/plot_nested_cross_validation_iris.py" + "auto_examples/exercises/plot_cv_digits": ( + 
"auto_examples/model_selection/plot_nested_cross_validation_iris" + ), + "auto_examples/linear_model/plot_lasso_lars": ( + "auto_examples/linear_model/plot_lasso_lasso_lars_elasticnet_path" + ), + "auto_examples/linear_model/plot_lasso_coordinate_descent_path": ( + "auto_examples/linear_model/plot_lasso_lasso_lars_elasticnet_path" + ), + "auto_examples/cluster/plot_color_quantization": ( + "auto_examples/cluster/plot_face_compress" + ), + "auto_examples/cluster/plot_cluster_iris": ( + "auto_examples/cluster/plot_kmeans_assumptions" + ), + "auto_examples/ensemble/plot_forest_importances_faces": ( + "auto_examples/ensemble/plot_forest_importances" + ), + "auto_examples/ensemble/plot_voting_probas": ( + "auto_examples/ensemble/plot_voting_decision_regions" + ), + "auto_examples/datasets/plot_iris_dataset": ( + "auto_examples/decomposition/plot_pca_iris" + ), + "auto_examples/linear_model/plot_iris_logistic": ( + "auto_examples/linear_model/plot_logistic_multinomial" + ), + "auto_examples/linear_model/plot_ols_3d": ("auto_examples/linear_model/plot_ols"), + "auto_examples/linear_model/plot_ols": "auto_examples/linear_model/plot_ols_ridge", + "auto_examples/linear_model/plot_ols_ridge_variance": ( + "auto_examples/linear_model/plot_ols_ridge" + ), + "auto_examples/linear_model/plot_sgd_comparison": ( + "auto_examples/linear_model/plot_sgd_loss_functions" ), } html_context["redirects"] = redirects for old_link in redirects: html_additional_pages[old_link] = "redirects.html" -# Not showing the search summary makes the search page load faster. -html_show_search_summary = True +# See https://github.com/scikit-learn/scikit-learn/pull/22550 +html_context["is_devrelease"] = parsed_version.is_devrelease -# The "summary-anchor" IDs will be overwritten via JavaScript to be unique. -# See `doc/theme/scikit-learn-modern/static/js/details-permalink.js`. -rst_prolog = """ -.. |details-start| raw:: html - -
-    <details id="summary-anchor">
-    <summary class="btn btn-light">
-
-.. |details-split| raw:: html
-
-        <span class="tooltiptext">Click for more details</span>
-        <a class="headerlink" href="#summary-anchor" title="Permalink to this heading">¶</a>
-    </summary>
-    <div class="card">
-
-.. |details-end| raw:: html
-
-       </div>
-       </details>
- -""" - # -- Options for LaTeX output ------------------------------------------------ latex_elements = { # The paper size ('letterpaper' or 'a4paper'). @@ -509,6 +685,23 @@ def notebook_modification_function(notebook_content, notebook_filename): # imports inside functions code_lines.extend(["import matplotlib", "import pandas"]) + # Work around https://github.com/jupyterlite/pyodide-kernel/issues/166 + # and https://github.com/pyodide/micropip/issues/223 by installing the + # dependencies first, and then scikit-learn from Anaconda.org. + if "dev" in release: + dev_docs_specific_code = [ + "import piplite", + "import joblib", + "import threadpoolctl", + "import scipy", + "await piplite.install(\n" + f" 'scikit-learn=={release}',\n" + " index_urls='https://pypi.anaconda.org/scientific-python-nightly-wheels/simple',\n" + ")", + ] + + code_lines.extend(dev_docs_specific_code) + if code_lines: code_lines = ["# JupyterLite-specific code"] + code_lines code = "\n".join(code_lines) @@ -527,14 +720,16 @@ def reset_sklearn_config(gallery_conf, fname): sklearn.set_config(**default_global_config) +sg_examples_dir = "../examples" +sg_gallery_dir = "auto_examples" sphinx_gallery_conf = { "doc_module": "sklearn", "backreferences_dir": os.path.join("modules", "generated"), "show_memory": False, "reference_url": {"sklearn": None}, - "examples_dirs": ["../examples"], - "gallery_dirs": ["auto_examples"], - "subsection_order": SubSectionTitleOrder("../examples"), + "examples_dirs": [sg_examples_dir], + "gallery_dirs": [sg_gallery_dir], + "subsection_order": SubSectionTitleOrder(sg_examples_dir), "within_subsection_order": SKExampleTitleSortKey, "binder": { "org": "scikit-learn", @@ -548,7 +743,7 @@ def reset_sklearn_config(gallery_conf, fname): "inspect_global_variables": False, "remove_config_comments": True, "plot_gallery": "True", - "recommender": {"enable": True, "n_examples": 5, "min_df": 12}, + "recommender": {"enable": True, "n_examples": 4, "min_df": 12}, "reset_modules": ("matplotlib", "seaborn", reset_sklearn_config), } if with_jupyterlite: @@ -556,6 +751,17 @@ def reset_sklearn_config(gallery_conf, fname): "notebook_modification_function": notebook_modification_function } +# For the index page of the gallery and each nested section, we hide the secondary +# sidebar by specifying an empty list (no components), because there is no meaningful +# in-page toc for these pages, and they are generated so "sourcelink" is not useful +# either. +html_theme_options["secondary_sidebar_items"][f"{sg_gallery_dir}/index"] = [] +for sub_sg_dir in (Path(".") / sg_examples_dir).iterdir(): + if sub_sg_dir.is_dir(): + html_theme_options["secondary_sidebar_items"][ + f"{sg_gallery_dir}/{sub_sg_dir.name}/index" + ] = [] + # The following dictionary contains the information used to create the # thumbnails for the front page of the scikit-learn home page. 
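As context for the version switcher configured above (and for the
`infer_next_release_versions` helper added later in this file), the `versions.json`
fetched from the documentation site is a list of version entries written by
`build_tools/circle/list_versions.py` so that the stable release is always the
second entry. A sketch with illustrative values; the entry keys other than
`"version"` are assumptions, not taken from this diff::

    import json

    # Hypothetical contents of versions.json; the releases shown are made up.
    versions_json = json.loads("""
    [
      {"name": "dev", "version": "1.6.dev0", "url": "https://scikit-learn.org/dev/"},
      {"name": "1.5 (stable)", "version": "1.5.0", "url": "https://scikit-learn.org/stable/"},
      {"name": "1.4", "version": "1.4.2", "url": "https://scikit-learn.org/1.4/"}
    ]
    """)

    # Mirrors the indexing used by infer_next_release_versions(): the stable
    # release is the second entry, the previous stable release the third.
    stable = versions_json[1]["version"]       # "1.5.0"
    last_stable = versions_json[2]["version"]  # "1.4.2"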
@@ -566,8 +772,10 @@ def reset_sklearn_config(gallery_conf, fname): # enable experimental module so that experimental estimators can be # discovered properly by sphinx -from sklearn.experimental import enable_iterative_imputer # noqa -from sklearn.experimental import enable_halving_search_cv # noqa +from sklearn.experimental import ( # noqa: F401 + enable_halving_search_cv, + enable_iterative_imputer, +) def make_carousel_thumbs(app, exception): @@ -605,73 +813,6 @@ def filter_search_index(app, exception): f.write(searchindex_text) -def generate_min_dependency_table(app): - """Generate min dependency table for docs.""" - from sklearn._min_dependencies import dependent_packages - - # get length of header - package_header_len = max(len(package) for package in dependent_packages) + 4 - version_header_len = len("Minimum Version") + 4 - tags_header_len = max(len(tags) for _, tags in dependent_packages.values()) + 4 - - output = StringIO() - output.write( - " ".join( - ["=" * package_header_len, "=" * version_header_len, "=" * tags_header_len] - ) - ) - output.write("\n") - dependency_title = "Dependency" - version_title = "Minimum Version" - tags_title = "Purpose" - - output.write( - f"{dependency_title:<{package_header_len}} " - f"{version_title:<{version_header_len}} " - f"{tags_title}\n" - ) - - output.write( - " ".join( - ["=" * package_header_len, "=" * version_header_len, "=" * tags_header_len] - ) - ) - output.write("\n") - - for package, (version, tags) in dependent_packages.items(): - output.write( - f"{package:<{package_header_len}} {version:<{version_header_len}} {tags}\n" - ) - - output.write( - " ".join( - ["=" * package_header_len, "=" * version_header_len, "=" * tags_header_len] - ) - ) - output.write("\n") - output = output.getvalue() - - with (Path(".") / "min_dependency_table.rst").open("w") as f: - f.write(output) - - -def generate_min_dependency_substitutions(app): - """Generate min dependency substitutions for docs.""" - from sklearn._min_dependencies import dependent_packages - - output = StringIO() - - for package, (version, _) in dependent_packages.items(): - package = package.capitalize() - output.write(f".. 
|{package}MinVersion| replace:: {version}") - output.write("\n") - - output = output.getvalue() - - with (Path(".") / "min_dependency_substitutions.rst").open("w") as f: - f.write(output) - - # Config for sphinx_issues # we use the issues path for PRs since the issues URL will forward @@ -683,17 +824,29 @@ def disable_plot_gallery_for_linkcheck(app): sphinx_gallery_conf["plot_gallery"] = "False" +def skip_properties(app, what, name, obj, skip, options): + """Skip properties that are fitted attributes""" + if isinstance(obj, property): + if name.endswith("_") and not name.startswith("_"): + return True + + return skip + + def setup(app): # do not run the examples when using linkcheck by using a small priority # (default priority is 500 and sphinx-gallery using builder-inited event too) app.connect("builder-inited", disable_plot_gallery_for_linkcheck, priority=50) - app.connect("builder-inited", generate_min_dependency_table) - app.connect("builder-inited", generate_min_dependency_substitutions) - # to hide/show the prompt in code examples: + # triggered just before the HTML for an individual page is created + app.connect("html-page-context", add_js_css_files) + + # to hide/show the prompt in code examples app.connect("build-finished", make_carousel_thumbs) app.connect("build-finished", filter_search_index) + app.connect("autodoc-skip-member", skip_properties) + # The following is used by sphinx.ext.linkcode to provide links to github linkcode_resolve = make_linkcode_resolve( @@ -812,3 +965,128 @@ def setup(app): linkcheck_request_headers = { "https://github.com/": {"Authorization": f"token {github_token}"}, } + + +def infer_next_release_versions(): + """Infer the most likely next release versions to make.""" + all_version_full = {"rc": "0.99.0rc1", "final": "0.99.0", "bf": "0.98.1"} + all_version_short = {"rc": "0.99", "final": "0.99", "bf": "0.98"} + all_previous_tag = {"rc": "unused", "final": "0.98.33", "bf": "0.97.22"} + + try: + # Fetch the version switcher JSON; see `html_theme_options` for more details + versions_json = json.loads( + urlopen(html_theme_options["switcher"]["json_url"], timeout=10).read() + ) + + # See `build_tools/circle/list_versions.py`, stable is always the second entry + stable_version = parse(versions_json[1]["version"]) + last_stable_version = parse(versions_json[2]["version"]) + next_major_minor = f"{stable_version.major}.{stable_version.minor + 1}" + + # RC + all_version_full["rc"] = f"{next_major_minor}.0rc1" + all_version_short["rc"] = next_major_minor + + # Major/Minor final + all_version_full["final"] = f"{next_major_minor}.0" + all_version_short["final"] = next_major_minor + all_previous_tag["final"] = stable_version.base_version + + # Bug-fix + all_version_full["bf"] = ( + f"{stable_version.major}.{stable_version.minor}.{stable_version.micro + 1}" + ) + all_version_short["bf"] = f"{stable_version.major}.{stable_version.minor}" + all_previous_tag["bf"] = last_stable_version.base_version + except Exception as e: + logger.warning( + "Failed to infer all possible next release versions because of " + f"{type(e).__name__}: {e}" + ) + + return { + "version_full": all_version_full, + "version_short": all_version_short, + "previous_tag": all_previous_tag, + } + + +# -- Convert .rst.template files to .rst --------------------------------------- + +from api_reference import API_REFERENCE, DEPRECATED_API_REFERENCE + +from sklearn._min_dependencies import dependent_packages + +# If development build, link to local page in the top navbar; otherwise link to the +# 
development version; see https://github.com/scikit-learn/scikit-learn/pull/22550 +if parsed_version.is_devrelease: + development_link = "developers/index" +else: + development_link = "https://scikit-learn.org/dev/developers/index.html" + +# Define the templates and target files for conversion +# Each entry is in the format (template name, file name, kwargs for rendering) +rst_templates = [ + ("index", "index", {"development_link": development_link}), + ( + "developers/maintainer", + "developers/maintainer", + {"inferred": infer_next_release_versions()}, + ), + ( + "min_dependency_table", + "min_dependency_table", + {"dependent_packages": dependent_packages}, + ), + ( + "min_dependency_substitutions", + "min_dependency_substitutions", + {"dependent_packages": dependent_packages}, + ), + ( + "api/index", + "api/index", + { + "API_REFERENCE": sorted(API_REFERENCE.items(), key=lambda x: x[0]), + "DEPRECATED_API_REFERENCE": sorted( + DEPRECATED_API_REFERENCE.items(), key=lambda x: x[0], reverse=True + ), + }, + ), +] + +# Convert each module API reference page +for module in API_REFERENCE: + rst_templates.append( + ( + "api/module", + f"api/{module}", + {"module": module, "module_info": API_REFERENCE[module]}, + ) + ) + +# Convert the deprecated API reference page (if there exists any) +if DEPRECATED_API_REFERENCE: + rst_templates.append( + ( + "api/deprecated", + "api/deprecated", + { + "DEPRECATED_API_REFERENCE": sorted( + DEPRECATED_API_REFERENCE.items(), key=lambda x: x[0], reverse=True + ) + }, + ) + ) + +for rst_template_name, rst_target_name, kwargs in rst_templates: + # Read the corresponding template file into jinja2 + with (Path(".") / f"{rst_template_name}.rst.template").open( + "r", encoding="utf-8" + ) as f: + t = jinja2.Template(f.read()) + + # Render the template and write to the target + with (Path(".") / f"{rst_target_name}.rst").open("w", encoding="utf-8") as f: + f.write(t.render(**kwargs)) diff --git a/doc/conftest.py b/doc/conftest.py index d66148ccc553f..ad8d6eb8cfb62 100644 --- a/doc/conftest.py +++ b/doc/conftest.py @@ -1,5 +1,4 @@ import os -import warnings from os import environ from os.path import exists, join @@ -10,7 +9,7 @@ from sklearn.datasets._base import _pkl_filepath from sklearn.datasets._twenty_newsgroups import CACHE_NAME from sklearn.utils._testing import SkipTest, check_skip_network -from sklearn.utils.fixes import _IS_PYPY, np_base_version, parse_version +from sklearn.utils.fixes import np_base_version, parse_version, sp_version def setup_labeled_faces(): @@ -34,8 +33,6 @@ def setup_twenty_newsgroups(): def setup_working_with_text_data(): - if _IS_PYPY and os.environ.get("CI", None): - raise SkipTest("Skipping too slow test with PyPy on CI") check_skip_network() cache_path = _pkl_filepath(get_data_home(), CACHE_NAME) if not exists(cache_path): @@ -44,7 +41,7 @@ def setup_working_with_text_data(): def setup_loading_other_datasets(): try: - import pandas # noqa + import pandas # noqa: F401 except ImportError: raise SkipTest("Skipping loading_other_datasets.rst, pandas not installed") @@ -59,49 +56,35 @@ def setup_loading_other_datasets(): def setup_compose(): try: - import pandas # noqa + import pandas # noqa: F401 except ImportError: raise SkipTest("Skipping compose.rst, pandas not installed") def setup_impute(): try: - import pandas # noqa + import pandas # noqa: F401 except ImportError: raise SkipTest("Skipping impute.rst, pandas not installed") def setup_grid_search(): try: - import pandas # noqa + import pandas # noqa: F401 except ImportError: raise 
SkipTest("Skipping grid_search.rst, pandas not installed") def setup_preprocessing(): try: - import pandas # noqa - - if parse_version(pandas.__version__) < parse_version("1.1.0"): - raise SkipTest("Skipping preprocessing.rst, pandas version < 1.1.0") + import pandas # noqa: F401 except ImportError: raise SkipTest("Skipping preprocessing.rst, pandas not installed") -def setup_unsupervised_learning(): - try: - import skimage # noqa - except ImportError: - raise SkipTest("Skipping unsupervised_learning.rst, scikit-image not installed") - # ignore deprecation warnings from scipy.misc.face - warnings.filterwarnings( - "ignore", "The binary mode of fromstring", DeprecationWarning - ) - - def skip_if_matplotlib_not_installed(fname): try: - import matplotlib # noqa + import matplotlib # noqa: F401 except ImportError: basename = os.path.basename(fname) raise SkipTest(f"Skipping doctests for {basename}, matplotlib not installed") @@ -109,7 +92,7 @@ def skip_if_matplotlib_not_installed(fname): def skip_if_cupy_not_installed(fname): try: - import cupy # noqa + import cupy # noqa: F401 except ImportError: basename = os.path.basename(fname) raise SkipTest(f"Skipping doctests for {basename}, cupy not installed") @@ -128,10 +111,6 @@ def pytest_runtest_setup(item): setup_rcv1() elif fname.endswith("datasets/twenty_newsgroups.rst") or is_index: setup_twenty_newsgroups() - elif ( - fname.endswith("tutorial/text_analytics/working_with_text_data.rst") or is_index - ): - setup_working_with_text_data() elif fname.endswith("modules/compose.rst") or is_index: setup_compose() elif fname.endswith("datasets/loading_other_datasets.rst"): @@ -142,14 +121,10 @@ def pytest_runtest_setup(item): setup_grid_search() elif fname.endswith("modules/preprocessing.rst"): setup_preprocessing() - elif fname.endswith("statistical_inference/unsupervised_learning.rst"): - setup_unsupervised_learning() rst_files_requiring_matplotlib = [ "modules/partial_dependence.rst", "modules/tree.rst", - "tutorial/statistical_inference/settings.rst", - "tutorial/statistical_inference/supervised_learning.rst", ] for each in rst_files_requiring_matplotlib: if fname.endswith(each): @@ -178,13 +153,17 @@ def pytest_collection_modifyitems(config, items): items : list of collected items """ skip_doctests = False - if np_base_version >= parse_version("2"): - # Skip doctests when using numpy 2 for now. See the following discussion - # to decide what to do in the longer term: - # https://github.com/scikit-learn/scikit-learn/issues/27339 + if np_base_version < parse_version("2"): + # TODO: configure numpy to output scalar arrays as regular Python scalars + # once possible to improve readability of the tests docstrings. + # https://numpy.org/neps/nep-0051-scalar-representation.html#implementation reason = "Due to NEP 51 numpy scalar repr has changed in numpy 2" skip_doctests = True + if sp_version < parse_version("1.14"): + reason = "Scipy sparse matrix repr has changed in scipy 1.14" + skip_doctests = True + # Normally doctest has the entire module's scope. Here we set globs to an empty dict # to remove the module's scope: # https://docs.python.org/3/library/doctest.html#what-s-the-execution-context diff --git a/doc/contents.rst b/doc/contents.rst deleted file mode 100644 index a28634621d558..0000000000000 --- a/doc/contents.rst +++ /dev/null @@ -1,24 +0,0 @@ -.. include:: includes/big_toc_css.rst -.. include:: tune_toc.rst - -.. Places global toc into the sidebar - -:globalsidebartoc: True - -================= -Table Of Contents -================= - -.. 
Define an order for the Table of Contents: - -.. toctree:: - :maxdepth: 2 - - preface - tutorial/index - getting_started - user_guide - glossary - auto_examples/index - modules/classes - developers/index diff --git a/doc/contributor_experience_team.rst b/doc/contributor_experience_team.rst index 7d942a07e6a7d..73ccd668b20cd 100644 --- a/doc/contributor_experience_team.rst +++ b/doc/contributor_experience_team.rst @@ -6,6 +6,10 @@ img.avatar {border-radius: 10px;}
+[avatar entry] Virgil Chan
 [avatar entry] Juan Carlos Alfaro Jiménez

@@ -30,6 +34,10 @@

 [avatar entry] Norbert Preining
+[avatar entry] Stefanie Senger
 [avatar entry] Reshama Shaikh

diff --git a/doc/css/.gitkeep b/doc/css/.gitkeep new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/doc/data_transforms.rst b/doc/data_transforms.rst index 084214cb094f5..536539ec97007 100644 --- a/doc/data_transforms.rst +++ b/doc/data_transforms.rst @@ -1,9 +1,3 @@ -.. Places parent toc into the sidebar - -:parenttoc: True - -.. include:: includes/big_toc_css.rst - .. _data-transforms: Dataset transformations diff --git a/doc/datasets.rst b/doc/datasets.rst index b9484a02ce84c..f12e5095cc6a8 100644 --- a/doc/datasets.rst +++ b/doc/datasets.rst @@ -1,9 +1,3 @@ -.. Places parent toc into the sidebar - -:parenttoc: True - -.. include:: includes/big_toc_css.rst - .. _datasets: ========================= @@ -12,12 +6,9 @@ Dataset loading utilities .. currentmodule:: sklearn.datasets -The ``sklearn.datasets`` package embeds some small toy datasets -as introduced in the :ref:`Getting Started ` section. - -This package also features helpers to fetch larger datasets commonly -used by the machine learning community to benchmark algorithms on data -that comes from the 'real world'. +The ``sklearn.datasets`` package embeds some small toy datasets and provides helpers +to fetch larger datasets commonly used by the machine learning community to benchmark +algorithms on data that comes from the 'real world'. To evaluate the impact of the scale of the dataset (``n_samples`` and ``n_features``) while controlling the statistical properties of the data @@ -42,7 +33,7 @@ length ``n_samples``, containing the target values, with key ``target``. The Bunch object is a dictionary that exposes its keys as attributes. For more information about Bunch object, see :class:`~sklearn.utils.Bunch`. -It's also possible for almost all of these function to constrain the output +It's also possible for almost all of these functions to constrain the output to be a tuple containing only the data and the target, by setting the ``return_X_y`` parameter to ``True``. diff --git a/doc/datasets/loading_other_datasets.rst b/doc/datasets/loading_other_datasets.rst index fdd7fd1666cce..84d042f64c9d3 100644 --- a/doc/datasets/loading_other_datasets.rst +++ b/doc/datasets/loading_other_datasets.rst @@ -1,7 +1,3 @@ -.. Places parent toc into the sidebar - -:parenttoc: True - .. _loading_other_datasets: Loading other datasets @@ -23,24 +19,29 @@ and pipelines on 2D data. load_sample_images load_sample_image -.. image:: ../auto_examples/cluster/images/sphx_glr_plot_color_quantization_001.png - :target: ../auto_examples/cluster/plot_color_quantization.html +.. plot:: + :context: close-figs :scale: 30 :align: right + :include-source: False + + import matplotlib.pyplot as plt + from sklearn.datasets import load_sample_image + china = load_sample_image("china.jpg") + plt.imshow(china) + plt.axis('off') + plt.tight_layout() + plt.show() .. warning:: The default coding of images is based on the ``uint8`` dtype to spare memory. Often machine learning algorithms work best if the input is converted to a floating point representation first. Also, - if you plan to use ``matplotlib.pyplpt.imshow``, don't forget to scale to the range + if you plan to use ``matplotlib.pyplot.imshow``, don't forget to scale to the range 0 - 1 as done in the following example. -.. topic:: Examples: - - * :ref:`sphx_glr_auto_examples_cluster_plot_color_quantization.py` - .. _libsvm_loader: Datasets in svmlight / libsvm format @@ -52,7 +53,7 @@ takes the form ``
+[avatar entry] Maren Westermann
+[avatar entry] Yao Xiao

diff --git a/doc/faq.rst b/doc/faq.rst index 8ddf0c4c238f6..99cb13c5be4d6 100644 --- a/doc/faq.rst +++ b/doc/faq.rst @@ -1,3 +1,32 @@ +.. raw:: html + + + .. _faq: ========================== @@ -9,8 +38,9 @@ Frequently Asked Questions Here we try to give some answers to questions that regularly pop up on the mailing list. .. contents:: Table of Contents - :local: - :depth: 2 + :local: + :depth: 2 + About the project ----------------- @@ -32,13 +62,10 @@ Apart from scikit-learn, another popular one is `scikit-image `_ (an alternative Python implementation with -a built-in just-in-time compiler). - -Note however that this support is still considered experimental and specific -components might behave slightly differently. Please refer to the test -suite of the specific module of interest for more details. +Due to limited maintainer resources and small number of users, using +scikit-learn with `PyPy `_ (an alternative Python +implementation with a built-in just-in-time compiler) is not officially +supported. How can I obtain permission to use the images in scikit-learn for my work? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -110,7 +137,7 @@ See :ref:`adding_graphical_models`. Will you add GPU support? ^^^^^^^^^^^^^^^^^^^^^^^^^ -Adding GPU support by default would introduce heavy harware-specific software +Adding GPU support by default would introduce heavy hardware-specific software dependencies and existing algorithms would need to be reimplemented. This would make it both harder for the average user to install scikit-learn and harder for the developers to maintain the code. @@ -154,21 +181,33 @@ discussed in :ref:`preprocessing_categorical_features`. See also :ref:`sphx_glr_auto_examples_compose_plot_column_transformer_mixed_types.py` for an example of working with heterogeneous (e.g. categorical and numeric) data. -Why does scikit-learn not directly work with, for example, :class:`pandas.DataFrame`? -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The homogeneous NumPy and SciPy data objects currently expected are most -efficient to process for most operations. Extensive work would also be needed -to support Pandas categorical types. Restricting input to homogeneous -types therefore reduces maintenance cost and encourages usage of efficient -data structures. - -Note however that :class:`~sklearn.compose.ColumnTransformer` makes it -convenient to handle heterogeneous pandas dataframes by mapping homogeneous subsets of -dataframe columns selected by name or dtype to dedicated scikit-learn transformers. -Therefore :class:`~sklearn.compose.ColumnTransformer` are often used in the first -step of scikit-learn pipelines when dealing -with heterogeneous dataframes (see :ref:`pipeline` for more details). +Note that recently, :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and +:class:`~sklearn.ensemble.HistGradientBoostingRegressor` gained native support for +categorical features through the option `categorical_features="from_dtype"`. This +option relies on inferring which columns of the data are categorical based on the +:class:`pandas.CategoricalDtype` and :class:`polars.datatypes.Categorical` dtypes. + +Does scikit-learn work natively with various types of dataframes? +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Scikit-learn has limited support for :class:`pandas.DataFrame` and +:class:`polars.DataFrame`. 
Scikit-learn estimators can accept both these dataframe types +as input, and scikit-learn transformers can output dataframes using the `set_output` +API. For more details, refer to +:ref:`sphx_glr_auto_examples_miscellaneous_plot_set_output.py`. + +However, the internal computations in scikit-learn estimators rely on numerical +operations that are more efficiently performed on homogeneous data structures such as +NumPy arrays or SciPy sparse matrices. As a result, most scikit-learn estimators will +internally convert dataframe inputs into these homogeneous data structures. Similarly, +dataframe outputs are generated from these homogeneous data structures. + +Also note that :class:`~sklearn.compose.ColumnTransformer` makes it convenient to handle +heterogeneous pandas dataframes by mapping homogeneous subsets of dataframe columns +selected by name or dtype to dedicated scikit-learn transformers. Therefore +:class:`~sklearn.compose.ColumnTransformer` are often used in the first step of +scikit-learn pipelines when dealing with heterogeneous dataframes (see :ref:`pipeline` +for more details). See also :ref:`sphx_glr_auto_examples_compose_plot_column_transformer_mixed_types.py` for an example of working with heterogeneous (e.g. categorical and numeric) data. @@ -321,14 +360,25 @@ long-term maintenance issues in open-source software, look at Using scikit-learn ------------------ +How do I get started with scikit-learn? +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If you are new to scikit-learn, or looking to strengthen your understanding, +we highly recommend the **scikit-learn MOOC (Massive Open Online Course)**. + +See our :ref:`External Resources, Videos and Talks page ` +for more details. + What's the best way to get help on scikit-learn usage? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -**For general machine learning questions**, please use -`Cross Validated `_ with the ``[machine-learning]`` tag. -**For scikit-learn usage questions**, please use `Stack Overflow `_ -with the ``[scikit-learn]`` and ``[python]`` tags. You can alternatively use the `mailing list -`_. +* General machine learning questions: use `Cross Validated + `_ with the ``[machine-learning]`` tag. + +* scikit-learn usage questions: use `Stack Overflow + `_ with the + ``[scikit-learn]`` and ``[python]`` tags. You can alternatively use the `mailing list + `_. Please make sure to include a minimal reproduction code snippet (ideally shorter than 10 lines) that highlights your problem on a toy dataset (for instance from @@ -472,7 +522,7 @@ program. Insert the following instructions in your main script:: # call scikit-learn utils with n_jobs > 1 here -You can find more default on the new start methods in the `multiprocessing +You can find more details on the new start methods in the `multiprocessing documentation `_. .. _faq_mkl_threading: diff --git a/doc/getting_started.rst b/doc/getting_started.rst index cd4d953db1b8a..14e0178f0826b 100644 --- a/doc/getting_started.rst +++ b/doc/getting_started.rst @@ -53,6 +53,8 @@ new data. You don't need to re-train the estimator:: >>> clf.predict([[4, 5, 6], [14, 15, 16]]) # predict classes of new data array([0, 1]) +You can check :ref:`ml_map` on how to choose the right model for your use case. + Transformers and pre-processors ------------------------------- @@ -227,6 +229,3 @@ provide. You can also find an exhaustive list of the public API in the You can also look at our numerous :ref:`examples ` that illustrate the use of ``scikit-learn`` in many different contexts. 
- -The :ref:`tutorials ` also contain additional learning -resources. diff --git a/doc/glossary.rst b/doc/glossary.rst index 84a628b0f716d..caf6b952553c4 100644 --- a/doc/glossary.rst +++ b/doc/glossary.rst @@ -198,7 +198,8 @@ General Concepts This refers to the tests run on almost every estimator class in Scikit-learn to check they comply with basic API conventions. They are available for external use through - :func:`utils.estimator_checks.check_estimator`, with most of the + :func:`utils.estimator_checks.check_estimator` or + :func:`utils.estimator_checks.parametrize_with_checks`, with most of the implementation in ``sklearn/utils/estimator_checks.py``. Note: Some exceptions to the common testing regime are currently @@ -293,8 +294,8 @@ General Concepts error, but demand more computational resources, resulting in slower operations and increased memory usage. In contrast, 32-bit types promise enhanced operation speed and reduced memory consumption, but - introduce a larger floating-point error. The efficiency improvement are - dependent on lower level optimization such as like vectorization, + introduce a larger floating-point error. The efficiency improvements are + dependent on lower level optimization such as vectorization, single instruction multiple dispatch (SIMD), or cache optimization but crucially on the compatibility of the algorithm in use. @@ -407,8 +408,7 @@ General Concepts likelihoods. estimator tags - A proposed feature (e.g. :issue:`8022`) by which the capabilities of an - estimator are described through a set of semantic tags. This would + Estimator tags describe certain capabilities of an estimator. This would enable some runtime behaviors based on estimator inspection, but it also allows each estimator to be tested for appropriate invariances while being excepted from other :term:`common tests`. @@ -417,15 +417,6 @@ General Concepts the :term:`duck typing` of methods like ``predict_proba`` and through some special attributes on estimator objects: - .. glossary:: - - ``_estimator_type`` - This string-valued attribute identifies an estimator as being a - classifier, regressor, etc. It is set by mixins such as - :class:`base.ClassifierMixin`, but needs to be more explicitly - adopted on a :term:`meta-estimator`. Its value should usually be - checked by way of a helper such as :func:`base.is_classifier`. - For more detailed info, see :ref:`estimator_tags`. feature @@ -516,7 +507,7 @@ General Concepts joblib A Python library (https://joblib.readthedocs.io) used in Scikit-learn to - facilite simple parallelism and caching. Joblib is oriented towards + facilitate simple parallelism and caching. Joblib is oriented towards efficiently working with numpy arrays, such as through use of :term:`memory mapping`. See :ref:`parallelism` for more information. @@ -710,6 +701,9 @@ General Concepts Elsewhere a sample is called an instance, data point, or observation. ``n_samples`` indicates the number of samples in a dataset, being the number of rows in a data array :term:`X`. + Note that this definition is standard in machine learning and deviates from + statistics where it means *a set of individuals or objects collected or + selected*. sample property sample properties @@ -753,7 +747,7 @@ General Concepts sparse matrix sparse graph A representation of two-dimensional numeric data that is more memory - efficient the corresponding dense numpy array where almost all elements + efficient than the corresponding dense numpy array where almost all elements are zero. 
We use the :mod:`scipy.sparse` framework, which provides several underlying sparse data representations, or *formats*. Some formats are more efficient than others for particular tasks, and @@ -857,8 +851,8 @@ Class APIs and Estimator Types strategy over the binary classification problem. Classifiers must store a :term:`classes_` attribute after fitting, - and usually inherit from :class:`base.ClassifierMixin`, which sets - their :term:`_estimator_type` attribute. + and inherit from :class:`base.ClassifierMixin`, which sets + their corresponding :term:`estimator tags` correctly. A classifier can be distinguished from other estimators with :func:`~base.is_classifier`. @@ -1001,8 +995,8 @@ Class APIs and Estimator Types A :term:`supervised` (or :term:`semi-supervised`) :term:`predictor` with :term:`continuous` output values. - Regressors usually inherit from :class:`base.RegressorMixin`, which - sets their :term:`_estimator_type` attribute. + Regressors inherit from :class:`base.RegressorMixin`, which sets their + :term:`estimator tags` correctly. A regressor can be distinguished from other estimators with :func:`~base.is_regressor`. @@ -1702,9 +1696,15 @@ functions or non-estimator constructors. objects and avoid common pitfalls, you may refer to :ref:`randomness`. ``scoring`` - Specifies the score function to be maximized (usually by :ref:`cross - validation `), or -- in some cases -- multiple score - functions to be reported. The score function can be a string accepted + Depending on the object, can specify: + + * the score function to be maximized (usually by + :ref:`cross validation `), + * the multiple score functions to be reported, + * the score function to be used to check early stopping, or + * for visualization related objects, the score function to output or plot + + The score function can be a string accepted by :func:`metrics.get_scorer` or a callable :term:`scorer`, not to be confused with an :term:`evaluation metric`, as the latter have a more diverse API. ``scoring`` may also be set to None, in which case the @@ -1715,8 +1715,7 @@ functions or non-estimator constructors. either as a list of unique strings, a dictionary with names as keys and callables as values or a callable that returns a dictionary. Note that this does *not* specify which score function is to be maximized, and - another parameter such as ``refit`` maybe used for this purpose. - + another parameter such as ``refit`` may be used for this purpose. The ``scoring`` parameter is validated and interpreted using :func:`metrics.check_scoring`. @@ -1757,7 +1756,7 @@ functions or non-estimator constructors. Other models, usually using gradient-based solvers, have a different behavior. They all expose a ``max_iter`` parameter. The reported - ``n_iter_`` corresponds to the number of iteration done during the last + ``n_iter_`` corresponds to the number of iterations done during the last call to ``fit`` and will be at most ``max_iter``. Thus, we do not consider the state of the estimator since the initialization. @@ -1799,7 +1798,7 @@ See concept :term:`attribute`. the number of output features and :term:`n_features` is the number of input features. - See also :term:`components_` which is a similar attribute for linear + See also :term:`coef_` which is a similar attribute for linear predictors. 
``coef_`` diff --git a/doc/governance.rst b/doc/governance.rst index d6b07afe4eeb4..5601f80573651 100644 --- a/doc/governance.rst +++ b/doc/governance.rst @@ -56,7 +56,8 @@ Core contributors that have not contributed to the project, corresponding to their role, in the past 12 months will be asked if they want to become emeritus members and recant their rights until they become active again. The list of members, active and emeritus (with dates at which they became active) is public -on the scikit-learn website. +on the scikit-learn website. It is the responsibility of the active core +contributors to send such a yearly reminder email. The following teams form the core contributors group: @@ -66,7 +67,7 @@ The following teams form the core contributors group: repeating patterns where people might struggle, and to help with improving those aspects of the project. - To this end, they have the required permissions on github to label and close + To this end, they have the required permissions on GitHub to label and close issues. :ref:`Their work ` is crucial to improve the communication in the project and limit the crowding of the issue tracker. @@ -157,7 +158,7 @@ are made according to the following rules: consensus), happens on the issue of pull-request page. * **Changes to the API principles and changes to dependencies or supported - versions** happen via a :ref:`slep` and follows the decision-making process + versions** happen via :ref:`slep` and follows the decision-making process outlined above. * **Changes to the governance model** follow the process outlined in `SLEP020 diff --git a/doc/images/Tidelift-logo-on-light.svg b/doc/images/Tidelift-logo-on-light.svg new file mode 100644 index 0000000000000..af12d68417235 --- /dev/null +++ b/doc/images/Tidelift-logo-on-light.svg @@ -0,0 +1,33 @@ + + + + + + + + + + + + + + + + + + + diff --git a/doc/images/czi-small.png b/doc/images/czi-small.png new file mode 100644 index 0000000000000..7a6c81acb44a0 Binary files /dev/null and b/doc/images/czi-small.png differ diff --git a/doc/images/czi.png b/doc/images/czi.png new file mode 100644 index 0000000000000..9f2b6ebb26c5c Binary files /dev/null and b/doc/images/czi.png differ diff --git a/doc/images/czi_logo.svg b/doc/images/czi_logo.svg deleted file mode 100644 index c63b53cae25ac..0000000000000 --- a/doc/images/czi_logo.svg +++ /dev/null @@ -1,19 +0,0 @@ - - - - nav / elements / czi_mark_red - Created with Sketch. - - - - - - - - - - - - - - \ No newline at end of file diff --git a/doc/images/ml_map.README.rst b/doc/images/ml_map.README.rst new file mode 100644 index 0000000000000..645d2980591c2 --- /dev/null +++ b/doc/images/ml_map.README.rst @@ -0,0 +1,24 @@ +The scikit-learn machine learning cheat sheet was originally created by Andreas Mueller: +https://peekaboo-vision.blogspot.de/2013/01/machine-learning-cheat-sheet-for-scikit.html + +The current version of the chart is located at `doc/images/ml_map.svg` in SVG+XML +format, created using [draw.io](https://draw.io/). To edit the chart, open the file in +draw.io, make changes, and save. This should update the chart in-place. Another option +would be to re-export the chart as SVG and replace the existing file. The options used +for exporting the chart are: + +- Zoom: 100% +- Border width: 15 +- Size: Diagram +- Transparent Background: False +- Appearance: Light + +Note that estimators nodes are clickable and should go to the estimator +documentation. After updating or re-exporting the SVG with draw.io, the links +may be prefixed with e.g. 
`https://app.diagrams.net/`. Remember to check and +remove them, for instance by replacing all occurrences of +`https://app.diagrams.net/./` with `./` with the following command: + +.. prompt:: bash + + perl -pi -e 's@https://app.diagrams.net/\./@./@g' doc/images/ml_map.svg diff --git a/doc/images/ml_map.png b/doc/images/ml_map.png deleted file mode 100644 index 73ebd9c05fcc4..0000000000000 Binary files a/doc/images/ml_map.png and /dev/null differ diff --git a/doc/images/ml_map.svg b/doc/images/ml_map.svg new file mode 100644 index 0000000000000..377e147c0d42c --- /dev/null +++ b/doc/images/ml_map.svg @@ -0,0 +1,4 @@ + + + +
[ml_map.svg text content: the "scikit-learn algorithm cheat sheet" flowchart. Starting from START, decision nodes (more than 50 samples; labeled data; predicting a category, a quantity, or structure; 100K and 10K sample thresholds; text data; few features should be important) route to classification (SGD Classifier, Linear SVC, Kernel Approximation, KNeighbors Classifier, SVC, Ensemble Classifiers, Naive Bayes), clustering (MeanShift, VBGMM, MiniBatch KMeans, KMeans, Spectral Clustering, GMM), regression (SGD Regressor, Lasso, ElasticNet, RidgeRegression, SVR with linear or rbf kernel, Ensemble Regressors), and dimensionality reduction (Randomized PCA, Kernel Approximation, IsoMap, Spectral Embedding, LLE), with orange "TRY NEXT" fallback arrows between estimators; full SVG markup omitted.]
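To make one path through the chart concrete, here is a minimal illustrative sketch; the documents, labels, and spam task are invented for the example, not taken from the chart. The classification branch for labeled text data with fewer than 100K samples ends at Naive Bayes:

.. code-block:: python

    # Minimal sketch of the cheat sheet's "text data -> Naive Bayes" path;
    # the toy documents and labels below are invented for illustration.
    from sklearn.feature_extraction.text import CountVectorizer
    from sklearn.naive_bayes import MultinomialNB
    from sklearn.pipeline import make_pipeline

    texts = ["free money now", "meeting at noon", "win a big prize", "lunch tomorrow"]
    labels = [1, 0, 1, 0]  # 1 = spam, 0 = not spam

    clf = make_pipeline(CountVectorizer(), MultinomialNB())
    clf.fit(texts, labels)
    print(clf.predict(["win free money"]))  # likely array([1])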
diff --git a/doc/images/wellcome-trust-small.png b/doc/images/wellcome-trust-small.png new file mode 100644 index 0000000000000..32be045a080a2 Binary files /dev/null and b/doc/images/wellcome-trust-small.png differ diff --git a/doc/images/wellcome-trust.png b/doc/images/wellcome-trust.png new file mode 100644 index 0000000000000..4e74b033f0647 Binary files /dev/null and b/doc/images/wellcome-trust.png differ diff --git a/doc/includes/big_toc_css.rst b/doc/includes/big_toc_css.rst deleted file mode 100644 index a8ba83e99c5b8..0000000000000 --- a/doc/includes/big_toc_css.rst +++ /dev/null @@ -1,40 +0,0 @@ -.. - File to ..include in a document with a big table of content, to give - it 'style' - -.. raw:: html - - - - - diff --git a/doc/includes/bigger_toc_css.rst b/doc/includes/bigger_toc_css.rst deleted file mode 100644 index d866bd145d883..0000000000000 --- a/doc/includes/bigger_toc_css.rst +++ /dev/null @@ -1,60 +0,0 @@ -.. - File to ..include in a document with a very big table of content, to - give it 'style' - -.. raw:: html - - - - - diff --git a/doc/index.rst.template b/doc/index.rst.template new file mode 100644 index 0000000000000..f1f1f49836515 --- /dev/null +++ b/doc/index.rst.template @@ -0,0 +1,24 @@ +.. title:: Index + +.. Define the overall structure, that affects the prev-next buttons and the order + of the sections in the top navbar. + +.. toctree:: + :hidden: + :maxdepth: 2 + + Install + user_guide + API + auto_examples/index + Community + getting_started + whats_new + Glossary + Development <{{ development_link }}> + FAQ + support + related_projects + roadmap + Governance + about diff --git a/doc/inspection.rst b/doc/inspection.rst index 57c1cfc3275e8..95d121ec10d7d 100644 --- a/doc/inspection.rst +++ b/doc/inspection.rst @@ -1,9 +1,3 @@ -.. Places parent toc into the sidebar - -:parenttoc: True - -.. include:: includes/big_toc_css.rst - .. _inspection: Inspection @@ -21,9 +15,9 @@ predictions from a model and what affects them. This can be used to evaluate assumptions and biases of a model, design a better model, or to diagnose issues with model performance. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_inspection_plot_linear_model_coefficient_interpretation.py` +* :ref:`sphx_glr_auto_examples_inspection_plot_linear_model_coefficient_interpretation.py` .. toctree:: diff --git a/doc/install.rst b/doc/install.rst index c4a3548016021..9cb50a95a1988 100644 --- a/doc/install.rst +++ b/doc/install.rst @@ -6,21 +6,21 @@ Installing scikit-learn There are different ways to install scikit-learn: - * :ref:`Install the latest official release `. This - is the best approach for most users. It will provide a stable version - and pre-built packages are available for most platforms. +* :ref:`Install the latest official release `. This + is the best approach for most users. It will provide a stable version + and pre-built packages are available for most platforms. - * Install the version of scikit-learn provided by your - :ref:`operating system or Python distribution `. - This is a quick option for those who have operating systems or Python - distributions that distribute scikit-learn. - It might not provide the latest release version. +* Install the version of scikit-learn provided by your + :ref:`operating system or Python distribution `. + This is a quick option for those who have operating systems or Python + distributions that distribute scikit-learn. + It might not provide the latest release version. - * :ref:`Building the package from source - `. 
This is best for users who want the - latest-and-greatest features and aren't afraid of running - brand-new code. This is also needed for users who wish to contribute to the - project. +* :ref:`Building the package from source + `. This is best for users who want the + latest-and-greatest features and aren't afraid of running + brand-new code. This is also needed for users who wish to contribute to the + project. .. _install_official_release: @@ -28,117 +28,158 @@ There are different ways to install scikit-learn: Installing the latest release ============================= -.. This quickstart installation is a hack of the awesome - https://spacy.io/usage/#quickstart page. - See the original javascript implementation - https://github.com/ines/quickstart +.. raw:: html + -.. raw:: html +.. div:: install-instructions -
-    Install the 64bit version of Python 3, for instance from https://www.python.org.
-    Install Python 3 using homebrew (brew install python) or by manually installing the package from https://www.python.org.
-    Install python3 and python3-pip using the package manager of the Linux Distribution.
-    Install conda using the Anaconda or miniconda installers or the miniforge installers (no administrator permission required for any of those).
-
+   .. tab-set::
+      :class: tabs-os
-Then run:
+      .. tab-item:: Windows
+         :class-label: tab-4
-.. raw:: html
+         .. tab-set::
+            :class: tabs-package-manager
-
-
pip3 install -U scikit-learn
+ .. tab-item:: pip + :class-label: tab-6 + :sync: package-manager-pip -
pip install -U scikit-learn
+ Install the 64-bit version of Python 3, for instance from the + `official website `__. -
pip install -U scikit-learn
+ Now create a `virtual environment (venv) + `_ and install scikit-learn. + Note that the virtual environment is optional but strongly recommended, in + order to avoid potential conflicts with other packages. -
python3 -m venv sklearn-venv
-  source sklearn-venv/bin/activate
-  pip3 install -U scikit-learn
+ .. prompt:: powershell -
python -m venv sklearn-venv
-  sklearn-venv\Scripts\activate
-  pip install -U scikit-learn
+ python -m venv sklearn-env + sklearn-env\Scripts\activate # activate + pip install -U scikit-learn -
python -m venv sklearn-venv
-  source sklearn-venv/bin/activate
-  pip install -U scikit-learn
+ In order to check your installation, you can use: -
conda create -n sklearn-env -c conda-forge scikit-learn
-  conda activate sklearn-env
-
+ .. prompt:: powershell -In order to check your installation you can use + python -m pip show scikit-learn # show scikit-learn version and location + python -m pip freeze # show all installed packages in the environment + python -c "import sklearn; sklearn.show_versions()" -.. raw:: html + .. tab-item:: conda + :class-label: tab-6 + :sync: package-manager-conda + + .. include:: ./install_instructions_conda.rst + + .. tab-item:: MacOS + :class-label: tab-4 + + .. tab-set:: + :class: tabs-package-manager + + .. tab-item:: pip + :class-label: tab-6 + :sync: package-manager-pip + + Install Python 3 using `homebrew `_ (`brew install python`) + or by manually installing the package from the `official website + `__. + + Now create a `virtual environment (venv) + `_ and install scikit-learn. + Note that the virtual environment is optional but strongly recommended, in + order to avoid potential conflicts with other packages. + + .. prompt:: bash + + python -m venv sklearn-env + source sklearn-env/bin/activate # activate + pip install -U scikit-learn + + In order to check your installation, you can use: + + .. prompt:: bash + + python -m pip show scikit-learn # show scikit-learn version and location + python -m pip freeze # show all installed packages in the environment + python -c "import sklearn; sklearn.show_versions()" + + .. tab-item:: conda + :class-label: tab-6 + :sync: package-manager-conda + + .. include:: ./install_instructions_conda.rst + + .. tab-item:: Linux + :class-label: tab-4 + + .. tab-set:: + :class: tabs-package-manager + + .. tab-item:: pip + :class-label: tab-6 + :sync: package-manager-pip + + Python 3 is usually installed by default on most Linux distributions. To + check if you have it installed, try: + + .. prompt:: bash -
-
python3 -m pip show scikit-learn  # to see which version and where scikit-learn is installed
-  python3 -m pip freeze  # to see all packages installed in the active virtualenv
-  python3 -c "import sklearn; sklearn.show_versions()"
- -
python -m pip show scikit-learn  # to see which version and where scikit-learn is installed
-  python -m pip freeze  # to see all packages installed in the active virtualenv
-  python -c "import sklearn; sklearn.show_versions()"
- -
python -m pip show scikit-learn  # to see which version and where scikit-learn is installed
-  python -m pip freeze  # to see all packages installed in the active virtualenv
-  python -c "import sklearn; sklearn.show_versions()"
- -
python -m pip show scikit-learn  # to see which version and where scikit-learn is installed
-  python -m pip freeze  # to see all packages installed in the active virtualenv
-  python -c "import sklearn; sklearn.show_versions()"
- -
conda list scikit-learn  # to see which scikit-learn version is installed
-  conda list  # to see all packages installed in the active conda environment
-  python -c "import sklearn; sklearn.show_versions()"
-
- -Note that in order to avoid potential conflicts with other packages it is -strongly recommended to use a `virtual environment (venv) -`_ or a `conda environment -`_. - -Using such an isolated environment makes it possible to install a specific -version of scikit-learn with pip or conda and its dependencies independently of -any previously installed Python packages. In particular under Linux is it -discouraged to install pip packages alongside the packages managed by the + python3 --version + pip3 --version + + If you don't have Python 3 installed, please install `python3` and + `python3-pip` from your distribution's package manager. + + Now create a `virtual environment (venv) + `_ and install scikit-learn. + Note that the virtual environment is optional but strongly recommended, in + order to avoid potential conflicts with other packages. + + .. prompt:: bash + + python3 -m venv sklearn-env + source sklearn-env/bin/activate # activate + pip3 install -U scikit-learn + + In order to check your installation, you can use: + + .. prompt:: bash + + python3 -m pip show scikit-learn # show scikit-learn version and location + python3 -m pip freeze # show all installed packages in the environment + python3 -c "import sklearn; sklearn.show_versions()" + + .. tab-item:: conda + :class-label: tab-6 + :sync: package-manager-conda + + .. include:: ./install_instructions_conda.rst + + +Using an isolated environment such as pip venv or conda makes it possible to +install a specific version of scikit-learn with pip or conda and its dependencies +independently of any previously installed Python packages. In particular under Linux +it is discouraged to install pip packages alongside the packages managed by the package manager of the distribution (apt, dnf, pacman...). Note that you should always remember to activate the environment of your choice @@ -150,11 +191,10 @@ and NumPy and SciPy are not recompiled from source, which can happen when using particular configurations of operating system and hardware (such as Linux on a Raspberry Pi). - -Scikit-learn plotting capabilities (i.e., functions start with "plot\_" -and classes end with "Display") require Matplotlib. The examples require +Scikit-learn plotting capabilities (i.e., functions starting with `plot\_` +and classes ending with `Display`) require Matplotlib. The examples require Matplotlib and some examples require scikit-image, pandas, or seaborn. The -minimum version of Scikit-learn dependencies are listed below along with its +minimum version of scikit-learn dependencies are listed below along with its purpose. .. include:: min_dependency_table.rst @@ -162,12 +202,24 @@ purpose. .. warning:: Scikit-learn 0.20 was the last version to support Python 2.7 and Python 3.4. - Scikit-learn 0.21 supported Python 3.5-3.7. - Scikit-learn 0.22 supported Python 3.5-3.8. - Scikit-learn 0.23 - 0.24 require Python 3.6 or newer. - Scikit-learn 1.0 supported Python 3.7-3.10. - Scikit-learn 1.1 and later requires Python 3.8 or newer. + Scikit-learn 0.21 supported Python 3.5—3.7. + + Scikit-learn 0.22 supported Python 3.5—3.8. + + Scikit-learn 0.23 required Python 3.6—3.8. + + Scikit-learn 0.24 required Python 3.6—3.9. + + Scikit-learn 1.0 supported Python 3.7—3.10. + + Scikit-learn 1.1, 1.2 and 1.3 supported Python 3.8—3.12. + + Scikit-learn 1.4 and 1.5 supported Python 3.9—3.12. + + Scikit-learn 1.6 supported Python 3.9—3.13. + + Scikit-learn 1.7 requires Python 3.10 or newer. .. 
_install_by_distribution: @@ -192,7 +244,7 @@ Alpine Linux's package is provided through the `official repositories ``py3-scikit-learn`` for Python. It can be installed by typing the following command: -.. prompt:: bash $ +.. prompt:: bash sudo apk add py3-scikit-learn @@ -205,7 +257,7 @@ Arch Linux's package is provided through the `official repositories ``python-scikit-learn`` for Python. It can be installed by typing the following command: -.. prompt:: bash $ +.. prompt:: bash sudo pacman -S python-scikit-learn @@ -215,14 +267,14 @@ Debian/Ubuntu The Debian/Ubuntu package is split in three different packages called ``python3-sklearn`` (python modules), ``python3-sklearn-lib`` (low-level -implementations and bindings), ``python3-sklearn-doc`` (documentation). +implementations and bindings), ``python-sklearn-doc`` (documentation). Note that scikit-learn requires Python 3, hence the need to use the `python3-` suffixed package names. Packages can be installed using ``apt-get``: -.. prompt:: bash $ +.. prompt:: bash - sudo apt-get install python3-sklearn python3-sklearn-lib python3-sklearn-doc + sudo apt-get install python3-sklearn python3-sklearn-lib python-sklearn-doc Fedora @@ -232,7 +284,7 @@ The Fedora package is called ``python3-scikit-learn`` for the python 3 version, the only one available in Fedora. It can be installed using ``dnf``: -.. prompt:: bash $ +.. prompt:: bash sudo dnf install python3-scikit-learn @@ -240,10 +292,8 @@ It can be installed using ``dnf``: NetBSD ------ -scikit-learn is available via `pkgsrc-wip -`_: - - https://pkgsrc.se/math/py-scikit-learn +scikit-learn is available via `pkgsrc-wip `_: +https://pkgsrc.se/math/py-scikit-learn MacPorts for Mac OSX @@ -254,9 +304,9 @@ where ``XY`` denotes the Python version. It can be installed by typing the following command: -.. prompt:: bash $ +.. prompt:: bash - sudo port install py39-scikit-learn + sudo port install py312-scikit-learn Anaconda and Enthought Deployment Manager for all supported platforms @@ -276,7 +326,7 @@ Intel Extension for Scikit-learn Intel maintains an optimized x86_64 package, available in PyPI (via `pip`), and in the `main`, `conda-forge` and `intel` conda channels: -.. prompt:: bash $ +.. prompt:: bash conda install scikit-learn-intelex @@ -302,7 +352,7 @@ with `scikit-learn-intelex`, please report the issue on their WinPython for Windows ------------------------ +--------------------- The `WinPython `_ project distributes scikit-learn as an additional plugin. @@ -311,6 +361,10 @@ scikit-learn as an additional plugin. Troubleshooting =============== +If you encounter unexpected failures when installing scikit-learn, you may submit +an issue to the `issue tracker `_. +Before that, please also make sure to check the following common issues. + .. _windows_longpath: Error caused by file path length limit on Windows @@ -340,6 +394,6 @@ using the ``regedit`` tool: #. Reinstall scikit-learn (ignoring the previous broken installation): -.. prompt:: bash $ + .. prompt:: powershell - pip install --exists-action=i scikit-learn + pip install --exists-action=i scikit-learn diff --git a/doc/install_instructions_conda.rst b/doc/install_instructions_conda.rst new file mode 100644 index 0000000000000..0b5a57b747021 --- /dev/null +++ b/doc/install_instructions_conda.rst @@ -0,0 +1,16 @@ +Install conda using the +`conda-forge installers `__ (no +administrator permission required). Then run: + +.. 
prompt:: bash + + conda create -n sklearn-env -c conda-forge scikit-learn + conda activate sklearn-env + +In order to check your installation, you can use: + +.. prompt:: bash + + conda list scikit-learn # show scikit-learn version and location + conda list # show all installed packages in the environment + python -c "import sklearn; sklearn.show_versions()" diff --git a/doc/js/scripts/api-search.js b/doc/js/scripts/api-search.js new file mode 100644 index 0000000000000..2148e0c429aaa --- /dev/null +++ b/doc/js/scripts/api-search.js @@ -0,0 +1,12 @@ +/** + * This script is for initializing the search table on the API index page. See + * DataTables documentation for more information: https://datatables.net/ + */ + +document.addEventListener("DOMContentLoaded", function () { + new DataTable("table.apisearch-table", { + order: [], // Keep original order + lengthMenu: [10, 25, 50, 100, { label: "All", value: -1 }], + pageLength: -1, // Show all entries by default + }); +}); diff --git a/doc/js/scripts/dropdown.js b/doc/js/scripts/dropdown.js new file mode 100644 index 0000000000000..d74d138773eed --- /dev/null +++ b/doc/js/scripts/dropdown.js @@ -0,0 +1,63 @@ +/** + * This script is used to add the functionality of collapsing/expanding all dropdowns + * on the page to the sphinx-design dropdowns. This is because some browsers cannot + * search into collapsed
(such as Firefox). + * + * The reason why the buttons are added to the page with JS (dynamic) instead of with + * sphinx (static) is that the button will not work without JS activated, so we do not + * want them to show up in that case. + */ + +document.addEventListener("DOMContentLoaded", () => { + // Get all sphinx-design dropdowns + const allDropdowns = document.querySelectorAll("details.sd-dropdown"); + + allDropdowns.forEach((dropdown) => { + // Get the summary element of the dropdown, where we will place the buttons + const summaryTitle = dropdown.querySelector("summary.sd-summary-title"); + + // The state marker with the toggle all icon inside + const newStateMarker = document.createElement("span"); + const newIcon = document.createElement("i"); + newIcon.classList.add("fa-solid", "fa-angles-right"); + newStateMarker.appendChild(newIcon); + + // Classes for styling; `sd-summary-state-marker` and `sd-summary-chevron-right` are + // implemented by sphinx-design; `sk-toggle-all` is implemented by us + newStateMarker.classList.add( + "sd-summary-state-marker", + "sd-summary-chevron-right", + "sk-toggle-all" + ); + + // Bootstrap tooltip configurations + newStateMarker.setAttribute("data-bs-toggle", "tooltip"); + newStateMarker.setAttribute("data-bs-placement", "top"); + newStateMarker.setAttribute("data-bs-offset", "0,10"); + newStateMarker.setAttribute("data-bs-title", "Toggle all dropdowns"); + // Enable the tooltip + new bootstrap.Tooltip(newStateMarker); + + // Assign the collapse/expand action to the state marker + newStateMarker.addEventListener("click", () => { + if (dropdown.open) { + console.log("[SK] Collapsing all dropdowns..."); + allDropdowns.forEach((node) => { + if (node !== dropdown) { + node.removeAttribute("open"); + } + }); + } else { + console.log("[SK] Expanding all dropdowns..."); + allDropdowns.forEach((node) => { + if (node !== dropdown) { + node.setAttribute("open", ""); + } + }); + } + }); + + // Append the state marker to the summary element + summaryTitle.insertBefore(newStateMarker, summaryTitle.lastElementChild); + }); +}); diff --git a/doc/js/scripts/sg_plotly_resize.js b/doc/js/scripts/sg_plotly_resize.js new file mode 100644 index 0000000000000..2d2611910db78 --- /dev/null +++ b/doc/js/scripts/sg_plotly_resize.js @@ -0,0 +1,10 @@ +// Related to https://github.com/scikit-learn/scikit-learn/issues/30279 +// There an interaction between plotly and bootstrap/pydata-sphinx-theme +// that causes plotly figures to not detect the right-hand sidebar width + +// Plotly figures are responsive, this triggers a resize event once the DOM has +// finished loading so that they resize themselves. + +document.addEventListener("DOMContentLoaded", () => { + window.dispatchEvent(new Event("resize")); +}); diff --git a/doc/js/scripts/vendor/svg-pan-zoom.min.js b/doc/js/scripts/vendor/svg-pan-zoom.min.js new file mode 100644 index 0000000000000..bde44a689bfe1 --- /dev/null +++ b/doc/js/scripts/vendor/svg-pan-zoom.min.js @@ -0,0 +1,31 @@ +/** + * svg-pan-zoom v3.6.2 + * + * https://github.com/bumbu/svg-pan-zoom + * + * Copyright 2009-2010 Andrea Leofreddi + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright notice, this + * list of conditions and the following disclaimer in the documentation and/or + * other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +!function s(r,a,l){function u(e,t){if(!a[e]){if(!r[e]){var o="function"==typeof require&&require;if(!t&&o)return o(e,!0);if(h)return h(e,!0);var n=new Error("Cannot find module '"+e+"'");throw n.code="MODULE_NOT_FOUND",n}var i=a[e]={exports:{}};r[e][0].call(i.exports,function(t){return u(r[e][1][t]||t)},i,i.exports,s,r,a,l)}return a[e].exports}for(var h="function"==typeof require&&require,t=0;tthis.options.maxZoom*n.zoom&&(t=this.options.maxZoom*n.zoom/this.getZoom());var i=this.viewport.getCTM(),s=e.matrixTransform(i.inverse()),r=this.svg.createSVGMatrix().translate(s.x,s.y).scale(t).translate(-s.x,-s.y),a=i.multiply(r);a.a!==i.a&&this.viewport.setCTM(a)},i.prototype.zoom=function(t,e){this.zoomAtPoint(t,a.getSvgCenterPoint(this.svg,this.width,this.height),e)},i.prototype.publicZoom=function(t,e){e&&(t=this.computeFromRelativeZoom(t)),this.zoom(t,e)},i.prototype.publicZoomAtPoint=function(t,e,o){if(o&&(t=this.computeFromRelativeZoom(t)),"SVGPoint"!==r.getType(e)){if(!("x"in e&&"y"in e))throw new Error("Given point is invalid");e=a.createSVGPoint(this.svg,e.x,e.y)}this.zoomAtPoint(t,e,o)},i.prototype.getZoom=function(){return this.viewport.getZoom()},i.prototype.getRelativeZoom=function(){return this.viewport.getRelativeZoom()},i.prototype.computeFromRelativeZoom=function(t){return t*this.viewport.getOriginalState().zoom},i.prototype.resetZoom=function(){var t=this.viewport.getOriginalState();this.zoom(t.zoom,!0)},i.prototype.resetPan=function(){this.pan(this.viewport.getOriginalState())},i.prototype.reset=function(){this.resetZoom(),this.resetPan()},i.prototype.handleDblClick=function(t){var e;if((this.options.preventMouseEventsDefault&&(t.preventDefault?t.preventDefault():t.returnValue=!1),this.options.controlIconsEnabled)&&-1<(t.target.getAttribute("class")||"").indexOf("svg-pan-zoom-control"))return!1;e=t.shiftKey?1/(2*(1+this.options.zoomScaleSensitivity)):2*(1+this.options.zoomScaleSensitivity);var 
o=a.getEventPoint(t,this.svg).matrixTransform(this.svg.getScreenCTM().inverse());this.zoomAtPoint(e,o)},i.prototype.handleMouseDown=function(t,e){this.options.preventMouseEventsDefault&&(t.preventDefault?t.preventDefault():t.returnValue=!1),r.mouseAndTouchNormalize(t,this.svg),this.options.dblClickZoomEnabled&&r.isDblClick(t,e)?this.handleDblClick(t):(this.state="pan",this.firstEventCTM=this.viewport.getCTM(),this.stateOrigin=a.getEventPoint(t,this.svg).matrixTransform(this.firstEventCTM.inverse()))},i.prototype.handleMouseMove=function(t){if(this.options.preventMouseEventsDefault&&(t.preventDefault?t.preventDefault():t.returnValue=!1),"pan"===this.state&&this.options.panEnabled){var e=a.getEventPoint(t,this.svg).matrixTransform(this.firstEventCTM.inverse()),o=this.firstEventCTM.translate(e.x-this.stateOrigin.x,e.y-this.stateOrigin.y);this.viewport.setCTM(o)}},i.prototype.handleMouseUp=function(t){this.options.preventMouseEventsDefault&&(t.preventDefault?t.preventDefault():t.returnValue=!1),"pan"===this.state&&(this.state="none")},i.prototype.fit=function(){var t=this.viewport.getViewBox(),e=Math.min(this.width/t.width,this.height/t.height);this.zoom(e,!0)},i.prototype.contain=function(){var t=this.viewport.getViewBox(),e=Math.max(this.width/t.width,this.height/t.height);this.zoom(e,!0)},i.prototype.center=function(){var t=this.viewport.getViewBox(),e=.5*(this.width-(t.width+2*t.x)*this.getZoom()),o=.5*(this.height-(t.height+2*t.y)*this.getZoom());this.getPublicInstance().pan({x:e,y:o})},i.prototype.updateBBox=function(){this.viewport.simpleViewBoxCache()},i.prototype.pan=function(t){var e=this.viewport.getCTM();e.e=t.x,e.f=t.y,this.viewport.setCTM(e)},i.prototype.panBy=function(t){var e=this.viewport.getCTM();e.e+=t.x,e.f+=t.y,this.viewport.setCTM(e)},i.prototype.getPan=function(){var t=this.viewport.getState();return{x:t.x,y:t.y}},i.prototype.resize=function(){var t=a.getBoundingClientRectNormalized(this.svg);this.width=t.width,this.height=t.height;var e=this.viewport;e.options.width=this.width,e.options.height=this.height,e.processCTM(),this.options.controlIconsEnabled&&(this.getPublicInstance().disableControlIcons(),this.getPublicInstance().enableControlIcons())},i.prototype.destroy=function(){var e=this;for(var t in this.beforeZoom=null,this.onZoom=null,this.beforePan=null,this.onPan=null,(this.onUpdatedCTM=null)!=this.options.customEventsHandler&&this.options.customEventsHandler.destroy({svgElement:this.svg,eventsListenerElement:this.options.eventsListenerElement,instance:this.getPublicInstance()}),this.eventListeners)(this.options.eventsListenerElement||this.svg).removeEventListener(t,this.eventListeners[t],!this.options.preventMouseEventsDefault&&h);this.disableMouseWheelZoom(),this.getPublicInstance().disableControlIcons(),this.reset(),c=c.filter(function(t){return t.svg!==e.svg}),delete this.options,delete this.viewport,delete this.publicInstance,delete this.pi,this.getPublicInstance=function(){return null}},i.prototype.getPublicInstance=function(){var o=this;return this.publicInstance||(this.publicInstance=this.pi={enablePan:function(){return o.options.panEnabled=!0,o.pi},disablePan:function(){return o.options.panEnabled=!1,o.pi},isPanEnabled:function(){return!!o.options.panEnabled},pan:function(t){return o.pan(t),o.pi},panBy:function(t){return o.panBy(t),o.pi},getPan:function(){return o.getPan()},setBeforePan:function(t){return o.options.beforePan=null===t?null:r.proxy(t,o.publicInstance),o.pi},setOnPan:function(t){return 
o.options.onPan=null===t?null:r.proxy(t,o.publicInstance),o.pi},enableZoom:function(){return o.options.zoomEnabled=!0,o.pi},disableZoom:function(){return o.options.zoomEnabled=!1,o.pi},isZoomEnabled:function(){return!!o.options.zoomEnabled},enableControlIcons:function(){return o.options.controlIconsEnabled||(o.options.controlIconsEnabled=!0,s.enable(o)),o.pi},disableControlIcons:function(){return o.options.controlIconsEnabled&&(o.options.controlIconsEnabled=!1,s.disable(o)),o.pi},isControlIconsEnabled:function(){return!!o.options.controlIconsEnabled},enableDblClickZoom:function(){return o.options.dblClickZoomEnabled=!0,o.pi},disableDblClickZoom:function(){return o.options.dblClickZoomEnabled=!1,o.pi},isDblClickZoomEnabled:function(){return!!o.options.dblClickZoomEnabled},enableMouseWheelZoom:function(){return o.enableMouseWheelZoom(),o.pi},disableMouseWheelZoom:function(){return o.disableMouseWheelZoom(),o.pi},isMouseWheelZoomEnabled:function(){return!!o.options.mouseWheelZoomEnabled},setZoomScaleSensitivity:function(t){return o.options.zoomScaleSensitivity=t,o.pi},setMinZoom:function(t){return o.options.minZoom=t,o.pi},setMaxZoom:function(t){return o.options.maxZoom=t,o.pi},setBeforeZoom:function(t){return o.options.beforeZoom=null===t?null:r.proxy(t,o.publicInstance),o.pi},setOnZoom:function(t){return o.options.onZoom=null===t?null:r.proxy(t,o.publicInstance),o.pi},zoom:function(t){return o.publicZoom(t,!0),o.pi},zoomBy:function(t){return o.publicZoom(t,!1),o.pi},zoomAtPoint:function(t,e){return o.publicZoomAtPoint(t,e,!0),o.pi},zoomAtPointBy:function(t,e){return o.publicZoomAtPoint(t,e,!1),o.pi},zoomIn:function(){return this.zoomBy(1+o.options.zoomScaleSensitivity),o.pi},zoomOut:function(){return this.zoomBy(1/(1+o.options.zoomScaleSensitivity)),o.pi},getZoom:function(){return o.getRelativeZoom()},setOnUpdatedCTM:function(t){return o.options.onUpdatedCTM=null===t?null:r.proxy(t,o.publicInstance),o.pi},resetZoom:function(){return o.resetZoom(),o.pi},resetPan:function(){return o.resetPan(),o.pi},reset:function(){return o.reset(),o.pi},fit:function(){return o.fit(),o.pi},contain:function(){return o.contain(),o.pi},center:function(){return o.center(),o.pi},updateBBox:function(){return o.updateBBox(),o.pi},resize:function(){return o.resize(),o.pi},getSizes:function(){return{width:o.width,height:o.height,realZoom:o.getZoom(),viewBox:o.viewport.getViewBox()}},destroy:function(){return o.destroy(),o.pi}}),this.publicInstance};var c=[];e.exports=function(t,e){var o=r.getSvg(t);if(null===o)return null;for(var n=c.length-1;0<=n;n--)if(c[n].svg===o)return c[n].instance.getPublicInstance();return c.push({svg:o,instance:new i(o,e)}),c[c.length-1].instance.getPublicInstance()}},{"./control-icons":1,"./shadow-viewport":2,"./svg-utilities":5,"./uniwheel":6,"./utilities":7}],5:[function(t,e,o){var l=t("./utilities"),s="unknown";document.documentMode&&(s="ie"),e.exports={svgNS:"http://www.w3.org/2000/svg",xmlNS:"http://www.w3.org/XML/1998/namespace",xmlnsNS:"http://www.w3.org/2000/xmlns/",xlinkNS:"http://www.w3.org/1999/xlink",evNS:"http://www.w3.org/2001/xml-events",getBoundingClientRectNormalized:function(t){if(t.clientWidth&&t.clientHeight)return{width:t.clientWidth,height:t.clientHeight};if(t.getBoundingClientRect())return t.getBoundingClientRect();throw new Error("Cannot get BoundingClientRect for SVG.")},getOrCreateViewport:function(t,e){var o=null;if(!(o=l.isElement(e)?e:t.querySelector(e))){var 
n=Array.prototype.slice.call(t.childNodes||t.children).filter(function(t){return"defs"!==t.nodeName&&"#text"!==t.nodeName});1===n.length&&"g"===n[0].nodeName&&null===n[0].getAttribute("transform")&&(o=n[0])}if(!o){var i="viewport-"+(new Date).toISOString().replace(/\D/g,"");(o=document.createElementNS(this.svgNS,"g")).setAttribute("id",i);var s=t.childNodes||t.children;if(s&&0 | __Logo 1__
File type: PNG
File size: 49 KB (1280 x 689 px)
File name: [1280px-scikit-learn-logo.png](https://github.com/scikit-learn/scikit-learn/blob/main/doc/logos/1280px-scikit-learn-logo.png) | +| | __Logo 1__
File type: PNG
File size: 49 KB (1280 x 689 px)
File name: [1280px-scikit-learn-logo.png](https://github.com/scikit-learn/scikit-learn/blob/main/doc/logos/1280px-scikit-learn-logo.png) | | | __Logo 2__
File type: ICO
File size: 2 KB (32 x 32 px)
File name: [favicon.ico](https://github.com/scikit-learn/scikit-learn/blob/main/doc/logos/favicon.ico) | -| | __Logo 3__
File type: SVG
File size: 5 KB
File name: [scikit-learn-logo-without-subtitle.svg](https://github.com/scikit-learn/scikit-learn/blob/main/doc/logos/scikit-learn-logo-without-subtitle.svg) | -| | __Logo 4__
File type: SVG
File size: 4.59 KB
File name: [scikit-learn-logo.svg](https://github.com/scikit-learn/scikit-learn/blob/main/doc/logos/scikit-learn-logo.svg) | +| | __Logo 3__
File type: SVG
File size: 5 KB
File name: [scikit-learn-logo-without-subtitle.svg](https://github.com/scikit-learn/scikit-learn/blob/main/doc/logos/scikit-learn-logo-without-subtitle.svg) | +| | __Logo 4__
File type: SVG
File size: 4.59 KB
File name: [scikit-learn-logo.svg](https://github.com/scikit-learn/scikit-learn/blob/main/doc/logos/scikit-learn-logo.svg) |
@@ -51,8 +51,8 @@ You may highlight or reference your work with scikit-learn by using one of the l - __Clear Space:__ To ensure the logo is clearly visible in all uses, surround it with a sufficient amount of clear space that is free of type, graphics, and other elements that might cause visual clutter. Do not overlap or obscure the logo with text, images, or other elements. The image below demonstrates the suggested amount of clear space margins to use around the logo.
-- __Colors:__ Only use logos in the approved color palette defined above. Do not recolor the logo. -- __Typeface:__ Do not change the typeface used in the logo. +- __Colors:__ Only use logos in the approved color palette defined above. Do not recolor the logo. +- __Typeface:__ Do not change the typeface used in the logo. - __No Modification:__ Do not attempt to recreate or otherwise modify the scikit-learn logo. diff --git a/doc/machine_learning_map.rst b/doc/machine_learning_map.rst new file mode 100644 index 0000000000000..e63ab1b1ddce6 --- /dev/null +++ b/doc/machine_learning_map.rst @@ -0,0 +1,76 @@ +:html_theme.sidebar_secondary.remove: + +.. _ml_map: + +Choosing the right estimator +============================ + +Often the hardest part of solving a machine learning problem can be finding the right +estimator for the job. Different estimators are better suited for different types of +data and different problems. + +The flowchart below is designed to give you a rough guide to which estimators to try +on your data. Click on any estimator in the chart below to see its documentation. The +**Try next** orange arrows are to be read as "if this estimator does not achieve the +desired outcome, then follow the arrow and try the next one". Use the scroll wheel to +zoom in and out, and click and drag to pan around. +You can also download the chart: :download:`ml_map.svg `. + +.. raw:: html + + + + + + +
+ +.. raw:: html + :file: images/ml_map.svg + +.. raw:: html + +
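To make the **Try next** reading concrete in code, here is a minimal sketch (the candidate order loosely follows the classification branch of the chart, and the 0.9 accuracy threshold is an arbitrary assumption for illustration)::

    from sklearn.datasets import load_iris
    from sklearn.ensemble import HistGradientBoostingClassifier
    from sklearn.model_selection import cross_val_score
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.svm import LinearSVC

    X, y = load_iris(return_X_y=True)
    # Mimic the "Try next" arrows: try each candidate in turn and keep the
    # first one that reaches the target cross-validated accuracy.
    for est in (LinearSVC(), KNeighborsClassifier(), HistGradientBoostingClassifier()):
        score = cross_val_score(est, X, y).mean()
        if score >= 0.9:
            print(f"keeping {type(est).__name__} (CV accuracy {score:.2f})")
            break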
diff --git a/doc/maintainers.rst b/doc/maintainers.rst index 0ba69d8afa60d..6b4f3a25c0ddc 100644 --- a/doc/maintainers.rst +++ b/doc/maintainers.rst @@ -10,10 +10,6 @@

[doc/maintainers.rst hunks: the raw HTML grid markup (image and div tags) around each maintainer entry was lost in extraction. The changes remove the grid entries for the maintainers moved to the emeritus list in the next hunk (Joris Van den Bossche, Yaroslav Halchenko, Jan Hendrik Metzen, Vlad Niculae, Hanmin Qin, Bertrand Thirion, Tom Dupré la Tour, Nelle Varoquaux, Roman Yurchak) and reflow the spacing around the remaining entries (Jérémie du Boisberranger, Loïc Estève, Olivier Grisel, Tim Head, Guillaume Lemaitre, Christian Lorentzen, Adam Li, Lucy Liu, Andreas Mueller, Joel Nothman, Omar Salman, Gael Varoquaux, Yao Xiao, Meekail Zain).]
diff --git a/doc/maintainers_emeritus.rst b/doc/maintainers_emeritus.rst index b979b77bba974..f5640ab2caf31 100644 --- a/doc/maintainers_emeritus.rst +++ b/doc/maintainers_emeritus.rst @@ -1,4 +1,5 @@ - Mathieu Blondel +- Joris Van den Bossche - Matthieu Brucher - Lars Buitinck - David Cournapeau @@ -11,6 +12,7 @@ - Angel Soler Gollonet - Chris Gorgolewski - Jaques Grobler +- Yaroslav Halchenko - Brian Holt - Arnaud Joly - Thouis (Ray) Jones @@ -20,14 +22,21 @@ - Wei Li - Paolo Losi - Gilles Louppe +- Jan Hendrik Metzen - Vincent Michel - Jarrod Millman +- Vlad Niculae - Alexandre Passos - Fabian Pedregosa - Peter Prettenhofer +- Hanmin Qin - (Venkat) Raghav, Rajagopalan - Jacob Schreiber - 杜世橋 Du Shiqiao +- Bertrand Thirion +- Tom Dupré la Tour - Jake Vanderplas +- Nelle Varoquaux - David Warde-Farley - Ron Weiss +- Roman Yurchak diff --git a/doc/make.bat b/doc/make.bat index b7e269a6a7836..2a32bcb678f62 100644 --- a/doc/make.bat +++ b/doc/make.bat @@ -29,8 +29,30 @@ if "%1" == "help" ( ) if "%1" == "clean" ( - for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i - del /q /s %BUILDDIR%\* + if exist %BUILDDIR%\ ( + for /d %%i in (%BUILDDIR%\*) do rmdir /q /s "%%i" + del /q /s %BUILDDIR%\* + echo. Removed %BUILDDIR%\* + ) + if exist auto_examples\ ( + rmdir /q /s auto_examples + echo. Removed auto_examples\ + ) + if exist generated\ ( + for /d %%i in (generated\*) do rmdir /q /s "%%i" + del /q /s generated\* + echo. Removed generated\* + ) + if exist modules\generated\ ( + rmdir /q /s modules\generated + echo. Removed modules\generated\ + ) + if exist css\styles\ ( + rmdir /q /s css\styles + echo. Removed css\styles\ + ) + for %%i in (api\*.rst) do del /q "%%i" + echo. Removed api\*.rst goto end ) @@ -46,6 +68,7 @@ if "%1" == "html-noplot" ( %SPHINXBUILD% -D plot_gallery=0 -b html %ALLSPHINXOPTS% %BUILDDIR%/html echo. echo.Build finished. The HTML pages are in %BUILDDIR%/html + goto end ) if "%1" == "dirhtml" ( diff --git a/doc/metadata_routing.rst b/doc/metadata_routing.rst index d319b311dddd7..d302b84c5de68 100644 --- a/doc/metadata_routing.rst +++ b/doc/metadata_routing.rst @@ -1,7 +1,5 @@ .. currentmodule:: sklearn -.. TODO: update doc/conftest.py once document is updated and examples run. - .. _metadata_routing: Metadata Routing @@ -84,8 +82,8 @@ Weighted scoring and fitting The splitter used internally in :class:`~linear_model.LogisticRegressionCV`, :class:`~model_selection.GroupKFold`, requests ``groups`` by default. However, we need to explicitly request `sample_weight` for it and for our custom scorer by specifying -`sample_weight=True` in :class:`~linear_model.LogisticRegressionCV`s `set_fit_request()` -method and in :func:`~metrics.make_scorer`s `set_score_request()` method. Both +`sample_weight=True` in :class:`~linear_model.LogisticRegressionCV`'s `set_fit_request()` +method and in :func:`~metrics.make_scorer`'s `set_score_request()` method. Both :term:`consumers ` know how to use ``sample_weight`` in their `fit()` or `score()` methods. We can then pass the metadata in :func:`~model_selection.cross_validate` which will route it to any active consumers:: @@ -248,7 +246,8 @@ should be passed to the estimator's scorer or not:: [sample_weight] are passed but are not explicitly set as requested or not requested for LogisticRegression.score, which is used within GridSearchCV.fit. Call `LogisticRegression.set_score_request({metadata}=True/False)` for each metadata - you want to request/ignore. + you want to request/ignore. See the Metadata Routing User guide + for more information. 
The issue can be fixed by explicitly setting the request value:: @@ -276,12 +275,18 @@ Meta-estimators and functions supporting metadata routing: - :class:`sklearn.calibration.CalibratedClassifierCV` - :class:`sklearn.compose.ColumnTransformer` +- :class:`sklearn.compose.TransformedTargetRegressor` - :class:`sklearn.covariance.GraphicalLassoCV` +- :class:`sklearn.ensemble.StackingClassifier` +- :class:`sklearn.ensemble.StackingRegressor` - :class:`sklearn.ensemble.VotingClassifier` - :class:`sklearn.ensemble.VotingRegressor` - :class:`sklearn.ensemble.BaggingClassifier` - :class:`sklearn.ensemble.BaggingRegressor` +- :class:`sklearn.feature_selection.RFE` +- :class:`sklearn.feature_selection.RFECV` - :class:`sklearn.feature_selection.SelectFromModel` +- :class:`sklearn.feature_selection.SequentialFeatureSelector` - :class:`sklearn.impute.IterativeImputer` - :class:`sklearn.linear_model.ElasticNetCV` - :class:`sklearn.linear_model.LarsCV` @@ -290,6 +295,7 @@ Meta-estimators and functions supporting metadata routing: - :class:`sklearn.linear_model.LogisticRegressionCV` - :class:`sklearn.linear_model.MultiTaskElasticNetCV` - :class:`sklearn.linear_model.MultiTaskLassoCV` +- :class:`sklearn.linear_model.OrthogonalMatchingPursuitCV` - :class:`sklearn.linear_model.RANSACRegressor` - :class:`sklearn.linear_model.RidgeClassifierCV` - :class:`sklearn.linear_model.RidgeCV` @@ -297,33 +303,24 @@ - :class:`sklearn.model_selection.HalvingGridSearchCV` - :class:`sklearn.model_selection.HalvingRandomSearchCV` - :class:`sklearn.model_selection.RandomizedSearchCV` +- :func:`sklearn.model_selection.permutation_test_score` - :func:`sklearn.model_selection.cross_validate` - :func:`sklearn.model_selection.cross_val_score` - :func:`sklearn.model_selection.cross_val_predict` +- :func:`sklearn.model_selection.learning_curve` +- :func:`sklearn.model_selection.validation_curve` - :class:`sklearn.multiclass.OneVsOneClassifier` - :class:`sklearn.multiclass.OneVsRestClassifier` - :class:`sklearn.multiclass.OutputCodeClassifier` - :class:`sklearn.multioutput.ClassifierChain` - :class:`sklearn.multioutput.MultiOutputClassifier` - :class:`sklearn.multioutput.MultiOutputRegressor` -- :class:`sklearn.linear_model.OrthogonalMatchingPursuitCV` - :class:`sklearn.multioutput.RegressorChain` - :class:`sklearn.pipeline.FeatureUnion` - :class:`sklearn.pipeline.Pipeline` +- :class:`sklearn.semi_supervised.SelfTrainingClassifier` Meta-estimators and tools not supporting metadata routing yet: -- :class:`sklearn.compose.TransformedTargetRegressor` - :class:`sklearn.ensemble.AdaBoostClassifier` - :class:`sklearn.ensemble.AdaBoostRegressor` -- :class:`sklearn.ensemble.StackingClassifier` -- :class:`sklearn.ensemble.StackingRegressor` -- :class:`sklearn.feature_selection.RFE` -- :class:`sklearn.feature_selection.RFECV` -- :class:`sklearn.feature_selection.SequentialFeatureSelector` -- :class:`sklearn.impute.IterativeImputer` -- :class:`sklearn.linear_model.RANSACRegressor` -- :class:`sklearn.model_selection.learning_curve` -- :class:`sklearn.model_selection.permutation_test_score` -- :class:`sklearn.model_selection.validation_curve` -- :class:`sklearn.semi_supervised.SelfTrainingClassifier` diff --git a/doc/min_dependency_substitutions.rst.template b/doc/min_dependency_substitutions.rst.template new file mode 100644 index 0000000000000..946de84902b3b --- /dev/null +++ b/doc/min_dependency_substitutions.rst.template @@ -0,0 +1,3 @@ +{% for package, (version, _) in
dependent_packages.items() -%} +.. |{{ package|capitalize }}MinVersion| replace:: {{ version }} +{% endfor %} diff --git a/doc/min_dependency_table.rst.template b/doc/min_dependency_table.rst.template new file mode 100644 index 0000000000000..fbe58633e913a --- /dev/null +++ b/doc/min_dependency_table.rst.template @@ -0,0 +1,13 @@ +.. list-table:: + :header-rows: 1 + + * - Dependency + - Minimum Version + - Purpose + + {% for package, (version, tags) in dependent_packages.items() -%} + * - {{ package }} + - {{ version }} + - {{ tags }} + + {% endfor %} diff --git a/doc/model_persistence.rst b/doc/model_persistence.rst index afd492d805e58..21d6934a48730 100644 --- a/doc/model_persistence.rst +++ b/doc/model_persistence.rst @@ -1,294 +1,394 @@ -.. Places parent toc into the sidebar - -:parenttoc: True - -.. _model_persistence: - -================= -Model persistence -================= - -After training a scikit-learn model, it is desirable to have a way to persist -the model for future use without having to retrain. This can be accomplished -using `pickle `_, `joblib -`_, `skops -`_, `ONNX `_, -or `PMML `_. In most cases -`pickle` can be used to persist a trained scikit-learn model. Once all -transitive scikit-learn dependencies have been pinned, the trained model can -then be loaded and executed under conditions similar to those in which it was -originally pinned. The following sections will give you some hints on how to -persist a scikit-learn model and will provide details on what each alternative -can offer. - -Workflow Overview ------------------ - -In this section we present a general workflow on how to persist a -scikit-learn model. We will demonstrate this with a simple example using -Python's built-in persistence module, namely `pickle -`_. - -Storing the model in an artifact -................................ - -Once the model training process in completed, the trained model can be stored -as an artifact with the help of `pickle`. The model can be saved using the -process of serialization, where the Python object hierarchy is converted into -a byte stream. We can persist a trained model in the following manner:: - - >>> from sklearn import svm - >>> from sklearn import datasets - >>> import pickle - >>> clf = svm.SVC() - >>> X, y = datasets.load_iris(return_X_y=True) - >>> clf.fit(X, y) - SVC() - >>> s = pickle.dumps(clf) - -Replicating the training environment in production -.................................................. - -The versions of the dependencies used may differ from training to production. -This may result in unexpected behaviour and errors while using the trained -model. To prevent such situations it is recommended to use the same -dependencies and versions in both the training and production environment. -These transitive dependencies can be pinned with the help of `pip`, `conda`, -`poetry`, `conda-lock`, `pixi`, etc. - -.. note:: - - To execute a pickled scikit-learn model in a reproducible environment it is - advisable to pin all transitive scikit-learn dependencies. This prevents - any incompatibility issues that may arise while trying to load the pickled - model. You can read more about persisting models with `pickle` over - :ref:`here `. - -Loading the model artifact -.......................... - -The saved scikit-learn model can be loaded using `pickle` for future use -without having to re-train the entire model from scratch. The saved model -artifact can be unpickled by converting the byte stream into an object -hierarchy. 
This can be done with the help of `pickle` as follows:: - - >>> clf2 = pickle.loads(s) # doctest:+SKIP - >>> clf2.predict(X[0:1]) # doctest:+SKIP - array([0]) - >>> y[0] # doctest:+SKIP - 0 - -Serving the model artifact -.......................... - -The last step after training a scikit-learn model is serving the model. -Once the trained model is successfully loaded it can be served to manage -different prediction requests. This can involve deploying the model as a -web service using containerization, or other model deployment strategies, -according to the specifications. In the next sections, we will explore -different approaches to persist a trained scikit-learn model. - -.. _persisting_models_with_pickle: - -Persisting models with pickle ------------------------------ - -As demonstrated in the previous section, `pickle` uses serialization and -deserialization to persist scikit-learn models. Instead of using `dumps` and -`loads`, `dump` and `load` can also be used in the following way:: - - >>> from sklearn.tree import DecisionTreeClassifier - >>> from sklearn import datasets - >>> clf = DecisionTreeClassifier() - >>> X, y = datasets.load_iris(return_X_y=True) - >>> clf.fit(X, y) - DecisionTreeClassifier() - >>> from pickle import dump, load - >>> with open('filename.pkl', 'wb') as f: dump(clf, f) # doctest:+SKIP - >>> with open('filename.pkl', 'rb') as f: clf2 = load(f) # doctest:+SKIP - >>> clf2.predict(X[0:1]) # doctest:+SKIP - array([0]) - >>> y[0] - 0 - -For applications that involve writing and loading the serialized object to or -from a file, `dump` and `load` can be used instead of `dumps` and `loads`. When -file operations are not required the pickled representation of the object can -be returned as a bytes object with the help of the `dumps` function. The -reconstituted object hierarchy of the pickled data can then be returned using -the `loads` function. - -Persisting models with joblib ------------------------------ - -In the specific case of scikit-learn, it may be better to use joblib's -replacement of pickle (``dump`` & ``load``), which is more efficient on -objects that carry large numpy arrays internally as is often the case for -fitted scikit-learn estimators, but can only pickle to the disk and not to a -string:: - - >>> from joblib import dump, load - >>> dump(clf, 'filename.joblib') # doctest:+SKIP - -Later you can load back the pickled model (possibly in another Python process) -with:: - - >>> clf = load('filename.joblib') # doctest:+SKIP - -.. note:: - - ``dump`` and ``load`` functions also accept file-like object - instead of filenames. More information on data persistence with Joblib is - available `here - `_. - -|details-start| -**InconsistentVersionWarning** -|details-split| - -When an estimator is unpickled with a scikit-learn version that is inconsistent -with the version the estimator was pickled with, a -:class:`~sklearn.exceptions.InconsistentVersionWarning` is raised. This warning -can be caught to obtain the original version the estimator was pickled with:: - - from sklearn.exceptions import InconsistentVersionWarning - warnings.simplefilter("error", InconsistentVersionWarning) - - try: - est = pickle.loads("model_from_prevision_version.pickle") - except InconsistentVersionWarning as w: - print(w.original_sklearn_version) - -|details-end| - -.. 
_persistence_limitations: - -Security & maintainability limitations for pickle and joblib ------------------------------------------------------------- - -pickle (and joblib by extension), has some issues regarding maintainability -and security. Because of this, - -* Never unpickle untrusted data as it could lead to malicious code being - executed upon loading. -* While models saved using one version of scikit-learn might load in - other versions, this is entirely unsupported and inadvisable. It should - also be kept in mind that operations performed on such data could give - different and unexpected results. - -In order to rebuild a similar model with future versions of scikit-learn, -additional metadata should be saved along the pickled model: - -* The training data, e.g. a reference to an immutable snapshot -* The python source code used to generate the model -* The versions of scikit-learn and its dependencies -* The cross validation score obtained on the training data - -This should make it possible to check that the cross-validation score is in the -same range as before. - -Aside for a few exceptions, pickled models should be portable across -architectures assuming the same versions of dependencies and Python are used. -If you encounter an estimator that is not portable please open an issue on -GitHub. Pickled models are often deployed in production using containers, like -Docker, in order to freeze the environment and dependencies. - -If you want to know more about these issues and explore other possible -serialization methods, please refer to this -`talk by Alex Gaynor -`_. - -Persisting models with a more secure format using skops -------------------------------------------------------- - -`skops `__ provides a more secure -format via the :mod:`skops.io` module. It avoids using :mod:`pickle` and only -loads files which have types and references to functions which are trusted -either by default or by the user. - -|details-start| -**Using skops** -|details-split| - -The API is very similar to ``pickle``, and -you can persist your models as explain in the `docs -`__ using -:func:`skops.io.dump` and :func:`skops.io.dumps`:: - - import skops.io as sio - obj = sio.dumps(clf) - -And you can load them back using :func:`skops.io.load` and -:func:`skops.io.loads`. However, you need to specify the types which are -trusted by you. You can get existing unknown types in a dumped object / file -using :func:`skops.io.get_untrusted_types`, and after checking its contents, -pass it to the load function:: - - unknown_types = sio.get_untrusted_types(data=obj) - clf = sio.loads(obj, trusted=unknown_types) - -If you trust the source of the file / object, you can pass ``trusted=True``:: - - clf = sio.loads(obj, trusted=True) - -Please report issues and feature requests related to this format on the `skops -issue tracker `__. - -|details-end| - -Persisting models with interoperable formats --------------------------------------------- - -For reproducibility and quality control needs, when different architectures -and environments should be taken into account, exporting the model in -`Open Neural Network -Exchange `_ format or `Predictive Model Markup Language -(PMML) `_ format -might be a better approach than using `pickle` alone. -These are helpful where you may want to use your model for prediction in a -different environment from where the model was trained. - -ONNX is a binary serialization of the model. 
It has been developed to improve -the usability of the interoperable representation of data models. -It aims to facilitate the conversion of the data -models between different machine learning frameworks, and to improve their -portability on different computing architectures. More details are available -from the `ONNX tutorial `_. -To convert scikit-learn model to ONNX a specific tool `sklearn-onnx -`_ has been developed. - -PMML is an implementation of the `XML -`_ document standard -defined to represent data models together with the data used to generate them. -Being human and machine readable, -PMML is a good option for model validation on different platforms and -long term archiving. On the other hand, as XML in general, its verbosity does -not help in production when performance is critical. -To convert scikit-learn model to PMML you can use for example `sklearn2pmml -`_ distributed under the Affero GPLv3 -license. - -Summarizing the keypoints -------------------------- - -Based on the different approaches for model persistence, the keypoints for each -approach can be summarized as follows: - -* `pickle`: It is native to Python and any Python object can be serialized and - deserialized using `pickle`, including custom Python classes and objects. - While `pickle` can be used to easily save and load scikit-learn models, - unpickling of untrusted data might lead to security issues. -* `joblib`: Efficient storage and memory mapping techniques make it faster - when working with large machine learning models or large numpy arrays. However, - it may trigger the execution of malicious code while loading untrusted data. -* `skops`: Trained scikit-learn models can be easily shared and put into - production using `skops`. It is more secure compared to alternate approaches - as it allows users to load data from trusted sources. It however, does not - allow for persistence of arbitrary Python code. -* `ONNX`: It provides a uniform format for persisting any machine learning - or deep learning model (other than scikit-learn) and is useful - for model inference. It can however, result in compatibility issues with - different frameworks. -* `PMML`: Platform independent format that can be used to persist models - and reduce the risk of vendor lock-ins. The complexity and verbosity of - this format might make it harder to use for larger models. \ No newline at end of file +.. _model_persistence: + +================= +Model persistence +================= + +.. 
list-table:: Summary of model persistence methods + :widths: 25 50 50 + :header-rows: 1 + + * - Persistence method + - Pros + - Risks / Cons + * - :ref:`ONNX ` + - * Serve models without a Python environment + * Serving and training environments independent of one another + * Most secure option + - * Not all scikit-learn models are supported + * Custom estimators require more work to support + * Original Python object is lost and cannot be reconstructed + * - :ref:`skops_persistence` + - * More secure than `pickle` based formats + * Contents can be partly validated without loading + - * Not as fast as `pickle` based formats + * Supports fewer types than `pickle` based formats + * Requires the same environment as the training environment + * - :mod:`pickle` + - * Native to Python + * Can serialize most Python objects + * Efficient memory usage with `protocol=5` + - * Loading can execute arbitrary code + * Requires the same environment as the training environment + * - :mod:`joblib` + - * Efficient memory usage + * Supports memory mapping + * Easy shortcuts for compression and decompression + - * Pickle based format + * Loading can execute arbitrary code + * Requires the same environment as the training environment + * - `cloudpickle`_ + - * Can serialize non-packaged, custom Python code + * Loading efficiency comparable to :mod:`pickle` with `protocol=5` + - * Pickle based format + * Loading can execute arbitrary code + * No forward compatibility guarantees + * Requires the same environment as the training environment + +After training a scikit-learn model, it is desirable to have a way to persist +the model for future use without having to retrain. Based on your use-case, +there are a few different ways to persist a scikit-learn model, and here we +help you decide which one suits you best. In order to make a decision, you need +to answer the following questions: + +1. Do you need the Python object after persistence, or do you only need to + persist in order to serve the model and get predictions out of it? + +If you only need to serve the model and no further investigation on the Python +object itself is required, then :ref:`ONNX ` might be the +best fit for you. Note that not all models are supported by ONNX. + +In case ONNX is not suitable for your use-case, the next question is: + +2. Do you absolutely trust the source of the model, or are there any security + concerns regarding where the persisted model comes from? + +If you have security concerns, then you should consider using :ref:`skops.io +` which gives you back the Python object, but unlike +`pickle` based persistence solutions, loading the persisted model doesn't +automatically allow arbitrary code execution. Note that this requires manual +investigation of the persisted file, which :mod:`skops.io` allows you to do. + +The other solutions assume you absolutely trust the source of the file to be +loaded, as they are all susceptible to arbitrary code execution upon loading +the persisted file since they all use the pickle protocol under the hood. + +3. Do you care about the performance of loading the model, and about sharing it + between processes, where a memory-mapped object on disk is beneficial? + +If yes, then you can consider using :ref:`joblib `. If this +is not a major concern for you, then you can use the built-in :mod:`pickle` +module. + +4. Did you try :mod:`pickle` or :mod:`joblib` and found that the model cannot + be persisted? This can happen, for instance, when you have user-defined + functions in your model.
+ +If yes, then you can use `cloudpickle`_ which can serialize certain objects +which cannot be serialized by :mod:`pickle` or :mod:`joblib`. + + +Workflow Overview +----------------- + +In a typical workflow, the first step is to train the model using scikit-learn +and scikit-learn compatible libraries. Note that support for scikit-learn and +third party estimators varies across the different persistence methods. + +Train and Persist the Model +........................... + +Creating an appropriate model depends on your use-case. As an example, here we +train a :class:`sklearn.ensemble.HistGradientBoostingClassifier` on the iris +dataset:: + + >>> from sklearn import ensemble + >>> from sklearn import datasets + >>> clf = ensemble.HistGradientBoostingClassifier() + >>> X, y = datasets.load_iris(return_X_y=True) + >>> clf.fit(X, y) + HistGradientBoostingClassifier() + +Once the model is trained, you can persist it using your desired method, and +then you can load the model in a separate environment and get predictions from +it given input data. Here there are two major paths depending on how you +persist and plan to serve the model: + +- :ref:`ONNX `: You need an `ONNX` runtime and an environment with appropriate dependencies installed to load the model and use the runtime to get predictions. This environment can be minimal and does not necessarily even require Python to be installed to load the model and compute predictions. Also note that `onnxruntime` typically requires much less RAM than Python to compute predictions from small models. + +- :mod:`skops.io`, :mod:`pickle`, :mod:`joblib`, `cloudpickle`_: You need a Python environment with the appropriate dependencies installed to load the model and get predictions from it. This environment should have the same **packages** and the same **versions** as the environment where the model was trained. Note that none of these methods support loading a model trained with a different version of scikit-learn, and possibly different versions of other dependencies such as `numpy` and `scipy`. A related concern is running the persisted model on different hardware; in most cases you should be able to load a persisted model on different hardware. + + +.. _onnx_persistence: + +ONNX +---- + +`ONNX`, or `Open Neural Network Exchange `__ format, is best +suited to use-cases where one needs to persist the model and then use the +persisted artifact to get predictions without the need to load the Python +object itself. It is also useful in cases where the serving environment needs +to be lean and minimal, since the `ONNX` runtime does not require `python`. + +`ONNX` is a binary serialization of the model. It has been developed to improve +the usability of the interoperable representation of data models. It aims to +facilitate the conversion of the data models between different machine learning +frameworks, and to improve their portability on different computing +architectures. More details are available from the `ONNX tutorial +`__. To convert a scikit-learn model to `ONNX`, +`sklearn-onnx `__ has been developed. However, +not all scikit-learn models are supported, and it is limited to core +scikit-learn and does not support most third party estimators. One can write a +custom converter for third party or custom estimators, but the documentation to +do that is sparse and it might be challenging to do so. + +..
dropdown:: Using ONNX + + To convert the model to `ONNX` format, you need to give the converter some + information about the input as well; you can read more about this `here + `__:: + + import numpy + from skl2onnx import to_onnx + onx = to_onnx(clf, X[:1].astype(numpy.float32), target_opset=12) + with open("filename.onnx", "wb") as f: + f.write(onx.SerializeToString()) + + You can load the model in Python and use the `ONNX` runtime to get + predictions:: + + from onnxruntime import InferenceSession + with open("filename.onnx", "rb") as f: + onx = f.read() + sess = InferenceSession(onx, providers=["CPUExecutionProvider"]) + pred_ort = sess.run(None, {"X": X.astype(numpy.float32)})[0] + +.. _skops_persistence: + +`skops.io` +---------- + +:mod:`skops.io` avoids using :mod:`pickle` and only loads files which have types +and references to functions which are trusted either by default or by the user. +Therefore it provides a more secure format than :mod:`pickle`, :mod:`joblib`, +and `cloudpickle`_. + + +.. dropdown:: Using skops + + The API is very similar to :mod:`pickle`, and you can persist your models as + explained in the `documentation + `__ using + :func:`skops.io.dump` and :func:`skops.io.dumps`:: + + import skops.io as sio + sio.dump(clf, "filename.skops")  # writes the model to disk; returns None + + And you can load them back using :func:`skops.io.load` and + :func:`skops.io.loads`. However, you need to specify the types which are + trusted by you. You can get existing unknown types in a dumped object / file + using :func:`skops.io.get_untrusted_types`, and after checking its contents, + pass it to the load function:: + + unknown_types = sio.get_untrusted_types(file="filename.skops") + # investigate the contents of unknown_types, and only load if you trust + # everything you see. + clf = sio.load("filename.skops", trusted=unknown_types) + + Please report issues and feature requests related to this format on the `skops + issue tracker `__. + + +.. _pickle_persistence: + +`pickle`, `joblib`, and `cloudpickle` +------------------------------------- + +These three modules / packages use the `pickle` protocol under the hood, but +come with slight variations: + +- :mod:`pickle` is a module from the Python Standard Library. It can serialize + and deserialize any Python object, including custom Python classes and + objects. +- :mod:`joblib` is more efficient than `pickle` when working with large machine + learning models or large numpy arrays. +- `cloudpickle`_ can serialize certain objects which cannot be serialized by + :mod:`pickle` or :mod:`joblib`, such as user defined functions and lambda + functions. This can happen, for instance, when using a + :class:`~sklearn.preprocessing.FunctionTransformer` with a custom + function to transform the data. + +.. dropdown:: Using `pickle`, `joblib`, or `cloudpickle` + + Depending on your use-case, you can choose one of these three methods to + persist and load your scikit-learn model, and they all follow the same API:: + + # Here you can replace pickle with joblib or cloudpickle + from pickle import dump + with open("filename.pkl", "wb") as f: + dump(clf, f, protocol=5) + + Using `protocol=5` is recommended to reduce memory usage and make it faster to + store and load any large NumPy array stored as a fitted attribute in the model. + You can alternatively pass `protocol=pickle.HIGHEST_PROTOCOL` which is + equivalent to `protocol=5` in Python 3.8 and later (at the time of writing).
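   The "easy shortcuts for compression and decompression" listed for
   :mod:`joblib` in the summary table boil down to a single argument (a sketch;
   the file name and `compress` level are illustrative)::

       from joblib import dump
       dump(clf, "filename.joblib", compress=3)  # zlib compression, level 3

   :func:`joblib.load` decompresses such files transparently.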
+ + And later when needed, you can load the same object from the persisted file:: + + # Here you can replace pickle with joblib or cloudpickle + from pickle import load + with open("filename.pkl", "rb") as f: + clf = load(f) + +.. _persistence_limitations: + +Security & Maintainability Limitations +-------------------------------------- + +:mod:`pickle` (and :mod:`joblib` and :mod:`cloudpickle` by extension) has +many documented security vulnerabilities by design and should only be used if +the artifact, i.e. the pickle-file, is coming from a trusted and verified +source. You should never load a pickle file from an untrusted source, similarly +to how you should never execute code from an untrusted source. + +Also note that arbitrary computations can be represented using the `ONNX` +format, and it is therefore recommended to serve models using `ONNX` in a +sandboxed environment to safeguard against computational and memory exploits. + +Also note that there are no supported ways to load a model trained with a +different version of scikit-learn. While using :mod:`skops.io`, :mod:`joblib`, +:mod:`pickle`, or `cloudpickle`_, models saved using one version of +scikit-learn might load in other versions; however, this is entirely +unsupported and inadvisable. It should also be kept in mind that operations +performed on such data could give different and unexpected results, or even +crash your Python process. + +In order to rebuild a similar model with future versions of scikit-learn, +additional metadata should be saved along the pickled model: + +* The training data, e.g. a reference to an immutable snapshot +* The Python source code used to generate the model +* The versions of scikit-learn and its dependencies +* The cross validation score obtained on the training data + +This should make it possible to check that the cross-validation score is in the +same range as before. + +Aside from a few exceptions, persisted models should be portable across +operating systems and hardware architectures assuming the same versions of +dependencies and Python are used. If you encounter an estimator that is not +portable, please open an issue on GitHub. Persisted models are often deployed +in production using containers like Docker, in order to freeze the environment +and dependencies. + +If you want to know more about these issues, please refer to these talks: + +- `Adrin Jalali: Let's exploit pickle, and skops to the rescue! | PyData + Amsterdam 2023 `__. +- `Alex Gaynor: Pickles are for Delis, not Software - PyCon 2014 + `__. + + +.. _serving_environment: + +Replicating the training environment in production +.................................................. + +If the versions of the dependencies differ between training and production, +unexpected behaviour and errors may occur while using the trained model. To +prevent such situations it is recommended to use the same +dependencies and versions in both the training and production environment. +These transitive dependencies can be pinned with the help of package management +tools like `pip`, `mamba`, `conda`, `poetry`, `conda-lock`, `pixi`, etc. + +It is not always possible to load a model trained with older versions of the +scikit-learn library and its dependencies in an updated software environment. +Instead, you might need to retrain the model with the new versions of all +the libraries. So when training a model, it is important to record the training +recipe (e.g.
a Python script) and training set information, and metadata about +all the dependencies to be able to automatically reconstruct the same training +environment for the updated software. + +.. dropdown:: InconsistentVersionWarning + + When an estimator is loaded with a scikit-learn version that is inconsistent + with the version the estimator was pickled with, an + :class:`~sklearn.exceptions.InconsistentVersionWarning` is raised. This warning + can be caught to obtain the original version the estimator was pickled with:: + + import pickle + import warnings + + from sklearn.exceptions import InconsistentVersionWarning + + warnings.simplefilter("error", InconsistentVersionWarning) + + try: + with open("model_from_previous_version.pickle", "rb") as f: + est = pickle.load(f) + except InconsistentVersionWarning as w: + print(w.original_sklearn_version) + + +Serving the model artifact +.......................... + +The last step after training a scikit-learn model is serving the model. +Once the trained model is successfully loaded, it can be served to manage +different prediction requests. This can involve deploying the model as a +web service using containerization, or other model deployment strategies, +according to the specifications. + + +Summarizing the key points +-------------------------- + +Based on the different approaches for model persistence, the key points for +each approach can be summarized as follows: + +* `ONNX`: It provides a uniform format for persisting any machine learning or + deep learning model (not only scikit-learn) and is useful for model + inference (predictions). It can, however, result in compatibility issues with + different frameworks. +* :mod:`skops.io`: Trained scikit-learn models can be easily shared and put + into production using :mod:`skops.io`. It is more secure compared to + alternate approaches based on :mod:`pickle` because it does not load + arbitrary code unless explicitly asked for by the user. Such code needs to be + packaged and importable in the target Python environment. +* :mod:`joblib`: Efficient memory mapping techniques make it faster when using + the same persisted model in multiple Python processes with + `mmap_mode="r"`. It also gives easy shortcuts to compress and decompress the + persisted object without the need for extra code. However, it may trigger the + execution of malicious code when loading a model from an untrusted source, as + can any other pickle-based persistence mechanism. +* :mod:`pickle`: It is native to Python and most Python objects can be + serialized and deserialized using :mod:`pickle`, including custom Python + classes and functions as long as they are defined in a package that can be + imported in the target environment. While :mod:`pickle` can be used to easily + save and load scikit-learn models, it may trigger the execution of malicious + code while loading a model from an untrusted source. :mod:`pickle` can also + be very efficient memory-wise if the model was persisted with `protocol=5` but + it does not support memory mapping. +* `cloudpickle`_: It has loading efficiency comparable to :mod:`pickle` and + :mod:`joblib` (without memory mapping), but offers additional flexibility to + serialize custom Python code such as lambda expressions and interactively + defined functions and classes.
It might be a last resort to persist pipelines + with custom Python components such as a + :class:`sklearn.preprocessing.FunctionTransformer` that wraps a function + defined in the training script itself or more generally outside of any + importable Python package. Note that `cloudpickle`_ offers no forward + compatibility guarantees and you might need the same version of + `cloudpickle`_ to load the persisted model along with the same version of all + the libraries used to define the model. Like the other pickle-based persistence + mechanisms, it may trigger the execution of malicious code while loading + a model from an untrusted source. + +.. _cloudpickle: https://github.com/cloudpipe/cloudpickle diff --git a/doc/model_selection.rst b/doc/model_selection.rst index 522544aefc820..b78c9ff4c3aa8 100644 --- a/doc/model_selection.rst +++ b/doc/model_selection.rst @@ -1,9 +1,3 @@ -.. Places parent toc into the sidebar - -:parenttoc: True - -.. include:: includes/big_toc_css.rst - .. _model_selection: Model selection and evaluation diff --git a/doc/modules/array_api.rst b/doc/modules/array_api.rst index 7a21274a7250f..d24ce3573e7b6 100644 --- a/doc/modules/array_api.rst +++ b/doc/modules/array_api.rst @@ -1,7 +1,3 @@ -.. Places parent toc into the sidebar - -:parenttoc: True - .. _array_api: ================================ @@ -12,21 +8,39 @@ Array API support (experimental) The `Array API `_ specification defines a standard API for all array manipulation libraries with a NumPy-like API. -Scikit-learn's Array API support requires -`array-api-compat `__ to be installed. +Scikit-learn vendors pinned copies of +`array-api-compat `__ +and `array-api-extra `__. + +Scikit-learn's support for the array API standard requires the environment variable +`SCIPY_ARRAY_API` to be set to `1` before importing `scipy` and `scikit-learn`: + +.. prompt:: bash $ + + export SCIPY_ARRAY_API=1 + +Please note that this environment variable is intended for temporary use. +For more details, refer to SciPy's `Array API documentation +`_. Some scikit-learn estimators that primarily rely on NumPy (as opposed to using Cython) to implement the algorithmic logic of their `fit`, `predict` or `transform` methods can be configured to accept any Array API compatible input -datastructures and automatically dispatch operations to the underlying namespace +data structures and automatically dispatch operations to the underlying namespace instead of relying on NumPy. At this stage, this support is **considered experimental** and must be enabled explicitly as explained in the following. .. note:: Currently, only `array-api-strict`, `cupy`, and `PyTorch` are known to work with scikit-learn's estimators. +The following video provides an overview of the standard's design principles +and how it facilitates interoperability between array libraries: + +- `Scikit-learn on GPUs with Array API `_ + by :user:`Thomas Fan ` at PyData NYC 2023. Example usage ============= @@ -55,7 +69,7 @@ Here is an example code snippet to demonstrate how to use `CuPy` After the model is trained, fitted attributes that are arrays will also be from the same Array API namespace as the training data. For example, if CuPy's Array API namespace was used for training, then fitted attributes will be on the -GPU.
We provide an experimental `_estimator_with_converted_arrays` utility that transfers an estimator's attributes from Array API to a ndarray:: >>> from sklearn.utils._array_api import _estimator_with_converted_arrays @@ -97,22 +111,69 @@ Estimators `svd_solver="randomized"` and `power_iteration_normalizer="QR"`) - :class:`linear_model.Ridge` (with `solver="svd"`) - :class:`discriminant_analysis.LinearDiscriminantAnalysis` (with `solver="svd"`) +- :class:`preprocessing.Binarizer` - :class:`preprocessing.KernelCenterer` +- :class:`preprocessing.LabelEncoder` - :class:`preprocessing.MaxAbsScaler` - :class:`preprocessing.MinMaxScaler` - :class:`preprocessing.Normalizer` +Meta-estimators +--------------- + +Meta-estimators that accept Array API inputs provided that the base estimator +also does: + +- :class:`model_selection.GridSearchCV` +- :class:`model_selection.RandomizedSearchCV` +- :class:`model_selection.HalvingGridSearchCV` +- :class:`model_selection.HalvingRandomSearchCV` + Metrics ------- +- :func:`sklearn.metrics.cluster.entropy` - :func:`sklearn.metrics.accuracy_score` +- :func:`sklearn.metrics.d2_tweedie_score` +- :func:`sklearn.metrics.explained_variance_score` +- :func:`sklearn.metrics.f1_score` +- :func:`sklearn.metrics.fbeta_score` +- :func:`sklearn.metrics.hamming_loss` +- :func:`sklearn.metrics.jaccard_score` +- :func:`sklearn.metrics.max_error` +- :func:`sklearn.metrics.mean_absolute_error` +- :func:`sklearn.metrics.mean_absolute_percentage_error` +- :func:`sklearn.metrics.mean_gamma_deviance` +- :func:`sklearn.metrics.mean_pinball_loss` +- :func:`sklearn.metrics.mean_poisson_deviance` (requires `enabling array API support for SciPy `_) +- :func:`sklearn.metrics.mean_squared_error` +- :func:`sklearn.metrics.mean_squared_log_error` +- :func:`sklearn.metrics.mean_tweedie_deviance` +- :func:`sklearn.metrics.multilabel_confusion_matrix` +- :func:`sklearn.metrics.pairwise.additive_chi2_kernel` +- :func:`sklearn.metrics.pairwise.chi2_kernel` +- :func:`sklearn.metrics.pairwise.cosine_similarity` +- :func:`sklearn.metrics.pairwise.cosine_distances` +- :func:`sklearn.metrics.pairwise.euclidean_distances` (see :ref:`device_support_for_float64`) +- :func:`sklearn.metrics.pairwise.linear_kernel` +- :func:`sklearn.metrics.pairwise.paired_cosine_distances` +- :func:`sklearn.metrics.pairwise.paired_euclidean_distances` +- :func:`sklearn.metrics.pairwise.polynomial_kernel` +- :func:`sklearn.metrics.pairwise.rbf_kernel` (see :ref:`device_support_for_float64`) +- :func:`sklearn.metrics.pairwise.sigmoid_kernel` +- :func:`sklearn.metrics.precision_score` +- :func:`sklearn.metrics.precision_recall_fscore_support` - :func:`sklearn.metrics.r2_score` +- :func:`sklearn.metrics.recall_score` +- :func:`sklearn.metrics.root_mean_squared_error` +- :func:`sklearn.metrics.root_mean_squared_log_error` - :func:`sklearn.metrics.zero_one_loss` Tools ----- - :func:`model_selection.train_test_split` +- :func:`utils.check_consistent_length` Coverage is expected to grow over time. Please follow the dedicated `meta-issue on GitHub `_ to track progress. @@ -138,12 +199,10 @@ Common estimator checks Add the `array_api_support` tag to an estimator's set of tags to indicate that it supports the Array API. This will enable dedicated checks as part of the -common tests to verify that the estimators result's are the same when using +common tests to verify that the estimators' results are the same when using vanilla NumPy and Array API inputs.
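For a third-party estimator, this could look as follows (a sketch, assuming the `__sklearn_tags__` mechanism of recent scikit-learn versions; the estimator itself is hypothetical)::

    from sklearn.base import BaseEstimator

    class MyArrayAPIEstimator(BaseEstimator):
        def __sklearn_tags__(self):
            # Advertise array API support so that the dedicated common
            # checks are run for this estimator.
            tags = super().__sklearn_tags__()
            tags.array_api_support = True
            return tags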
-To run these checks you need to install -`array_api_compat `_ in your -test environment. To run the full set of checks you need to install both +To run the full set of checks you need to install both `PyTorch `_ and `CuPy `_ and have a GPU. Checks that can not be executed or have missing dependencies will be automatically skipped. Therefore it's important to run the tests with the @@ -151,9 +210,11 @@ automatically skipped. Therefore it's important to run the tests with the .. prompt:: bash $ - pip install array-api-compat # and other libraries as needed + pip install ... # selected libraries as needed pytest -k "array_api" -v +.. _mps_support: + Note on MPS device support -------------------------- @@ -173,3 +234,17 @@ To enable the MPS support in PyTorch, set the environment variable At the time of writing all scikit-learn tests should pass, however, the computational speed is not necessarily better than with the CPU device. + +.. _device_support_for_float64: + +Note on device support for ``float64`` +-------------------------------------- + +Certain operations within scikit-learn are automatically performed on +floating-point values with `float64` precision to prevent overflows and ensure +correctness (e.g., :func:`metrics.pairwise.euclidean_distances`). However, +certain combinations of array namespaces and devices, such as `PyTorch on MPS` +(see :ref:`mps_support`), do not support the `float64` data type. In these cases, +scikit-learn will revert to using the `float32` data type instead. This can result in +different behavior (typically numerically unstable results) compared to not using array +API dispatching or using a device with `float64` support. diff --git a/doc/modules/biclustering.rst b/doc/modules/biclustering.rst index 2189e85e0f0ef..41c2316c753ad 100644 --- a/doc/modules/biclustering.rst +++ b/doc/modules/biclustering.rst @@ -147,21 +147,21 @@ Then the rows of :math:`Z` are clustered using :ref:`k-means and the remaining ``n_columns`` labels provide the column partitioning. -.. topic:: Examples: - * :ref:`sphx_glr_auto_examples_bicluster_plot_spectral_coclustering.py`: A simple example - showing how to generate a data matrix with biclusters and apply - this method to it. +.. rubric:: Examples +* :ref:`sphx_glr_auto_examples_bicluster_plot_spectral_coclustering.py`: A simple example + showing how to generate a data matrix with biclusters and apply + this method to it. - * :ref:`sphx_glr_auto_examples_bicluster_plot_bicluster_newsgroups.py`: An example of finding - biclusters in the twenty newsgroup dataset. +* :ref:`sphx_glr_auto_examples_bicluster_plot_bicluster_newsgroups.py`: An example of finding + biclusters in the twenty newsgroup dataset. -.. topic:: References: - * Dhillon, Inderjit S, 2001. :doi:`Co-clustering documents and words using - bipartite spectral graph partitioning - <10.1145/502512.502550>` +.. rubric:: References +* Dhillon, Inderjit S, 2001. :doi:`Co-clustering documents and words using + bipartite spectral graph partitioning + <10.1145/502512.502550>` .. _spectral_biclustering: @@ -220,7 +220,7 @@ Given these singular vectors, they are ranked according to which can be best approximated by a piecewise-constant vector. The approximations for each vector are found using one-dimensional k-means and scored using the Euclidean distance. Some subset of the best left -and right singular vector are selected. +and right singular vectors are selected.
Next, the data is projected to this best subset of singular vectors and clustered. For instance, if :math:`p` singular vectors were calculated, the @@ -234,17 +234,17 @@ Similarly, projecting the columns to :math:`A^{\top} * U` and clustering this :math:`n \times q` matrix yields the column labels. -.. topic:: Examples: - * :ref:`sphx_glr_auto_examples_bicluster_plot_spectral_biclustering.py`: a simple example - showing how to generate a checkerboard matrix and bicluster it. +.. rubric:: Examples +* :ref:`sphx_glr_auto_examples_bicluster_plot_spectral_biclustering.py`: a simple example + showing how to generate a checkerboard matrix and bicluster it. -.. topic:: References: - * Kluger, Yuval, et. al., 2003. :doi:`Spectral biclustering of microarray - data: coclustering genes and conditions - <10.1101/gr.648603>` +.. rubric:: References +* Kluger, Yuval, et al., 2003. :doi:`Spectral biclustering of microarray + data: coclustering genes and conditions + <10.1101/gr.648603>` .. _biclustering_evaluation: @@ -276,7 +276,7 @@ now, only the Jaccard index is implemented: where :math:`A` and :math:`B` are biclusters, :math:`|A \cap B|` is the number of elements in their intersection. The Jaccard index -achieves its minimum of 0 when the biclusters to not overlap at all +achieves its minimum of 0 when the biclusters do not overlap at all and its maximum of 1 when they are identical. Several methods have been developed to compare two sets of biclusters. @@ -288,7 +288,8 @@ available: 2. Assign biclusters from one set to another in a one-to-one fashion to maximize the sum of their similarities. This step is performed - using the Hungarian algorithm. + using :func:`scipy.optimize.linear_sum_assignment`, which uses a + modified Jonker-Volgenant algorithm. 3. The final sum of similarities is divided by the size of the larger set. @@ -298,8 +299,8 @@ are totally dissimilar. The maximum score, 1, occurs when both sets are identical. -.. topic:: References: - * Hochreiter, Bodenhofer, et. al., 2010. `FABIA: factor analysis - for bicluster acquisition - `__. +.. rubric:: References +* Hochreiter, Bodenhofer, et al., 2010. `FABIA: factor analysis + for bicluster acquisition + `__. diff --git a/doc/modules/calibration.rst b/doc/modules/calibration.rst index c0a6edb837b2f..a7b34065fe330 100644 --- a/doc/modules/calibration.rst +++ b/doc/modules/calibration.rst @@ -149,9 +149,14 @@ The :class:`CalibratedClassifierCV` class is used to calibrate a classifier. unbiased data is always used to fit the calibrator. The data is split into k `(train_set, test_set)` couples (as determined by `cv`). When `ensemble=True` (default), the following procedure is repeated independently for each -cross-validation split: a clone of `base_estimator` is first trained on the -train subset. Then its predictions on the test subset are used to fit a -calibrator (either a sigmoid or isotonic regressor). This results in an +cross-validation split: + +1. a clone of `base_estimator` is trained on the train subset +2. the trained `base_estimator` makes predictions on the test subset +3. the predictions are used to fit a calibrator (either a sigmoid or isotonic + regressor) (when the data is multiclass, a calibrator is fit for every class) + +This results in an
Each couple is exposed in the `calibrated_classifiers_` attribute, where each entry is a calibrated @@ -162,6 +167,15 @@ predicted probabilities of the `k` estimators in the `calibrated_classifiers_` list. The output of :term:`predict` is the class that has the highest probability. +It is important to choose `cv` carefully when using `ensemble=True`. +All classes should be present in both train and test subsets for every split. +When a class is absent in the train subset, the predicted probability for that +class will default to 0 for the `(classifier, calibrator)` couple of that split. +This skews the :term:`predict_proba` as it averages across all couples. +When a class is absent in the test subset, the calibrator for that class +(within the `(classifier, calibrator)` couple of that split) is +fit on data with no positive class. This results in ineffective calibration. + When `ensemble=False`, cross-validation is used to obtain 'unbiased' predictions for all the data, via :func:`~sklearn.model_selection.cross_val_predict`. @@ -179,11 +193,11 @@ The main advantage of using `ensemble=False` is computational: it reduces the overall fit time by training only a single base classifier and calibrator pair, decreases the final model size and increases prediction speed. -Alternatively an already fitted classifier can be calibrated by setting -`cv="prefit"`. In this case, the data is not split and all of it is used to -fit the regressor. It is up to the user to -make sure that the data used for fitting the classifier is disjoint from the -data used for fitting the regressor. +Alternatively an already fitted classifier can be calibrated by using a +:class:`~sklearn.frozen.FrozenEstimator` as +``CalibratedClassifierCV(estimator=FrozenEstimator(estimator))``. +It is up to the user to make sure that the data used for fitting the classifier +is disjoint from the data used for fitting the regressor. :class:`CalibratedClassifierCV` supports the use of two regression techniques for calibration via the `method` parameter: `"sigmoid"` and `"isotonic"`. @@ -262,51 +276,51 @@ probabilities, the calibrated probabilities for each class are predicted separately. As those probabilities do not necessarily sum to one, a postprocessing is performed to normalize them. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_calibration_plot_calibration_curve.py` - * :ref:`sphx_glr_auto_examples_calibration_plot_calibration_multiclass.py` - * :ref:`sphx_glr_auto_examples_calibration_plot_calibration.py` - * :ref:`sphx_glr_auto_examples_calibration_plot_compare_calibration.py` +* :ref:`sphx_glr_auto_examples_calibration_plot_calibration_curve.py` +* :ref:`sphx_glr_auto_examples_calibration_plot_calibration_multiclass.py` +* :ref:`sphx_glr_auto_examples_calibration_plot_calibration.py` +* :ref:`sphx_glr_auto_examples_calibration_plot_compare_calibration.py` -.. topic:: References: +.. rubric:: References - .. [1] Allan H. Murphy (1973). - :doi:`"A New Vector Partition of the Probability Score" - <10.1175/1520-0450(1973)012%3C0595:ANVPOT%3E2.0.CO;2>` - Journal of Applied Meteorology and Climatology +.. [1] Allan H. Murphy (1973). + :doi:`"A New Vector Partition of the Probability Score" + <10.1175/1520-0450(1973)012%3C0595:ANVPOT%3E2.0.CO;2>` + Journal of Applied Meteorology and Climatology - .. [2] `On the combination of forecast probabilities for - consecutive precipitation periods. - `_ - Wea. Forecasting, 5, 640–650., Wilks, D. S., 1990a +.. 
[2] `On the combination of forecast probabilities for + consecutive precipitation periods. + `_ + Wea. Forecasting, 5, 640–650., Wilks, D. S., 1990a - .. [3] `Predicting Good Probabilities with Supervised Learning - `_, - A. Niculescu-Mizil & R. Caruana, ICML 2005 +.. [3] `Predicting Good Probabilities with Supervised Learning + `_, + A. Niculescu-Mizil & R. Caruana, ICML 2005 - .. [4] `Probabilistic Outputs for Support Vector Machines and Comparisons - to Regularized Likelihood Methods. - `_ - J. Platt, (1999) +.. [4] `Probabilistic Outputs for Support Vector Machines and Comparisons + to Regularized Likelihood Methods. + `_ + J. Platt, (1999) - .. [5] `Transforming Classifier Scores into Accurate Multiclass - Probability Estimates. - `_ - B. Zadrozny & C. Elkan, (KDD 2002) +.. [5] `Transforming Classifier Scores into Accurate Multiclass + Probability Estimates. + `_ + B. Zadrozny & C. Elkan, (KDD 2002) - .. [6] `Predicting accurate probabilities with a ranking loss. - `_ - Menon AK, Jiang XJ, Vembu S, Elkan C, Ohno-Machado L. - Proc Int Conf Mach Learn. 2012;2012:703-710 +.. [6] `Predicting accurate probabilities with a ranking loss. + `_ + Menon AK, Jiang XJ, Vembu S, Elkan C, Ohno-Machado L. + Proc Int Conf Mach Learn. 2012;2012:703-710 - .. [7] `Beyond sigmoids: How to obtain well-calibrated probabilities from - binary classifiers with beta calibration - `_ - Kull, M., Silva Filho, T. M., & Flach, P. (2017). +.. [7] `Beyond sigmoids: How to obtain well-calibrated probabilities from + binary classifiers with beta calibration + `_ + Kull, M., Silva Filho, T. M., & Flach, P. (2017). - .. [8] Mario V. Wüthrich, Michael Merz (2023). - :doi:`"Statistical Foundations of Actuarial Learning and its Applications" - <10.1007/978-3-031-12409-9>` - Springer Actuarial +.. [8] Mario V. Wüthrich, Michael Merz (2023). + :doi:`"Statistical Foundations of Actuarial Learning and its Applications" + <10.1007/978-3-031-12409-9>` + Springer Actuarial diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst deleted file mode 100644 index 804546eababef..0000000000000 --- a/doc/modules/classes.rst +++ /dev/null @@ -1,1915 +0,0 @@ -.. _api_ref: - -============= -API Reference -============= - -This is the class and function reference of scikit-learn. Please refer to -the :ref:`full user guide ` for further details, as the class and -function raw specifications may not be enough to give full guidelines on their -uses. -For reference on concepts repeated across the API, see :ref:`glossary`. - -:mod:`sklearn`: Settings and information tools -============================================== - -.. automodule:: sklearn - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - config_context - get_config - set_config - show_versions - -:mod:`sklearn.base`: Base classes and utility functions -======================================================= - -.. automodule:: sklearn.base - :no-members: - :no-inherited-members: - -Base classes ------------- -.. currentmodule:: sklearn - -.. autosummary:: - :nosignatures: - :toctree: generated/ - :template: class.rst - - base.BaseEstimator - base.BiclusterMixin - base.ClassifierMixin - base.ClusterMixin - base.DensityMixin - base.RegressorMixin - base.TransformerMixin - base.MetaEstimatorMixin - base.OneToOneFeatureMixin - base.OutlierMixin - base.ClassNamePrefixFeaturesOutMixin - feature_selection.SelectorMixin - -Functions ---------- -.. currentmodule:: sklearn - -.. 
autosummary:: - :toctree: generated/ - :template: function.rst - - base.clone - base.is_classifier - base.is_regressor - -.. _calibration_ref: - -:mod:`sklearn.calibration`: Probability Calibration -=================================================== - -.. automodule:: sklearn.calibration - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`calibration` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - calibration.CalibratedClassifierCV - - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - calibration.calibration_curve - -.. _cluster_ref: - -:mod:`sklearn.cluster`: Clustering -================================== - -.. automodule:: sklearn.cluster - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`clustering` and :ref:`biclustering` sections for -further details. - -Classes -------- -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - cluster.AffinityPropagation - cluster.AgglomerativeClustering - cluster.Birch - cluster.DBSCAN - cluster.HDBSCAN - cluster.FeatureAgglomeration - cluster.KMeans - cluster.BisectingKMeans - cluster.MiniBatchKMeans - cluster.MeanShift - cluster.OPTICS - cluster.SpectralClustering - cluster.SpectralBiclustering - cluster.SpectralCoclustering - -Functions ---------- -.. autosummary:: - :toctree: generated/ - :template: function.rst - - cluster.affinity_propagation - cluster.cluster_optics_dbscan - cluster.cluster_optics_xi - cluster.compute_optics_graph - cluster.dbscan - cluster.estimate_bandwidth - cluster.k_means - cluster.kmeans_plusplus - cluster.mean_shift - cluster.spectral_clustering - cluster.ward_tree - -.. _compose_ref: - -:mod:`sklearn.compose`: Composite Estimators -============================================ - -.. automodule:: sklearn.compose - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`combining_estimators` section for further -details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - compose.ColumnTransformer - compose.TransformedTargetRegressor - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - compose.make_column_transformer - compose.make_column_selector - -.. _covariance_ref: - -:mod:`sklearn.covariance`: Covariance Estimators -================================================ - -.. automodule:: sklearn.covariance - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`covariance` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - covariance.EmpiricalCovariance - covariance.EllipticEnvelope - covariance.GraphicalLasso - covariance.GraphicalLassoCV - covariance.LedoitWolf - covariance.MinCovDet - covariance.OAS - covariance.ShrunkCovariance - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - covariance.empirical_covariance - covariance.graphical_lasso - covariance.ledoit_wolf - covariance.ledoit_wolf_shrinkage - covariance.oas - covariance.shrunk_covariance - -.. _cross_decomposition_ref: - -:mod:`sklearn.cross_decomposition`: Cross decomposition -======================================================= - -.. automodule:: sklearn.cross_decomposition - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`cross_decomposition` section for further details. - -.. currentmodule:: sklearn - -.. 
autosummary:: - :toctree: generated/ - :template: class.rst - - cross_decomposition.CCA - cross_decomposition.PLSCanonical - cross_decomposition.PLSRegression - cross_decomposition.PLSSVD - -.. _datasets_ref: - -:mod:`sklearn.datasets`: Datasets -================================= - -.. automodule:: sklearn.datasets - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`datasets` section for further details. - -Loaders -------- - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - datasets.clear_data_home - datasets.dump_svmlight_file - datasets.fetch_20newsgroups - datasets.fetch_20newsgroups_vectorized - datasets.fetch_california_housing - datasets.fetch_covtype - datasets.fetch_kddcup99 - datasets.fetch_lfw_pairs - datasets.fetch_lfw_people - datasets.fetch_olivetti_faces - datasets.fetch_openml - datasets.fetch_rcv1 - datasets.fetch_species_distributions - datasets.get_data_home - datasets.load_breast_cancer - datasets.load_diabetes - datasets.load_digits - datasets.load_files - datasets.load_iris - datasets.load_linnerud - datasets.load_sample_image - datasets.load_sample_images - datasets.load_svmlight_file - datasets.load_svmlight_files - datasets.load_wine - -Samples generator ------------------ - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - datasets.make_biclusters - datasets.make_blobs - datasets.make_checkerboard - datasets.make_circles - datasets.make_classification - datasets.make_friedman1 - datasets.make_friedman2 - datasets.make_friedman3 - datasets.make_gaussian_quantiles - datasets.make_hastie_10_2 - datasets.make_low_rank_matrix - datasets.make_moons - datasets.make_multilabel_classification - datasets.make_regression - datasets.make_s_curve - datasets.make_sparse_coded_signal - datasets.make_sparse_spd_matrix - datasets.make_sparse_uncorrelated - datasets.make_spd_matrix - datasets.make_swiss_roll - - -.. _decomposition_ref: - -:mod:`sklearn.decomposition`: Matrix Decomposition -================================================== - -.. automodule:: sklearn.decomposition - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`decompositions` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - decomposition.DictionaryLearning - decomposition.FactorAnalysis - decomposition.FastICA - decomposition.IncrementalPCA - decomposition.KernelPCA - decomposition.LatentDirichletAllocation - decomposition.MiniBatchDictionaryLearning - decomposition.MiniBatchSparsePCA - decomposition.NMF - decomposition.MiniBatchNMF - decomposition.PCA - decomposition.SparsePCA - decomposition.SparseCoder - decomposition.TruncatedSVD - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - decomposition.dict_learning - decomposition.dict_learning_online - decomposition.fastica - decomposition.non_negative_factorization - decomposition.sparse_encode - -.. _lda_ref: - -:mod:`sklearn.discriminant_analysis`: Discriminant Analysis -=========================================================== - -.. automodule:: sklearn.discriminant_analysis - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`lda_qda` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - discriminant_analysis.LinearDiscriminantAnalysis - discriminant_analysis.QuadraticDiscriminantAnalysis - -.. 
_dummy_ref: - -:mod:`sklearn.dummy`: Dummy estimators -====================================== - -.. automodule:: sklearn.dummy - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`model_evaluation` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - dummy.DummyClassifier - dummy.DummyRegressor - -.. autosummary:: - :toctree: generated/ - :template: function.rst - -.. _ensemble_ref: - -:mod:`sklearn.ensemble`: Ensemble Methods -========================================= - -.. automodule:: sklearn.ensemble - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`ensemble` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - ensemble.AdaBoostClassifier - ensemble.AdaBoostRegressor - ensemble.BaggingClassifier - ensemble.BaggingRegressor - ensemble.ExtraTreesClassifier - ensemble.ExtraTreesRegressor - ensemble.GradientBoostingClassifier - ensemble.GradientBoostingRegressor - ensemble.IsolationForest - ensemble.RandomForestClassifier - ensemble.RandomForestRegressor - ensemble.RandomTreesEmbedding - ensemble.StackingClassifier - ensemble.StackingRegressor - ensemble.VotingClassifier - ensemble.VotingRegressor - ensemble.HistGradientBoostingRegressor - ensemble.HistGradientBoostingClassifier - - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - -.. _exceptions_ref: - -:mod:`sklearn.exceptions`: Exceptions and warnings -================================================== - -.. automodule:: sklearn.exceptions - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - exceptions.ConvergenceWarning - exceptions.DataConversionWarning - exceptions.DataDimensionalityWarning - exceptions.EfficiencyWarning - exceptions.FitFailedWarning - exceptions.InconsistentVersionWarning - exceptions.NotFittedError - exceptions.UndefinedMetricWarning - - -:mod:`sklearn.experimental`: Experimental -========================================= - -.. automodule:: sklearn.experimental - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - - experimental.enable_iterative_imputer - experimental.enable_halving_search_cv - - -.. _feature_extraction_ref: - -:mod:`sklearn.feature_extraction`: Feature Extraction -===================================================== - -.. automodule:: sklearn.feature_extraction - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`feature_extraction` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - feature_extraction.DictVectorizer - feature_extraction.FeatureHasher - -From images ------------ - -.. automodule:: sklearn.feature_extraction.image - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - feature_extraction.image.extract_patches_2d - feature_extraction.image.grid_to_graph - feature_extraction.image.img_to_graph - feature_extraction.image.reconstruct_from_patches_2d - - :template: class.rst - - feature_extraction.image.PatchExtractor - -.. _text_feature_extraction_ref: - -From text ---------- - -.. automodule:: sklearn.feature_extraction.text - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. 
autosummary:: - :toctree: generated/ - :template: class.rst - - feature_extraction.text.CountVectorizer - feature_extraction.text.HashingVectorizer - feature_extraction.text.TfidfTransformer - feature_extraction.text.TfidfVectorizer - - -.. _feature_selection_ref: - -:mod:`sklearn.feature_selection`: Feature Selection -=================================================== - -.. automodule:: sklearn.feature_selection - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`feature_selection` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - feature_selection.GenericUnivariateSelect - feature_selection.SelectPercentile - feature_selection.SelectKBest - feature_selection.SelectFpr - feature_selection.SelectFdr - feature_selection.SelectFromModel - feature_selection.SelectFwe - feature_selection.SequentialFeatureSelector - feature_selection.RFE - feature_selection.RFECV - feature_selection.VarianceThreshold - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - feature_selection.chi2 - feature_selection.f_classif - feature_selection.f_regression - feature_selection.r_regression - feature_selection.mutual_info_classif - feature_selection.mutual_info_regression - - -.. _gaussian_process_ref: - -:mod:`sklearn.gaussian_process`: Gaussian Processes -=================================================== - -.. automodule:: sklearn.gaussian_process - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`gaussian_process` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - gaussian_process.GaussianProcessClassifier - gaussian_process.GaussianProcessRegressor - -Kernels -------- - -.. automodule:: sklearn.gaussian_process.kernels - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class_with_call.rst - - gaussian_process.kernels.CompoundKernel - gaussian_process.kernels.ConstantKernel - gaussian_process.kernels.DotProduct - gaussian_process.kernels.ExpSineSquared - gaussian_process.kernels.Exponentiation - gaussian_process.kernels.Hyperparameter - gaussian_process.kernels.Kernel - gaussian_process.kernels.Matern - gaussian_process.kernels.PairwiseKernel - gaussian_process.kernels.Product - gaussian_process.kernels.RBF - gaussian_process.kernels.RationalQuadratic - gaussian_process.kernels.Sum - gaussian_process.kernels.WhiteKernel - - -.. _impute_ref: - -:mod:`sklearn.impute`: Impute -============================= - -.. automodule:: sklearn.impute - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`Impute` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - impute.SimpleImputer - impute.IterativeImputer - impute.MissingIndicator - impute.KNNImputer - - -.. _inspection_ref: - -:mod:`sklearn.inspection`: Inspection -===================================== - -.. automodule:: sklearn.inspection - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - inspection.partial_dependence - inspection.permutation_importance - -Plotting --------- - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: display_only_from_estimator.rst - - inspection.DecisionBoundaryDisplay - inspection.PartialDependenceDisplay - -.. 
_isotonic_ref: - -:mod:`sklearn.isotonic`: Isotonic regression -============================================ - -.. automodule:: sklearn.isotonic - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`isotonic` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - isotonic.IsotonicRegression - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - isotonic.check_increasing - isotonic.isotonic_regression - - -.. _kernel_approximation_ref: - -:mod:`sklearn.kernel_approximation`: Kernel Approximation -========================================================= - -.. automodule:: sklearn.kernel_approximation - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`kernel_approximation` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - kernel_approximation.AdditiveChi2Sampler - kernel_approximation.Nystroem - kernel_approximation.PolynomialCountSketch - kernel_approximation.RBFSampler - kernel_approximation.SkewedChi2Sampler - -.. _kernel_ridge_ref: - -:mod:`sklearn.kernel_ridge`: Kernel Ridge Regression -==================================================== - -.. automodule:: sklearn.kernel_ridge - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`kernel_ridge` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - kernel_ridge.KernelRidge - -.. _linear_model_ref: - -:mod:`sklearn.linear_model`: Linear Models -========================================== - -.. automodule:: sklearn.linear_model - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`linear_model` section for further details. - -The following subsections are only rough guidelines: the same estimator can -fall into multiple categories, depending on its parameters. - -.. currentmodule:: sklearn - -Linear classifiers ------------------- -.. autosummary:: - :toctree: generated/ - :template: class.rst - - linear_model.LogisticRegression - linear_model.LogisticRegressionCV - linear_model.PassiveAggressiveClassifier - linear_model.Perceptron - linear_model.RidgeClassifier - linear_model.RidgeClassifierCV - linear_model.SGDClassifier - linear_model.SGDOneClassSVM - -Classical linear regressors ---------------------------- - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - linear_model.LinearRegression - linear_model.Ridge - linear_model.RidgeCV - linear_model.SGDRegressor - -Regressors with variable selection ----------------------------------- - -The following estimators have built-in variable selection fitting -procedures, but any estimator using a L1 or elastic-net penalty also -performs variable selection: typically :class:`~linear_model.SGDRegressor` -or :class:`~sklearn.linear_model.SGDClassifier` with an appropriate penalty. - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - linear_model.ElasticNet - linear_model.ElasticNetCV - linear_model.Lars - linear_model.LarsCV - linear_model.Lasso - linear_model.LassoCV - linear_model.LassoLars - linear_model.LassoLarsCV - linear_model.LassoLarsIC - linear_model.OrthogonalMatchingPursuit - linear_model.OrthogonalMatchingPursuitCV - -Bayesian regressors -------------------- - -.. 
autosummary:: - :toctree: generated/ - :template: class.rst - - linear_model.ARDRegression - linear_model.BayesianRidge - -Multi-task linear regressors with variable selection ----------------------------------------------------- - -These estimators fit multiple regression problems (or tasks) jointly, while -inducing sparse coefficients. While the inferred coefficients may differ -between the tasks, they are constrained to agree on the features that are -selected (non-zero coefficients). - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - linear_model.MultiTaskElasticNet - linear_model.MultiTaskElasticNetCV - linear_model.MultiTaskLasso - linear_model.MultiTaskLassoCV - -Outlier-robust regressors -------------------------- - -Any estimator using the Huber loss would also be robust to outliers, e.g. -:class:`~linear_model.SGDRegressor` with ``loss='huber'``. - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - linear_model.HuberRegressor - linear_model.QuantileRegressor - linear_model.RANSACRegressor - linear_model.TheilSenRegressor - -Generalized linear models (GLM) for regression ----------------------------------------------- - -These models allow for response variables to have error distributions other -than a normal distribution: - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - linear_model.PoissonRegressor - linear_model.TweedieRegressor - linear_model.GammaRegressor - - -Miscellaneous -------------- - -.. autosummary:: - :toctree: generated/ - :template: classes.rst - - linear_model.PassiveAggressiveRegressor - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - linear_model.enet_path - linear_model.lars_path - linear_model.lars_path_gram - linear_model.lasso_path - linear_model.orthogonal_mp - linear_model.orthogonal_mp_gram - linear_model.ridge_regression - - -.. _manifold_ref: - -:mod:`sklearn.manifold`: Manifold Learning -========================================== - -.. automodule:: sklearn.manifold - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`manifold` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated - :template: class.rst - - manifold.Isomap - manifold.LocallyLinearEmbedding - manifold.MDS - manifold.SpectralEmbedding - manifold.TSNE - -.. autosummary:: - :toctree: generated - :template: function.rst - - manifold.locally_linear_embedding - manifold.smacof - manifold.spectral_embedding - manifold.trustworthiness - - -.. _metrics_ref: - -:mod:`sklearn.metrics`: Metrics -=============================== - -See the :ref:`model_evaluation` section and the :ref:`metrics` section of the -user guide for further details. - -.. automodule:: sklearn.metrics - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -Model Selection Interface -------------------------- -See the :ref:`scoring_parameter` section of the user guide for further -details. - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - metrics.check_scoring - metrics.get_scorer - metrics.get_scorer_names - metrics.make_scorer - -Classification metrics ----------------------- - -See the :ref:`classification_metrics` section of the user guide for further -details. - -.. 
autosummary:: - :toctree: generated/ - :template: function.rst - - metrics.accuracy_score - metrics.auc - metrics.average_precision_score - metrics.balanced_accuracy_score - metrics.brier_score_loss - metrics.class_likelihood_ratios - metrics.classification_report - metrics.cohen_kappa_score - metrics.confusion_matrix - metrics.dcg_score - metrics.det_curve - metrics.f1_score - metrics.fbeta_score - metrics.hamming_loss - metrics.hinge_loss - metrics.jaccard_score - metrics.log_loss - metrics.matthews_corrcoef - metrics.multilabel_confusion_matrix - metrics.ndcg_score - metrics.precision_recall_curve - metrics.precision_recall_fscore_support - metrics.precision_score - metrics.recall_score - metrics.roc_auc_score - metrics.roc_curve - metrics.top_k_accuracy_score - metrics.zero_one_loss - -Regression metrics ------------------- - -See the :ref:`regression_metrics` section of the user guide for further -details. - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - metrics.explained_variance_score - metrics.max_error - metrics.mean_absolute_error - metrics.mean_squared_error - metrics.mean_squared_log_error - metrics.median_absolute_error - metrics.mean_absolute_percentage_error - metrics.r2_score - metrics.root_mean_squared_log_error - metrics.root_mean_squared_error - metrics.mean_poisson_deviance - metrics.mean_gamma_deviance - metrics.mean_tweedie_deviance - metrics.d2_tweedie_score - metrics.mean_pinball_loss - metrics.d2_pinball_score - metrics.d2_absolute_error_score - -Multilabel ranking metrics --------------------------- -See the :ref:`multilabel_ranking_metrics` section of the user guide for further -details. - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - metrics.coverage_error - metrics.label_ranking_average_precision_score - metrics.label_ranking_loss - - -Clustering metrics ------------------- - -See the :ref:`clustering_evaluation` section of the user guide for further -details. - -.. automodule:: sklearn.metrics.cluster - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - metrics.adjusted_mutual_info_score - metrics.adjusted_rand_score - metrics.calinski_harabasz_score - metrics.davies_bouldin_score - metrics.completeness_score - metrics.cluster.contingency_matrix - metrics.cluster.pair_confusion_matrix - metrics.fowlkes_mallows_score - metrics.homogeneity_completeness_v_measure - metrics.homogeneity_score - metrics.mutual_info_score - metrics.normalized_mutual_info_score - metrics.rand_score - metrics.silhouette_score - metrics.silhouette_samples - metrics.v_measure_score - -Biclustering metrics --------------------- - -See the :ref:`biclustering_evaluation` section of the user guide for -further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - metrics.consensus_score - -Distance metrics ----------------- - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - metrics.DistanceMetric - -Pairwise metrics ----------------- - -See the :ref:`metrics` section of the user guide for further details. - -.. automodule:: sklearn.metrics.pairwise - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. 
autosummary:: - :toctree: generated/ - :template: function.rst - - metrics.pairwise.additive_chi2_kernel - metrics.pairwise.chi2_kernel - metrics.pairwise.cosine_similarity - metrics.pairwise.cosine_distances - metrics.pairwise.distance_metrics - metrics.pairwise.euclidean_distances - metrics.pairwise.haversine_distances - metrics.pairwise.kernel_metrics - metrics.pairwise.laplacian_kernel - metrics.pairwise.linear_kernel - metrics.pairwise.manhattan_distances - metrics.pairwise.nan_euclidean_distances - metrics.pairwise.pairwise_kernels - metrics.pairwise.polynomial_kernel - metrics.pairwise.rbf_kernel - metrics.pairwise.sigmoid_kernel - metrics.pairwise.paired_euclidean_distances - metrics.pairwise.paired_manhattan_distances - metrics.pairwise.paired_cosine_distances - metrics.pairwise.paired_distances - metrics.pairwise_distances - metrics.pairwise_distances_argmin - metrics.pairwise_distances_argmin_min - metrics.pairwise_distances_chunked - - -Plotting --------- - -See the :ref:`visualizations` section of the user guide for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: display_all_class_methods.rst - - metrics.ConfusionMatrixDisplay - metrics.DetCurveDisplay - metrics.PrecisionRecallDisplay - metrics.PredictionErrorDisplay - metrics.RocCurveDisplay - calibration.CalibrationDisplay - -.. _mixture_ref: - -:mod:`sklearn.mixture`: Gaussian Mixture Models -=============================================== - -.. automodule:: sklearn.mixture - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`mixture` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - mixture.BayesianGaussianMixture - mixture.GaussianMixture - -.. _modelselection_ref: - -:mod:`sklearn.model_selection`: Model Selection -=============================================== - -.. automodule:: sklearn.model_selection - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`cross_validation`, :ref:`grid_search` and -:ref:`learning_curve` sections for further details. - -Splitter Classes ----------------- - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - model_selection.GroupKFold - model_selection.GroupShuffleSplit - model_selection.KFold - model_selection.LeaveOneGroupOut - model_selection.LeavePGroupsOut - model_selection.LeaveOneOut - model_selection.LeavePOut - model_selection.PredefinedSplit - model_selection.RepeatedKFold - model_selection.RepeatedStratifiedKFold - model_selection.ShuffleSplit - model_selection.StratifiedKFold - model_selection.StratifiedShuffleSplit - model_selection.StratifiedGroupKFold - model_selection.TimeSeriesSplit - -Splitter Functions ------------------- - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - model_selection.check_cv - model_selection.train_test_split - -.. _hyper_parameter_optimizers: - -Hyper-parameter optimizers --------------------------- - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - model_selection.GridSearchCV - model_selection.HalvingGridSearchCV - model_selection.ParameterGrid - model_selection.ParameterSampler - model_selection.RandomizedSearchCV - model_selection.HalvingRandomSearchCV - -Post-fit model tuning ---------------------- - -.. currentmodule:: sklearn - -.. 
autosummary:: - :toctree: generated/ - :template: class.rst - - model_selection.FixedThresholdClassifier - model_selection.TunedThresholdClassifierCV - -Model validation ----------------- - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - model_selection.cross_validate - model_selection.cross_val_predict - model_selection.cross_val_score - model_selection.learning_curve - model_selection.permutation_test_score - model_selection.validation_curve - -Visualization -------------- - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: display_only_from_estimator.rst - - model_selection.LearningCurveDisplay - model_selection.ValidationCurveDisplay - -.. _multiclass_ref: - -:mod:`sklearn.multiclass`: Multiclass classification -==================================================== - -.. automodule:: sklearn.multiclass - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`multiclass_classification` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - multiclass.OneVsRestClassifier - multiclass.OneVsOneClassifier - multiclass.OutputCodeClassifier - -.. _multioutput_ref: - -:mod:`sklearn.multioutput`: Multioutput regression and classification -===================================================================== - -.. automodule:: sklearn.multioutput - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`multilabel_classification`, -:ref:`multiclass_multioutput_classification`, and -:ref:`multioutput_regression` sections for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated - :template: class.rst - - multioutput.ClassifierChain - multioutput.MultiOutputRegressor - multioutput.MultiOutputClassifier - multioutput.RegressorChain - -.. _naive_bayes_ref: - -:mod:`sklearn.naive_bayes`: Naive Bayes -======================================= - -.. automodule:: sklearn.naive_bayes - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`naive_bayes` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - naive_bayes.BernoulliNB - naive_bayes.CategoricalNB - naive_bayes.ComplementNB - naive_bayes.GaussianNB - naive_bayes.MultinomialNB - - -.. _neighbors_ref: - -:mod:`sklearn.neighbors`: Nearest Neighbors -=========================================== - -.. automodule:: sklearn.neighbors - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`neighbors` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - neighbors.BallTree - neighbors.KDTree - neighbors.KernelDensity - neighbors.KNeighborsClassifier - neighbors.KNeighborsRegressor - neighbors.KNeighborsTransformer - neighbors.LocalOutlierFactor - neighbors.RadiusNeighborsClassifier - neighbors.RadiusNeighborsRegressor - neighbors.RadiusNeighborsTransformer - neighbors.NearestCentroid - neighbors.NearestNeighbors - neighbors.NeighborhoodComponentsAnalysis - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - neighbors.kneighbors_graph - neighbors.radius_neighbors_graph - neighbors.sort_graph_by_row_values - -.. _neural_network_ref: - -:mod:`sklearn.neural_network`: Neural network models -==================================================== - -.. 
automodule:: sklearn.neural_network - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`neural_networks_supervised` and :ref:`neural_networks_unsupervised` sections for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - neural_network.BernoulliRBM - neural_network.MLPClassifier - neural_network.MLPRegressor - -.. _pipeline_ref: - -:mod:`sklearn.pipeline`: Pipeline -================================= - -.. automodule:: sklearn.pipeline - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`combining_estimators` section for further -details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - pipeline.FeatureUnion - pipeline.Pipeline - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - pipeline.make_pipeline - pipeline.make_union - -.. _preprocessing_ref: - -:mod:`sklearn.preprocessing`: Preprocessing and Normalization -============================================================= - -.. automodule:: sklearn.preprocessing - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`preprocessing` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - preprocessing.Binarizer - preprocessing.FunctionTransformer - preprocessing.KBinsDiscretizer - preprocessing.KernelCenterer - preprocessing.LabelBinarizer - preprocessing.LabelEncoder - preprocessing.MultiLabelBinarizer - preprocessing.MaxAbsScaler - preprocessing.MinMaxScaler - preprocessing.Normalizer - preprocessing.OneHotEncoder - preprocessing.OrdinalEncoder - preprocessing.PolynomialFeatures - preprocessing.PowerTransformer - preprocessing.QuantileTransformer - preprocessing.RobustScaler - preprocessing.SplineTransformer - preprocessing.StandardScaler - preprocessing.TargetEncoder - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - preprocessing.add_dummy_feature - preprocessing.binarize - preprocessing.label_binarize - preprocessing.maxabs_scale - preprocessing.minmax_scale - preprocessing.normalize - preprocessing.quantile_transform - preprocessing.robust_scale - preprocessing.scale - preprocessing.power_transform - - -.. _random_projection_ref: - -:mod:`sklearn.random_projection`: Random projection -=================================================== - -.. automodule:: sklearn.random_projection - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`random_projection` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - random_projection.GaussianRandomProjection - random_projection.SparseRandomProjection - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - random_projection.johnson_lindenstrauss_min_dim - - -.. _semi_supervised_ref: - -:mod:`sklearn.semi_supervised`: Semi-Supervised Learning -======================================================== - -.. automodule:: sklearn.semi_supervised - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`semi_supervised` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - semi_supervised.LabelPropagation - semi_supervised.LabelSpreading - semi_supervised.SelfTrainingClassifier - - -.. _svm_ref: - -:mod:`sklearn.svm`: Support Vector Machines -=========================================== - -.. 
automodule:: sklearn.svm - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`svm` section for further details. - -Estimators ----------- - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - svm.LinearSVC - svm.LinearSVR - svm.NuSVC - svm.NuSVR - svm.OneClassSVM - svm.SVC - svm.SVR - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - svm.l1_min_c - -.. _tree_ref: - -:mod:`sklearn.tree`: Decision Trees -=================================== - -.. automodule:: sklearn.tree - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`tree` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - tree.DecisionTreeClassifier - tree.DecisionTreeRegressor - tree.ExtraTreeClassifier - tree.ExtraTreeRegressor - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - tree.export_graphviz - tree.export_text - -Plotting --------- - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - tree.plot_tree - -.. _utils_ref: - -:mod:`sklearn.utils`: Utilities -=============================== - -.. automodule:: sklearn.utils - :no-members: - :no-inherited-members: - -**Developer guide:** See the :ref:`developers-utils` page for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - utils.Bunch - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - utils.as_float_array - utils.assert_all_finite - utils.deprecated - utils.estimator_html_repr - utils.gen_batches - utils.gen_even_slices - utils.indexable - utils.murmurhash3_32 - utils.resample - utils._safe_indexing - utils.safe_mask - utils.safe_sqr - utils.shuffle - -Input and parameter validation ------------------------------- - -.. automodule:: sklearn.utils.validation - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - utils.check_X_y - utils.check_array - utils.check_scalar - utils.check_consistent_length - utils.check_random_state - utils.validation.check_is_fitted - utils.validation.check_memory - utils.validation.check_symmetric - utils.validation.column_or_1d - utils.validation.has_fit_parameter - -Utilities used in meta-estimators ---------------------------------- - -.. automodule:: sklearn.utils.metaestimators - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - utils.metaestimators.available_if - -Utilities to handle weights based on class labels -------------------------------------------------- - -.. automodule:: sklearn.utils.class_weight - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - utils.class_weight.compute_class_weight - utils.class_weight.compute_sample_weight - -Utilities to deal with multiclass target in classifiers -------------------------------------------------------- - -.. automodule:: sklearn.utils.multiclass - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - utils.multiclass.type_of_target - utils.multiclass.is_multilabel - utils.multiclass.unique_labels - -Utilities for optimal mathematical operations ---------------------------------------------- - -.. 
automodule:: sklearn.utils.extmath - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - utils.extmath.safe_sparse_dot - utils.extmath.randomized_range_finder - utils.extmath.randomized_svd - utils.extmath.fast_logdet - utils.extmath.density - utils.extmath.weighted_mode - -Utilities to work with sparse matrices and arrays -------------------------------------------------- - -.. automodule:: sklearn.utils.sparsefuncs - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - utils.sparsefuncs.incr_mean_variance_axis - utils.sparsefuncs.inplace_column_scale - utils.sparsefuncs.inplace_row_scale - utils.sparsefuncs.inplace_swap_row - utils.sparsefuncs.inplace_swap_column - utils.sparsefuncs.mean_variance_axis - utils.sparsefuncs.inplace_csr_column_scale - -.. automodule:: sklearn.utils.sparsefuncs_fast - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - utils.sparsefuncs_fast.inplace_csr_row_normalize_l1 - utils.sparsefuncs_fast.inplace_csr_row_normalize_l2 - -Utilities to work with graphs ------------------------------ - -.. automodule:: sklearn.utils.graph - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - utils.graph.single_source_shortest_path_length - -Utilities for random sampling ------------------------------ - -.. automodule:: sklearn.utils.random - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - utils.random.sample_without_replacement - - -Utilities to operate on arrays ------------------------------- - -.. automodule:: sklearn.utils.arrayfuncs - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - utils.arrayfuncs.min_pos - -Metadata routing ----------------- - -.. automodule:: sklearn.utils.metadata_routing - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - utils.metadata_routing.get_routing_for_object - utils.metadata_routing.process_routing - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - utils.metadata_routing.MetadataRouter - utils.metadata_routing.MetadataRequest - utils.metadata_routing.MethodMapping - -Scikit-learn object discovery ------------------------------ - -.. automodule:: sklearn.utils.discovery - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - utils.discovery.all_estimators - utils.discovery.all_displays - utils.discovery.all_functions - -Scikit-learn compatibility checker ----------------------------------- - -.. automodule:: sklearn.utils.estimator_checks - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - utils.estimator_checks.check_estimator - utils.estimator_checks.parametrize_with_checks - -Utilities for parallel computing --------------------------------- - -.. automodule:: sklearn.utils.parallel - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. 
autosummary:: - :toctree: generated/ - :template: function.rst - - utils.parallel.delayed - utils.parallel_backend - utils.register_parallel_backend - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - utils.parallel.Parallel - - -Recently deprecated =================== diff --git a/doc/modules/classification_threshold.rst b/doc/modules/classification_threshold.rst index 712a094a43246..ee7028f469b5f 100644 --- a/doc/modules/classification_threshold.rst +++ b/doc/modules/classification_threshold.rst @@ -15,12 +15,12 @@ Let's take a straightforward example related to weather forecasting: the first point is related to answering "what is the chance that it will rain tomorrow?" while the second point is related to answering "should I take an umbrella tomorrow?". -When it comes to the scikit-learn API, the first point is addressed providing scores +When it comes to the scikit-learn API, the first point is addressed by providing scores using :term:`predict_proba` or :term:`decision_function`. The former returns conditional probability estimates :math:`P(y|X)` for each class, while the latter returns a decision score for each class. -The decision corresponding to the labels are obtained with :term:`predict`. In binary +The decision corresponding to the labels is obtained with :term:`predict`. In binary classification, a decision rule or action is then defined by thresholding the scores, leading to the prediction of a single class label for each sample. For binary classification in scikit-learn, class label predictions are obtained by hard-coded @@ -38,8 +38,8 @@ probability estimates :math:`P(y|X)` and class labels:: >>> classifier.predict_proba(X[:4]) array([[0.94 , 0.06 ], [0.94 , 0.06 ], - [0.0416..., 0.9583...], - [0.0416..., 0.9583...]]) + [0.0416, 0.9583], + [0.0416, 0.9583]]) >>> classifier.predict(X[:4]) array([0, 0, 1, 1]) @@ -97,7 +97,7 @@ a meaningful metric for their use case. the label of the class of interest (i.e. `pos_label`). Thus, if this label is not the right one for your application, you need to define a scorer and pass the right `pos_label` (and additional parameters) using the - :func:`~sklearn.metrics.make_scorer`. Refer to :ref:`scoring` to get + :func:`~sklearn.metrics.make_scorer`. Refer to :ref:`scoring_callable` to get information to define your own scoring function. For instance, we show how to pass the information to the scorer that the label of interest is `0` when maximizing the :func:`~sklearn.metrics.f1_score`:: @@ -112,10 +112,10 @@ a meaningful metric for their use case. >>> base_model = LogisticRegression() >>> model = TunedThresholdClassifierCV(base_model, scoring=scorer) >>> scorer(model.fit(X, y), X, y) - 0.88... + 0.88 >>> # compare it with the internal score found by cross-validation >>> model.best_score_ - 0.86... + np.float64(0.86) Important notes regarding the internal cross-validation ------------------------------------------------------- @@ -143,7 +143,10 @@ Manually setting the decision threshold The previous sections discussed strategies to find an optimal decision threshold. It is also possible to manually set the decision threshold using the class -:class:`~sklearn.model_selection.FixedThresholdClassifier`. +:class:`~sklearn.model_selection.FixedThresholdClassifier`. If you don't want +to refit the model when calling `fit`, wrap your sub-estimator with a +:class:`~sklearn.frozen.FrozenEstimator` and do +``FixedThresholdClassifier(FrozenEstimator(estimator), ...)``.
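A minimal sketch of this pattern, assuming a scikit-learn version that ships :class:`~sklearn.frozen.FrozenEstimator` (the generated dataset and the 0.9 threshold below are illustrative only)::

    >>> from sklearn.datasets import make_classification
    >>> from sklearn.frozen import FrozenEstimator
    >>> from sklearn.linear_model import LogisticRegression
    >>> from sklearn.model_selection import FixedThresholdClassifier
    >>> X, y = make_classification(random_state=0)
    >>> classifier = LogisticRegression().fit(X, y)  # fitted once, up front
    >>> # only predict the positive class when its probability clears 0.9
    >>> threshold_classifier = FixedThresholdClassifier(
    ...     FrozenEstimator(classifier), threshold=0.9
    ... )
    >>> # the frozen inner classifier is not retrained by this call
    >>> threshold_classifier = threshold_classifier.fit(X, y)

Because the wrapped estimator is frozen, `fit` leaves `classifier` untouched; the wrapper simply applies the fixed threshold on top of the classifier's probability estimates.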
Examples -------- diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst index ed27b369171e5..cdf8421a103e3 100644 --- a/doc/modules/clustering.rst +++ b/doc/modules/clustering.rst @@ -140,6 +140,11 @@ model with equal covariance per component. :term:`inductive` clustering methods) are not designed to be applied to new, unseen data. +.. rubric:: Examples + +* :ref:`sphx_glr_auto_examples_cluster_plot_inductive_clustering.py`: An example + of an inductive clustering model for handling new data. + .. _k_means: K-means @@ -222,9 +227,10 @@ initializations of the centroids. One method to help address this issue is the k-means++ initialization scheme, which has been implemented in scikit-learn (use the ``init='k-means++'`` parameter). This initializes the centroids to be (generally) distant from each other, leading to probably better results than -random initialization, as shown in the reference. For a detailed example of -comaparing different initialization schemes, refer to -:ref:`sphx_glr_auto_examples_cluster_plot_kmeans_digits.py`. +random initialization, as shown in the reference. For detailed examples of +comparing different initialization schemes, refer to +:ref:`sphx_glr_auto_examples_cluster_plot_kmeans_digits.py` and +:ref:`sphx_glr_auto_examples_cluster_plot_kmeans_stability_low_dim_dense.py`. K-means++ can also be called independently to select seeds for other clustering algorithms, see :func:`sklearn.cluster.kmeans_plusplus` for details @@ -236,18 +242,13 @@ computing cluster centers and values of inertia. For example, assigning a weight of 2 to a sample is equivalent to adding a duplicate of that sample to the dataset :math:`X`. -K-means can be used for vector quantization. This is achieved using the -``transform`` method of a trained model of :class:`KMeans`. For an example of -performing vector quantization on an image refer to -:ref:`sphx_glr_auto_examples_cluster_plot_color_quantization.py`. - -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_cluster_plot_cluster_iris.py`: Example usage of - :class:`KMeans` using the iris dataset +* :ref:`sphx_glr_auto_examples_text_plot_document_clustering.py`: Document clustering + using :class:`KMeans` and :class:`MiniBatchKMeans` based on sparse data - * :ref:`sphx_glr_auto_examples_text_plot_document_clustering.py`: Document clustering - using :class:`KMeans` and :class:`MiniBatchKMeans` based on sparse data +* :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_plusplus.py`: Using K-means++ + to select seeds for other clustering algorithms. Low-level parallelism --------------------- @@ -257,24 +258,20 @@ chunks of data (256 samples) are processed in parallel, which in addition yields a low memory footprint. For more details on how to control the number of threads, please refer to our :ref:`parallelism` notes. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_assumptions.py`: Demonstrating - when k-means performs intuitively and when it does not - * :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_digits.py`: Clustering - handwritten digits +* :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_assumptions.py`: Demonstrating when + k-means performs intuitively and when it does not +* :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_digits.py`: Clustering handwritten digits +.. 
dropdown:: References -|details-start| -**References** -|details-split| + * `"k-means++: The advantages of careful seeding" + `_ + Arthur, David, and Sergei Vassilvitskii, + *Proceedings of the eighteenth annual ACM-SIAM symposium on Discrete + algorithms*, Society for Industrial and Applied Mathematics (2007) -* `"k-means++: The advantages of careful seeding" - `_ Arthur, David, and - Sergei Vassilvitskii, *Proceedings of the eighteenth annual ACM-SIAM symposium - on Discrete algorithms*, Society for Industrial and Applied Mathematics (2007) - -|details-end| .. _mini_batch_kmeans: @@ -310,24 +307,22 @@ small, as shown in the example and cited reference. :scale: 100 -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_cluster_plot_mini_batch_kmeans.py`: Comparison of - :class:`KMeans` and :class:`MiniBatchKMeans` +* :ref:`sphx_glr_auto_examples_cluster_plot_mini_batch_kmeans.py`: Comparison of + :class:`KMeans` and :class:`MiniBatchKMeans` - * :ref:`sphx_glr_auto_examples_text_plot_document_clustering.py`: Document clustering - using :class:`KMeans` and :class:`MiniBatchKMeans` based on sparse data +* :ref:`sphx_glr_auto_examples_text_plot_document_clustering.py`: Document clustering + using :class:`KMeans` and :class:`MiniBatchKMeans` based on sparse data -|details-start| -**References** -|details-split| +* :ref:`sphx_glr_auto_examples_cluster_plot_dict_face_patches.py` -* `"Web Scale K-Means clustering" - `_ - D. Sculley, *Proceedings of the 19th international conference on World - wide web* (2010) +.. dropdown:: References -|details-end| + * `"Web Scale K-Means clustering" + `_ + D. Sculley, *Proceedings of the 19th international conference on World + wide web* (2010) .. _affinity_propagation: @@ -364,55 +359,50 @@ convergence. Further, the memory complexity is of the order :math:`O(N^2)` if a dense similarity matrix is used, but reducible if a sparse similarity matrix is used. This makes Affinity Propagation most appropriate for small to medium sized datasets. -|details-start| -**Algorithm description** -|details-split| - -The messages sent between points belong to one of two categories. The first is -the responsibility :math:`r(i, k)`, which is the accumulated evidence that -sample :math:`k` should be the exemplar for sample :math:`i`. The second is the -availability :math:`a(i, k)` which is the accumulated evidence that sample -:math:`i` should choose sample :math:`k` to be its exemplar, and considers the -values for all other samples that :math:`k` should be an exemplar. In this way, -exemplars are chosen by samples if they are (1) similar enough to many samples -and (2) chosen by many samples to be representative of themselves. +.. dropdown:: Algorithm description -More formally, the responsibility of a sample :math:`k` to be the exemplar of -sample :math:`i` is given by: + The messages sent between points belong to one of two categories. The first is + the responsibility :math:`r(i, k)`, which is the accumulated evidence that + sample :math:`k` should be the exemplar for sample :math:`i`. The second is the + availability :math:`a(i, k)` which is the accumulated evidence that sample + :math:`i` should choose sample :math:`k` to be its exemplar, and considers the + values for all other samples that :math:`k` should be an exemplar. In this way, + exemplars are chosen by samples if they are (1) similar enough to many samples + and (2) chosen by many samples to be representative of themselves. -..
math:: - - r(i, k) \leftarrow s(i, k) - max [ a(i, k') + s(i, k') \forall k' \neq k ] + More formally, the responsibility of a sample :math:`k` to be the exemplar of + sample :math:`i` is given by: -Where :math:`s(i, k)` is the similarity between samples :math:`i` and :math:`k`. -The availability of sample :math:`k` to be the exemplar of sample :math:`i` is -given by: + .. math:: -.. math:: + r(i, k) \leftarrow s(i, k) - max [ a(i, k') + s(i, k') \forall k' \neq k ] - a(i, k) \leftarrow min [0, r(k, k) + \sum_{i'~s.t.~i' \notin \{i, k\}}{r(i', - k)}] + Where :math:`s(i, k)` is the similarity between samples :math:`i` and :math:`k`. + The availability of sample :math:`k` to be the exemplar of sample :math:`i` is + given by: -To begin with, all values for :math:`r` and :math:`a` are set to zero, and the -calculation of each iterates until convergence. As discussed above, in order to -avoid numerical oscillations when updating the messages, the damping factor -:math:`\lambda` is introduced to iteration process: + .. math:: -.. math:: r_{t+1}(i, k) = \lambda\cdot r_{t}(i, k) + (1-\lambda)\cdot r_{t+1}(i, k) -.. math:: a_{t+1}(i, k) = \lambda\cdot a_{t}(i, k) + (1-\lambda)\cdot a_{t+1}(i, k) + a(i, k) \leftarrow min [0, r(k, k) + \sum_{i'~s.t.~i' \notin \{i, k\}}{r(i', + k)}] -where :math:`t` indicates the iteration times. + To begin with, all values for :math:`r` and :math:`a` are set to zero, and the + calculation of each iterates until convergence. As discussed above, in order to + avoid numerical oscillations when updating the messages, the damping factor + :math:`\lambda` is introduced to the iteration process: -|details-end| + .. math:: r_{t+1}(i, k) = \lambda\cdot r_{t}(i, k) + (1-\lambda)\cdot r_{t+1}(i, k) + .. math:: a_{t+1}(i, k) = \lambda\cdot a_{t}(i, k) + (1-\lambda)\cdot a_{t+1}(i, k) + where :math:`t` indicates the iteration times. -.. topic:: Examples: - * :ref:`sphx_glr_auto_examples_cluster_plot_affinity_propagation.py`: Affinity - Propagation on a synthetic 2D datasets with 3 classes. +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_applications_plot_stock_market.py` Affinity - Propagation on Financial time series to find groups of companies +* :ref:`sphx_glr_auto_examples_cluster_plot_affinity_propagation.py`: Affinity + Propagation on a synthetic 2D dataset with 3 classes +* :ref:`sphx_glr_auto_examples_applications_plot_stock_market.py` Affinity Propagation + on financial time series to find groups of companies .. _mean_shift: @@ -425,43 +415,40 @@ for centroids to be the mean of the points within a given region. These candidates are then filtered in a post-processing stage to eliminate near-duplicates to form the final set of centroids. -|details-start| -**Mathematical details** -|details-split| +.. dropdown:: Mathematical details -The position of centroid candidates is iteratively adjusted using a technique -called hill climbing, which finds local maxima of the estimated probability -density. Given a candidate centroid :math:`x` for iteration :math:`t`, the -candidate is updated according to the following equation: + The position of centroid candidates is iteratively adjusted using a technique + called hill climbing, which finds local maxima of the estimated probability + density. Given a candidate centroid :math:`x` for iteration :math:`t`, the + candidate is updated according to the following equation: -..
math::
-   x^{t+1} = x^t + m(x^t)
+    x^{t+1} = x^t + m(x^t)

-Where :math:`m` is the *mean shift* vector that is computed for each centroid
-that points towards a region of the maximum increase in the density of points.
-To compute :math:`m` we define :math:`N(x)` as the neighborhood of samples
-within a given distance around :math:`x`. Then :math:`m` is computed using the
-following equation, effectively updating a centroid to be the mean of the
-samples within its neighborhood:
+  Where :math:`m` is the *mean shift* vector, computed for each centroid, that
+  points towards a region of the maximum increase in the density of points. To
+  compute :math:`m` we define :math:`N(x)` as the neighborhood of samples within
+  a given distance around :math:`x`. Then :math:`m` is computed using the
+  following equation, effectively updating a centroid to be the mean of the
+  samples within its neighborhood:

-.. math::
+  .. math::

-   m(x) = \frac{1}{|N(x)|} \sum_{x_j \in N(x)}x_j - x
+    m(x) = \frac{1}{|N(x)|} \sum_{x_j \in N(x)}x_j - x

-In general, the equation for :math:`m` depends on a kernel used for density
-estimation. The generic formula is:
+  In general, the equation for :math:`m` depends on a kernel used for density
+  estimation. The generic formula is:

-.. math::
+  .. math::

-   m(x) = \frac{\sum_{x_j \in N(x)}K(x_j - x)x_j}{\sum_{x_j \in N(x)}K(x_j -
-   x)} - x
+    m(x) = \frac{\sum_{x_j \in N(x)}K(x_j - x)x_j}{\sum_{x_j \in N(x)}K(x_j -
+    x)} - x

-In our implementation, :math:`K(x)` is equal to 1 if :math:`x` is small enough
-and is equal to 0 otherwise. Effectively :math:`K(y - x)` indicates whether
-:math:`y` is in the neighborhood of :math:`x`.
+  In our implementation, :math:`K(x)` is equal to 1 if :math:`x` is small enough
+  and is equal to 0 otherwise. Effectively :math:`K(y - x)` indicates whether
+  :math:`y` is in the neighborhood of :math:`x`.

-|details-end|

The algorithm automatically sets the number of clusters, relying instead on a
parameter ``bandwidth``, which dictates the size of the region to search through.

@@ -483,21 +470,17 @@ given sample.
   :scale: 50

-.. topic:: Examples:
-
- * :ref:`sphx_glr_auto_examples_cluster_plot_mean_shift.py`: Mean Shift
-   clustering on a synthetic 2D datasets with 3 classes.
+.. rubric:: Examples
+* :ref:`sphx_glr_auto_examples_cluster_plot_mean_shift.py`: Mean Shift clustering
+  on a synthetic 2D dataset with 3 classes.

-|details-start|
-**References**
-|details-split|
+.. dropdown:: References

-* :doi:`"Mean shift: A robust approach toward feature space analysis"
- <10.1109/34.1000236>` D. Comaniciu and P. Meer, *IEEE Transactions on Pattern
- Analysis and Machine Intelligence* (2002)
+  * :doi:`"Mean shift: A robust approach toward feature space analysis"
+    <10.1109/34.1000236>` D. Comaniciu and P. Meer, *IEEE Transactions on Pattern
+    Analysis and Machine Intelligence* (2002)

-|details-end|

.. _spectral_clustering:

@@ -547,13 +530,13 @@ computed using a function of a gradient of the image. See the examples for such
an application.

-.. topic:: Examples:
+.. rubric:: Examples

- * :ref:`sphx_glr_auto_examples_cluster_plot_segmentation_toy.py`: Segmenting
-   objects from a noisy background using spectral clustering.
+* :ref:`sphx_glr_auto_examples_cluster_plot_segmentation_toy.py`: Segmenting objects
+  from a noisy background using spectral clustering.
+* :ref:`sphx_glr_auto_examples_cluster_plot_coin_segmentation.py`: Spectral clustering
+  to split the image of coins into regions.
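+
+A minimal usage sketch (added for illustration; the parameter values here are
+arbitrary, and the examples above explore realistic settings)::
+
+  >>> from sklearn.cluster import SpectralClustering
+  >>> from sklearn.datasets import make_blobs
+  >>> X, _ = make_blobs(n_samples=20, centers=3, random_state=0)
+  >>> sc = SpectralClustering(n_clusters=3, assign_labels='discretize',
+  ...                         random_state=0).fit(X)
+  >>> sc.labels_.shape
+  (20,)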
- * :ref:`sphx_glr_auto_examples_cluster_plot_coin_segmentation.py`: Spectral - clustering to split the image of coins in regions. .. |coin_kmeans| image:: ../auto_examples/cluster/images/sphx_glr_plot_coin_segmentation_001.png :target: ../auto_examples/cluster/plot_coin_segmentation.html @@ -588,18 +571,15 @@ below. |coin_kmeans| |coin_discretize| |coin_cluster_qr| ================================ ================================ ================================ -|details-start| -**References** -|details-split| +.. dropdown:: References -* `"Multiclass spectral clustering" - `_ - Stella X. Yu, Jianbo Shi, 2003 + * `"Multiclass spectral clustering" + `_ + Stella X. Yu, Jianbo Shi, 2003 -* :doi:`"Simple, direct, and efficient multi-way spectral clustering"<10.1093/imaiai/iay008>` - Anil Damle, Victor Minden, Lexing Ying, 2019 + * :doi:`"Simple, direct, and efficient multi-way spectral clustering"<10.1093/imaiai/iay008>` + Anil Damle, Victor Minden, Lexing Ying, 2019 -|details-end| .. _spectral_clustering_graph: @@ -615,28 +595,25 @@ graph, and SpectralClustering is initialized with `affinity='precomputed'`:: ... assign_labels='discretize') >>> sc.fit_predict(adjacency_matrix) # doctest: +SKIP -|details-start| -**References** -|details-split| +.. dropdown:: References -* :doi:`"A Tutorial on Spectral Clustering" <10.1007/s11222-007-9033-z>` Ulrike - von Luxburg, 2007 + * :doi:`"A Tutorial on Spectral Clustering" <10.1007/s11222-007-9033-z>` Ulrike + von Luxburg, 2007 -* :doi:`"Normalized cuts and image segmentation" <10.1109/34.868688>` Jianbo - Shi, Jitendra Malik, 2000 + * :doi:`"Normalized cuts and image segmentation" <10.1109/34.868688>` Jianbo + Shi, Jitendra Malik, 2000 -* `"A Random Walks View of Spectral Segmentation" - `_ - Marina Meila, Jianbo Shi, 2001 + * `"A Random Walks View of Spectral Segmentation" + `_ + Marina Meila, Jianbo Shi, 2001 -* `"On Spectral Clustering: Analysis and an algorithm" - `_ - Andrew Y. Ng, Michael I. Jordan, Yair Weiss, 2001 + * `"On Spectral Clustering: Analysis and an algorithm" + `_ + Andrew Y. Ng, Michael I. Jordan, Yair Weiss, 2001 -* :arxiv:`"Preconditioned Spectral Clustering for Stochastic Block Partition - Streaming Graph Challenge" <1708.07481>` David Zhuzhunashvili, Andrew Knyazev + * :arxiv:`"Preconditioned Spectral Clustering for Stochastic Block Partition + Streaming Graph Challenge" <1708.07481>` David Zhuzhunashvili, Andrew Knyazev -|details-end| .. _hierarchical_clustering: @@ -697,10 +674,10 @@ while not robust to noisy data, can be computed very efficiently and can therefore be useful to provide hierarchical clustering of larger datasets. Single linkage can also perform well on non-globular data. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_cluster_plot_digits_linkage.py`: exploration of - the different linkage strategies in a real dataset. +* :ref:`sphx_glr_auto_examples_cluster_plot_digits_linkage.py`: exploration of the + different linkage strategies in a real dataset. * :ref:`sphx_glr_auto_examples_cluster_plot_linkage_comparison.py`: exploration of the different linkage strategies in toy datasets. @@ -717,9 +694,9 @@ of the data, though more so in the case of small sample sizes. :target: ../auto_examples/cluster/plot_agglomerative_dendrogram.html :scale: 42 -.. topic:: Examples: +.. 
rubric:: Examples - * :ref:`sphx_glr_auto_examples_cluster_plot_agglomerative_dendrogram.py` +* :ref:`sphx_glr_auto_examples_cluster_plot_agglomerative_dendrogram.py` Adding connectivity constraints @@ -788,20 +765,20 @@ enable only merging of neighboring pixels on an image, as in the :target: ../auto_examples/cluster/plot_agglomerative_clustering.html :scale: 38 -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_cluster_plot_coin_ward_segmentation.py`: Ward - clustering to split the image of coins in regions. +* :ref:`sphx_glr_auto_examples_cluster_plot_coin_ward_segmentation.py`: Ward + clustering to split the image of coins in regions. - * :ref:`sphx_glr_auto_examples_cluster_plot_ward_structured_vs_unstructured.py`: Example - of Ward algorithm on a swiss-roll, comparison of structured approaches - versus unstructured approaches. +* :ref:`sphx_glr_auto_examples_cluster_plot_ward_structured_vs_unstructured.py`: Example + of Ward algorithm on a swiss-roll, comparison of structured approaches + versus unstructured approaches. - * :ref:`sphx_glr_auto_examples_cluster_plot_feature_agglomeration_vs_univariate_selection.py`: Example - of dimensionality reduction with feature agglomeration based on Ward - hierarchical clustering. +* :ref:`sphx_glr_auto_examples_cluster_plot_feature_agglomeration_vs_univariate_selection.py`: Example + of dimensionality reduction with feature agglomeration based on Ward + hierarchical clustering. - * :ref:`sphx_glr_auto_examples_cluster_plot_agglomerative_clustering.py` +* :ref:`sphx_glr_auto_examples_cluster_plot_agglomerative_clustering.py` Varying the metric @@ -835,9 +812,9 @@ each class. :target: ../auto_examples/cluster/plot_agglomerative_clustering_metrics.html :scale: 32 -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_cluster_plot_agglomerative_clustering_metrics.py` +* :ref:`sphx_glr_auto_examples_cluster_plot_agglomerative_clustering_metrics.py` Bisecting K-Means @@ -881,26 +858,23 @@ Difference between Bisecting K-Means and regular K-Means can be seen on example While the regular K-Means algorithm tends to create non-related clusters, clusters from Bisecting K-Means are well ordered and create quite a visible hierarchy. -|details-start| -**References** -|details-split| - -* `"A Comparison of Document Clustering Techniques" - `_ Michael - Steinbach, George Karypis and Vipin Kumar, Department of Computer Science and - Egineering, University of Minnesota (June 2000) -* `"Performance Analysis of K-Means and Bisecting K-Means Algorithms in Weblog - Data" - `_ - K.Abirami and Dr.P.Mayilvahanan, International Journal of Emerging - Technologies in Engineering Research (IJETER) Volume 4, Issue 8, (August 2016) -* `"Bisecting K-means Algorithm Based on K-valued Self-determining and - Clustering Center Optimization" - `_ Jian Di, Xinyue Gou School - of Control and Computer Engineering,North China Electric Power University, - Baoding, Hebei, China (August 2017) - -|details-end| +.. 
dropdown:: References
+
+  * `"A Comparison of Document Clustering Techniques"
+    `_ Michael
+    Steinbach, George Karypis and Vipin Kumar, Department of Computer Science and
+    Engineering, University of Minnesota (June 2000)
+  * `"Performance Analysis of K-Means and Bisecting K-Means Algorithms in Weblog
+    Data"
+    `_
+    K. Abirami and Dr. P. Mayilvahanan, International Journal of Emerging
+    Technologies in Engineering Research (IJETER) Volume 4, Issue 8 (August 2016)
+  * `"Bisecting K-means Algorithm Based on K-valued Self-determining and
+    Clustering Center Optimization"
+    `_ Jian Di, Xinyue Gou, School
+    of Control and Computer Engineering, North China Electric Power University,
+    Baoding, Hebei, China (August 2017)
+

.. _dbscan:

@@ -954,79 +928,68 @@ samples that are still part of a cluster. Moreover, the outliers are indicated
by black points below.

.. |dbscan_results| image:: ../auto_examples/cluster/images/sphx_glr_plot_dbscan_002.png
-   :target: ../auto_examples/cluster/plot_dbscan.html
-   :scale: 50
+  :target: ../auto_examples/cluster/plot_dbscan.html
+  :scale: 50

.. centered:: |dbscan_results|

-.. topic:: Examples:
-
- * :ref:`sphx_glr_auto_examples_cluster_plot_dbscan.py`
+.. rubric:: Examples

-|details-start|
-**Implementation**
-|details-split|
+* :ref:`sphx_glr_auto_examples_cluster_plot_dbscan.py`

-The DBSCAN algorithm is deterministic, always generating the same clusters when
-given the same data in the same order. However, the results can differ when
-data is provided in a different order. First, even though the core samples will
-always be assigned to the same clusters, the labels of those clusters will
-depend on the order in which those samples are encountered in the data. Second
-and more importantly, the clusters to which non-core samples are assigned can
-differ depending on the data order. This would happen when a non-core sample
-has a distance lower than ``eps`` to two core samples in different clusters. By
-the triangular inequality, those two core samples must be more distant than
-``eps`` from each other, or they would be in the same cluster. The non-core
-sample is assigned to whichever cluster is generated first in a pass through the
-data, and so the results will depend on the data ordering.
+.. dropdown:: Implementation

+  The DBSCAN algorithm is deterministic, always generating the same clusters when
+  given the same data in the same order. However, the results can differ when
+  data is provided in a different order. First, even though the core samples will
+  always be assigned to the same clusters, the labels of those clusters will
+  depend on the order in which those samples are encountered in the data. Second
+  and more importantly, the clusters to which non-core samples are assigned can
+  differ depending on the data order. This would happen when a non-core sample
+  has a distance lower than ``eps`` to two core samples in different clusters. By
+  the triangular inequality, those two core samples must be more distant than
+  ``eps`` from each other, or they would be in the same cluster. The non-core
+  sample is assigned to whichever cluster is generated first in a pass through the
+  data, and so the results will depend on the data ordering.
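+
+  As a small illustration of the labeling behavior described above (a toy
+  dataset used only for this sketch; ``eps`` and ``min_samples`` are chosen
+  arbitrarily), reversing the order of the samples renumbers the clusters but
+  preserves their composition::
+
+    >>> import numpy as np
+    >>> from sklearn.cluster import DBSCAN
+    >>> X = np.array([[1., 2.], [2., 2.], [2., 3.],
+    ...               [8., 7.], [8., 8.], [25., 80.]])
+    >>> DBSCAN(eps=3, min_samples=2).fit(X).labels_
+    array([ 0,  0,  0,  1,  1, -1])
+    >>> DBSCAN(eps=3, min_samples=2).fit(X[::-1]).labels_
+    array([-1,  0,  0,  1,  1,  1])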
-|details-end| + The current implementation uses ball trees and kd-trees to determine the + neighborhood of points, which avoids calculating the full distance matrix (as + was done in scikit-learn versions before 0.14). The possibility to use custom + metrics is retained; for details, see :class:`NearestNeighbors`. -|details-start| -**Memory consumption for large sample sizes** -|details-split| +.. dropdown:: Memory consumption for large sample sizes -This implementation is by default not memory efficient because it constructs a -full pairwise similarity matrix in the case where kd-trees or ball-trees cannot -be used (e.g., with sparse matrices). This matrix will consume :math:`n^2` -floats. A couple of mechanisms for getting around this are: + This implementation is by default not memory efficient because it constructs a + full pairwise similarity matrix in the case where kd-trees or ball-trees cannot + be used (e.g., with sparse matrices). This matrix will consume :math:`n^2` + floats. A couple of mechanisms for getting around this are: -- Use :ref:`OPTICS ` clustering in conjunction with the `extract_dbscan` - method. OPTICS clustering also calculates the full pairwise matrix, but only - keeps one row in memory at a time (memory complexity n). + - Use :ref:`OPTICS ` clustering in conjunction with the `extract_dbscan` + method. OPTICS clustering also calculates the full pairwise matrix, but only + keeps one row in memory at a time (memory complexity n). -- A sparse radius neighborhood graph (where missing entries are presumed to be - out of eps) can be precomputed in a memory-efficient way and dbscan can be run - over this with ``metric='precomputed'``. See - :meth:`sklearn.neighbors.NearestNeighbors.radius_neighbors_graph`. + - A sparse radius neighborhood graph (where missing entries are presumed to be + out of eps) can be precomputed in a memory-efficient way and dbscan can be run + over this with ``metric='precomputed'``. See + :meth:`sklearn.neighbors.NearestNeighbors.radius_neighbors_graph`. -- The dataset can be compressed, either by removing exact duplicates if these - occur in your data, or by using BIRCH. Then you only have a relatively small - number of representatives for a large number of points. You can then provide a - ``sample_weight`` when fitting DBSCAN. + - The dataset can be compressed, either by removing exact duplicates if these + occur in your data, or by using BIRCH. Then you only have a relatively small + number of representatives for a large number of points. You can then provide a + ``sample_weight`` when fitting DBSCAN. -|details-end| - -|details-start| -**References** -|details-split| +.. dropdown:: References * `A Density-Based Algorithm for Discovering Clusters in Large Spatial Databases with Noise `_ Ester, M., H. P. Kriegel, J. Sander, and X. Xu, In Proceedings of the 2nd International Conference on Knowledge Discovery and Data Mining, Portland, OR, - AAAI Press, pp. 226–231. 1996 + AAAI Press, pp. 226-231. 1996 * :doi:`DBSCAN revisited, revisited: why and how you should (still) use DBSCAN. <10.1145/3068335>` Schubert, E., Sander, J., Ester, M., Kriegel, H. P., & Xu, X. (2017). In ACM Transactions on Database Systems (TODS), 42(3), 19. -|details-end| .. _hdbscan: @@ -1046,9 +1009,9 @@ scales by building an alternative representation of the clustering problem. This implementation is adapted from the original implementation of HDBSCAN, `scikit-learn-contrib/hdbscan `_ based on [LJ2017]_. -.. topic:: Examples: +.. 
rubric:: Examples - * :ref:`sphx_glr_auto_examples_cluster_plot_hdbscan.py` +* :ref:`sphx_glr_auto_examples_cluster_plot_hdbscan.py` Mutual Reachability Graph ------------------------- @@ -1109,11 +1072,11 @@ it relies solely on the choice of `min_samples`, which tends to be a more robust hyperparameter. .. |hdbscan_ground_truth| image:: ../auto_examples/cluster/images/sphx_glr_plot_hdbscan_005.png - :target: ../auto_examples/cluster/plot_hdbscan.html - :scale: 75 + :target: ../auto_examples/cluster/plot_hdbscan.html + :scale: 75 .. |hdbscan_results| image:: ../auto_examples/cluster/images/sphx_glr_plot_hdbscan_007.png - :target: ../auto_examples/cluster/plot_hdbscan.html - :scale: 75 + :target: ../auto_examples/cluster/plot_hdbscan.html + :scale: 75 .. centered:: |hdbscan_ground_truth| .. centered:: |hdbscan_results| @@ -1124,19 +1087,19 @@ than `minimum_cluster_size` many samples are considered noise. In practice, one can set `minimum_cluster_size = min_samples` to couple the parameters and simplify the hyperparameter space. -.. topic:: References: +.. rubric:: References - .. [CM2013] Campello, R.J.G.B., Moulavi, D., Sander, J. (2013). Density-Based - Clustering Based on Hierarchical Density Estimates. In: Pei, J., Tseng, V.S., - Cao, L., Motoda, H., Xu, G. (eds) Advances in Knowledge Discovery and Data - Mining. PAKDD 2013. Lecture Notes in Computer Science(), vol 7819. Springer, - Berlin, Heidelberg. :doi:`Density-Based Clustering Based on Hierarchical - Density Estimates <10.1007/978-3-642-37456-2_14>` +.. [CM2013] Campello, R.J.G.B., Moulavi, D., Sander, J. (2013). Density-Based + Clustering Based on Hierarchical Density Estimates. In: Pei, J., Tseng, V.S., + Cao, L., Motoda, H., Xu, G. (eds) Advances in Knowledge Discovery and Data + Mining. PAKDD 2013. Lecture Notes in Computer Science(), vol 7819. Springer, + Berlin, Heidelberg. :doi:`Density-Based Clustering Based on Hierarchical + Density Estimates <10.1007/978-3-642-37456-2_14>` - .. [LJ2017] L. McInnes and J. Healy, (2017). Accelerated Hierarchical Density - Based Clustering. In: IEEE International Conference on Data Mining Workshops - (ICDMW), 2017, pp. 33-42. :doi:`Accelerated Hierarchical Density Based - Clustering <10.1109/ICDMW.2017.12>` +.. [LJ2017] L. McInnes and J. Healy, (2017). Accelerated Hierarchical Density + Based Clustering. In: IEEE International Conference on Data Mining Workshops + (ICDMW), 2017, pp. 33-42. :doi:`Accelerated Hierarchical Density Based + Clustering <10.1109/ICDMW.2017.12>` .. _optics: @@ -1182,58 +1145,48 @@ the linear segment clusters of the reachability plot. Note that the blue and red clusters are adjacent in the reachability plot, and can be hierarchically represented as children of a larger parent cluster. -.. topic:: Examples: - - * :ref:`sphx_glr_auto_examples_cluster_plot_optics.py` +.. rubric:: Examples +* :ref:`sphx_glr_auto_examples_cluster_plot_optics.py` -|details-start| -**Comparison with DBSCAN** -|details-split| -The results from OPTICS ``cluster_optics_dbscan`` method and DBSCAN are very -similar, but not always identical; specifically, labeling of periphery and noise -points. This is in part because the first samples of each dense area processed -by OPTICS have a large reachability value while being close to other points in -their area, and will thus sometimes be marked as noise rather than periphery. -This affects adjacent points when they are considered as candidates for being -marked as either periphery or noise. +.. 
dropdown:: Comparison with DBSCAN -Note that for any single value of ``eps``, DBSCAN will tend to have a shorter -run time than OPTICS; however, for repeated runs at varying ``eps`` values, a -single run of OPTICS may require less cumulative runtime than DBSCAN. It is also -important to note that OPTICS' output is close to DBSCAN's only if ``eps`` and -``max_eps`` are close. + The results from OPTICS ``cluster_optics_dbscan`` method and DBSCAN are very + similar, but not always identical; specifically, labeling of periphery and noise + points. This is in part because the first samples of each dense area processed + by OPTICS have a large reachability value while being close to other points in + their area, and will thus sometimes be marked as noise rather than periphery. + This affects adjacent points when they are considered as candidates for being + marked as either periphery or noise. -|details-end| + Note that for any single value of ``eps``, DBSCAN will tend to have a shorter + run time than OPTICS; however, for repeated runs at varying ``eps`` values, a + single run of OPTICS may require less cumulative runtime than DBSCAN. It is also + important to note that OPTICS' output is close to DBSCAN's only if ``eps`` and + ``max_eps`` are close. -|details-start| -**Computational Complexity** -|details-split| +.. dropdown:: Computational Complexity -Spatial indexing trees are used to avoid calculating the full distance matrix, -and allow for efficient memory usage on large sets of samples. Different -distance metrics can be supplied via the ``metric`` keyword. + Spatial indexing trees are used to avoid calculating the full distance matrix, + and allow for efficient memory usage on large sets of samples. Different + distance metrics can be supplied via the ``metric`` keyword. -For large datasets, similar (but not identical) results can be obtained via -:class:`HDBSCAN`. The HDBSCAN implementation is multithreaded, and has better -algorithmic runtime complexity than OPTICS, at the cost of worse memory scaling. -For extremely large datasets that exhaust system memory using HDBSCAN, OPTICS -will maintain :math:`n` (as opposed to :math:`n^2`) memory scaling; however, -tuning of the ``max_eps`` parameter will likely need to be used to give a -solution in a reasonable amount of wall time. + For large datasets, similar (but not identical) results can be obtained via + :class:`HDBSCAN`. The HDBSCAN implementation is multithreaded, and has better + algorithmic runtime complexity than OPTICS, at the cost of worse memory scaling. + For extremely large datasets that exhaust system memory using HDBSCAN, OPTICS + will maintain :math:`n` (as opposed to :math:`n^2`) memory scaling; however, + tuning of the ``max_eps`` parameter will likely need to be used to give a + solution in a reasonable amount of wall time. -|details-end| -|details-start| -**References** -|details-split| +.. dropdown:: References -* "OPTICS: ordering points to identify the clustering structure." Ankerst, - Mihael, Markus M. Breunig, Hans-Peter Kriegel, and Jörg Sander. In ACM Sigmod - Record, vol. 28, no. 2, pp. 49-60. ACM, 1999. + * "OPTICS: ordering points to identify the clustering structure." Ankerst, + Mihael, Markus M. Breunig, Hans-Peter Kriegel, and Jörg Sander. In ACM Sigmod + Record, vol. 28, no. 2, pp. 49-60. ACM, 1999. -|details-end| .. 
_birch:

@@ -1269,75 +1222,60 @@ If ``n_clusters`` is set to None, the subclusters from the leaves are directly
read off, otherwise a global clustering step labels these subclusters into global
clusters (labels) and the samples are mapped to the global label of the nearest subcluster.

-|details-start|
-**Algorithm description**
-|details-split|
-
-- A new sample is inserted into the root of the CF Tree which is a CF Node. It
-  is then merged with the subcluster of the root, that has the smallest radius
-  after merging, constrained by the threshold and branching factor conditions.
-  If the subcluster has any child node, then this is done repeatedly till it
-  reaches a leaf. After finding the nearest subcluster in the leaf, the
-  properties of this subcluster and the parent subclusters are recursively
-  updated.
-
-- If the radius of the subcluster obtained by merging the new sample and the
-  nearest subcluster is greater than the square of the threshold and if the
-  number of subclusters is greater than the branching factor, then a space is
-  temporarily allocated to this new sample. The two farthest subclusters are
-  taken and the subclusters are divided into two groups on the basis of the
-  distance between these subclusters.
-
-- If this split node has a parent subcluster and there is room for a new
-  subcluster, then the parent is split into two. If there is no room, then this
-  node is again split into two and the process is continued recursively, till it
-  reaches the root.
-
-|details-end|
-
-|details-start|
-**BIRCH or MiniBatchKMeans?**
-|details-split|
-
-- BIRCH does not scale very well to high dimensional data. As a rule of thumb if
-  ``n_features`` is greater than twenty, it is generally better to use MiniBatchKMeans.
-- If the number of instances of data needs to be reduced, or if one wants a
-  large number of subclusters either as a preprocessing step or otherwise,
-  BIRCH is more useful than MiniBatchKMeans.
-
-.. image:: ../auto_examples/cluster/images/sphx_glr_plot_birch_vs_minibatchkmeans_001.png
+.. dropdown:: Algorithm description
+
+  - A new sample is inserted into the root of the CF Tree, which is a CF Node. It
+    is then merged with the subcluster of the root that has the smallest radius
+    after merging, constrained by the threshold and branching factor conditions.
+    If the subcluster has any child node, then this is done repeatedly until it
+    reaches a leaf. After finding the nearest subcluster in the leaf, the
+    properties of this subcluster and the parent subclusters are recursively
+    updated.
+
+  - If the radius of the subcluster obtained by merging the new sample and the
+    nearest subcluster is greater than the square of the threshold and if the
+    number of subclusters is greater than the branching factor, then a space is
+    temporarily allocated to this new sample. The two farthest subclusters are
+    taken and the subclusters are divided into two groups on the basis of the
+    distance between these subclusters.
+
+  - If this split node has a parent subcluster and there is room for a new
+    subcluster, then the parent is split into two. If there is no room, then this
+    node is again split into two and the process is continued recursively, until it
+    reaches the root.
+
+.. dropdown:: BIRCH or MiniBatchKMeans?
+
+  - BIRCH does not scale very well to high dimensional data. As a rule of thumb, if
+    ``n_features`` is greater than twenty, it is generally better to use MiniBatchKMeans.
+  - If the number of data instances needs to be reduced, or if one wants a
+    large number of subclusters either as a preprocessing step or otherwise,
+    BIRCH is more useful than MiniBatchKMeans.
+
+  .. image:: ../auto_examples/cluster/images/sphx_glr_plot_birch_vs_minibatchkmeans_001.png
    :target: ../auto_examples/cluster/plot_birch_vs_minibatchkmeans.html

-|details-end|
+.. dropdown:: How to use partial_fit?

-|details-start|
-**How to use partial_fit?**
-|details-split|
+  To avoid the computation of global clustering, for every call of ``partial_fit``
+  the user is advised to:

-To avoid the computation of global clustering, for every call of ``partial_fit``
-the user is advised
+  1. Set ``n_clusters=None`` initially.
+  2. Train all data by multiple calls to ``partial_fit``.
+  3. Set ``n_clusters`` to a required value using
+     ``brc.set_params(n_clusters=n_clusters)``.
+  4. Call ``partial_fit`` finally with no arguments, i.e. ``brc.partial_fit()``,
+     which performs the global clustering.

-1. To set ``n_clusters=None`` initially
-2. Train all data by multiple calls to partial_fit.
-3. Set ``n_clusters`` to a required value using
-   ``brc.set_params(n_clusters=n_clusters)``.
-4. Call ``partial_fit`` finally with no arguments, i.e. ``brc.partial_fit()``
-   which performs the global clustering.
+.. dropdown:: References

-|details-end|
+  * Tian Zhang, Raghu Ramakrishnan, Miron Livny BIRCH: An efficient data
+    clustering method for large databases.
+    https://www.cs.sfu.ca/CourseCentral/459/han/papers/zhang96.pdf

-|details-start|
-**References**
-|details-split|
+  * Roberto Perdisci JBirch - Java implementation of BIRCH clustering algorithm
+    https://code.google.com/archive/p/jbirch

-* Tian Zhang, Raghu Ramakrishnan, Maron Livny BIRCH: An efficient data
- clustering method for large databases.
- https://www.cs.sfu.ca/CourseCentral/459/han/papers/zhang96.pdf
-
-* Roberto Perdisci JBirch - Java implementation of BIRCH clustering algorithm
- https://code.google.com/archive/p/jbirch
-
-|details-end|

.. _clustering_evaluation:

@@ -1372,32 +1310,32 @@ ignoring permutations::

   >>> labels_true = [0, 0, 0, 1, 1, 1]
   >>> labels_pred = [0, 0, 1, 1, 2, 2]
   >>> metrics.rand_score(labels_true, labels_pred)
-  0.66...
+  0.66

The Rand index does not guarantee a value close to 0.0 for a random
labelling. The adjusted Rand index **corrects for chance** and will give such a
baseline.

   >>> metrics.adjusted_rand_score(labels_true, labels_pred)
-  0.24...
+  0.24

As with all clustering metrics, one can permute 0 and 1 in the predicted
labels, rename 2 to 3, and get the same score::

   >>> labels_pred = [1, 1, 0, 0, 3, 3]
   >>> metrics.rand_score(labels_true, labels_pred)
-  0.66...
+  0.66
   >>> metrics.adjusted_rand_score(labels_true, labels_pred)
-  0.24...
+  0.24

-Furthermore, both :func:`rand_score` :func:`adjusted_rand_score` are
+Furthermore, both :func:`rand_score` and :func:`adjusted_rand_score` are
**symmetric**: swapping the arguments does not change the scores. They can thus
be used as **consensus measures**::

   >>> metrics.rand_score(labels_pred, labels_true)
-  0.66...
+  0.66
   >>> metrics.adjusted_rand_score(labels_pred, labels_true)
-  0.24...
+  0.24

Perfect labeling is scored 1.0::

@@ -1410,14 +1348,14 @@ Perfect labeling is scored 1.0::

Poorly agreeing labels (e.g. independent labelings) have lower scores,
and for the adjusted Rand index the score will be negative or close to
zero.
However, for the unadjusted Rand index the score, while lower, -will not necessarily be close to zero.:: +will not necessarily be close to zero:: >>> labels_true = [0, 0, 0, 0, 0, 0, 1, 1] >>> labels_pred = [0, 1, 2, 3, 4, 5, 5, 6] >>> metrics.rand_score(labels_true, labels_pred) - 0.39... + 0.39 >>> metrics.adjusted_rand_score(labels_true, labels_pred) - -0.07... + -0.072 .. topic:: Advantages: @@ -1433,7 +1371,7 @@ will not necessarily be close to zero.:: - **Bounded range**: Lower values indicate different labelings, similar clusterings have a high (adjusted or unadjusted) Rand index, 1.0 is the perfect match score. The score range is [0, 1] for the unadjusted Rand index - and [-1, 1] for the adjusted Rand index. + and [-0.5, 1] for the adjusted Rand index. - **No assumption is made on the cluster structure**: The (adjusted or unadjusted) Rand index can be used to compare all kinds of clustering @@ -1460,64 +1398,55 @@ will not necessarily be close to zero.:: ground truth clustering resulting in a high proportion of pair labels that agree, which leads subsequently to a high score. -.. topic:: Examples: - - * :ref:`sphx_glr_auto_examples_cluster_plot_adjusted_for_chance_measures.py`: - Analysis of the impact of the dataset size on the value of clustering measures - for random assignments. +.. rubric:: Examples +* :ref:`sphx_glr_auto_examples_cluster_plot_adjusted_for_chance_measures.py`: + Analysis of the impact of the dataset size on the value of + clustering measures for random assignments. -|details-start| -**Mathematical formulation** -|details-split| +.. dropdown:: Mathematical formulation -If C is a ground truth class assignment and K the clustering, let us define -:math:`a` and :math:`b` as: + If C is a ground truth class assignment and K the clustering, let us define + :math:`a` and :math:`b` as: -- :math:`a`, the number of pairs of elements that are in the same set in C and - in the same set in K + - :math:`a`, the number of pairs of elements that are in the same set in C and + in the same set in K -- :math:`b`, the number of pairs of elements that are in different sets in C and - in different sets in K + - :math:`b`, the number of pairs of elements that are in different sets in C and + in different sets in K -The unadjusted Rand index is then given by: + The unadjusted Rand index is then given by: -.. math:: \text{RI} = \frac{a + b}{C_2^{n_{samples}}} + .. math:: \text{RI} = \frac{a + b}{C_2^{n_{samples}}} -where :math:`C_2^{n_{samples}}` is the total number of possible pairs in the -dataset. It does not matter if the calculation is performed on ordered pairs or -unordered pairs as long as the calculation is performed consistently. + where :math:`C_2^{n_{samples}}` is the total number of possible pairs in the + dataset. It does not matter if the calculation is performed on ordered pairs or + unordered pairs as long as the calculation is performed consistently. -However, the Rand index does not guarantee that random label assignments will -get a value close to zero (esp. if the number of clusters is in the same order -of magnitude as the number of samples). + However, the Rand index does not guarantee that random label assignments will + get a value close to zero (esp. if the number of clusters is in the same order + of magnitude as the number of samples). 
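+
+  For instance, with two labelings drawn independently at random, the
+  unadjusted Rand index can be high while the adjusted Rand index stays close
+  to zero (a sketch added for concreteness; the exact values depend on the
+  random seed, hence the skipped doctests)::
+
+    >>> import numpy as np
+    >>> from sklearn import metrics
+    >>> rng = np.random.RandomState(0)
+    >>> labels_a = rng.randint(0, 10, size=1000)
+    >>> labels_b = rng.randint(0, 10, size=1000)
+    >>> metrics.rand_score(labels_a, labels_b)  # doctest: +SKIP
+    0.82...
+    >>> metrics.adjusted_rand_score(labels_a, labels_b)  # doctest: +SKIP
+    0.00...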
-To counter this effect we can discount the expected RI :math:`E[\text{RI}]` of -random labelings by defining the adjusted Rand index as follows: + To counter this effect we can discount the expected RI :math:`E[\text{RI}]` of + random labelings by defining the adjusted Rand index as follows: -.. math:: \text{ARI} = \frac{\text{RI} - E[\text{RI}]}{\max(\text{RI}) - E[\text{RI}]} + .. math:: \text{ARI} = \frac{\text{RI} - E[\text{RI}]}{\max(\text{RI}) - E[\text{RI}]} -|details-end| +.. dropdown:: References -|details-start| -**References** -|details-split| + * `Comparing Partitions + `_ L. Hubert and P. + Arabie, Journal of Classification 1985 -* `Comparing Partitions - `_ L. Hubert and P. - Arabie, Journal of Classification 1985 + * `Properties of the Hubert-Arabie adjusted Rand index + `_ D. Steinley, Psychological + Methods 2004 -* `Properties of the Hubert-Arabie adjusted Rand index - `_ D. Steinley, Psychological - Methods 2004 + * `Wikipedia entry for the Rand index + `_ -* `Wikipedia entry for the Rand index - `_ + * :doi:`Minimum adjusted Rand index for two clusterings of a given size, 2022, J. E. Chacón and A. I. Rastrojo <10.1007/s11634-022-00491-w>` -* `Wikipedia entry for the adjusted Rand index - `_ - -|details-end| .. _mutual_info_score: @@ -1537,21 +1466,21 @@ proposed more recently and is **normalized against chance**:: >>> labels_pred = [0, 0, 1, 1, 2, 2] >>> metrics.adjusted_mutual_info_score(labels_true, labels_pred) # doctest: +SKIP - 0.22504... + 0.22504 One can permute 0 and 1 in the predicted labels, rename 2 to 3 and get the same score:: >>> labels_pred = [1, 1, 0, 0, 3, 3] >>> metrics.adjusted_mutual_info_score(labels_true, labels_pred) # doctest: +SKIP - 0.22504... + 0.22504 All, :func:`mutual_info_score`, :func:`adjusted_mutual_info_score` and :func:`normalized_mutual_info_score` are symmetric: swapping the argument does not change the score. Thus they can be used as a **consensus measure**:: >>> metrics.adjusted_mutual_info_score(labels_pred, labels_true) # doctest: +SKIP - 0.22504... + 0.22504 Perfect labeling is scored 1.0:: @@ -1565,14 +1494,14 @@ Perfect labeling is scored 1.0:: This is not true for ``mutual_info_score``, which is therefore harder to judge:: >>> metrics.mutual_info_score(labels_true, labels_pred) # doctest: +SKIP - 0.69... + 0.69 Bad (e.g. independent labelings) have non-positive scores:: >>> labels_true = [0, 1, 2, 0, 3, 4, 5, 1] >>> labels_pred = [1, 1, 0, 0, 2, 2, 2, 2] >>> metrics.adjusted_mutual_info_score(labels_true, labels_pred) # doctest: +SKIP - -0.10526... + -0.10526 .. topic:: Advantages: @@ -1598,80 +1527,77 @@ Bad (e.g. independent labelings) have non-positive scores:: - NMI and MI are not adjusted against chance. -.. topic:: Examples: - - * :ref:`sphx_glr_auto_examples_cluster_plot_adjusted_for_chance_measures.py`: Analysis - of the impact of the dataset size on the value of clustering measures for - random assignments. This example also includes the Adjusted Rand Index. +.. rubric:: Examples +* :ref:`sphx_glr_auto_examples_cluster_plot_adjusted_for_chance_measures.py`: Analysis + of the impact of the dataset size on the value of clustering measures for random + assignments. This example also includes the Adjusted Rand Index. -|details-start| -**Mathematical formulation** -|details-split| +.. dropdown:: Mathematical formulation -Assume two label assignments (of the same N objects), :math:`U` and :math:`V`. 
-Their entropy is the amount of uncertainty for a partition set, defined by: + Assume two label assignments (of the same N objects), :math:`U` and :math:`V`. + Their entropy is the amount of uncertainty for a partition set, defined by: -.. math:: H(U) = - \sum_{i=1}^{|U|}P(i)\log(P(i)) + .. math:: H(U) = - \sum_{i=1}^{|U|}P(i)\log(P(i)) -where :math:`P(i) = |U_i| / N` is the probability that an object picked at -random from :math:`U` falls into class :math:`U_i`. Likewise for :math:`V`: + where :math:`P(i) = |U_i| / N` is the probability that an object picked at + random from :math:`U` falls into class :math:`U_i`. Likewise for :math:`V`: -.. math:: H(V) = - \sum_{j=1}^{|V|}P'(j)\log(P'(j)) + .. math:: H(V) = - \sum_{j=1}^{|V|}P'(j)\log(P'(j)) -With :math:`P'(j) = |V_j| / N`. The mutual information (MI) between :math:`U` -and :math:`V` is calculated by: + With :math:`P'(j) = |V_j| / N`. The mutual information (MI) between :math:`U` + and :math:`V` is calculated by: -.. math:: \text{MI}(U, V) = \sum_{i=1}^{|U|}\sum_{j=1}^{|V|}P(i, j)\log\left(\frac{P(i,j)}{P(i)P'(j)}\right) + .. math:: \text{MI}(U, V) = \sum_{i=1}^{|U|}\sum_{j=1}^{|V|}P(i, j)\log\left(\frac{P(i,j)}{P(i)P'(j)}\right) -where :math:`P(i, j) = |U_i \cap V_j| / N` is the probability that an object -picked at random falls into both classes :math:`U_i` and :math:`V_j`. + where :math:`P(i, j) = |U_i \cap V_j| / N` is the probability that an object + picked at random falls into both classes :math:`U_i` and :math:`V_j`. -It also can be expressed in set cardinality formulation: + It also can be expressed in set cardinality formulation: -.. math:: \text{MI}(U, V) = \sum_{i=1}^{|U|} \sum_{j=1}^{|V|} \frac{|U_i \cap V_j|}{N}\log\left(\frac{N|U_i \cap V_j|}{|U_i||V_j|}\right) + .. math:: \text{MI}(U, V) = \sum_{i=1}^{|U|} \sum_{j=1}^{|V|} \frac{|U_i \cap V_j|}{N}\log\left(\frac{N|U_i \cap V_j|}{|U_i||V_j|}\right) -The normalized mutual information is defined as + The normalized mutual information is defined as -.. math:: \text{NMI}(U, V) = \frac{\text{MI}(U, V)}{\text{mean}(H(U), H(V))} + .. math:: \text{NMI}(U, V) = \frac{\text{MI}(U, V)}{\text{mean}(H(U), H(V))} -This value of the mutual information and also the normalized variant is not -adjusted for chance and will tend to increase as the number of different labels -(clusters) increases, regardless of the actual amount of "mutual information" -between the label assignments. + This value of the mutual information and also the normalized variant is not + adjusted for chance and will tend to increase as the number of different labels + (clusters) increases, regardless of the actual amount of "mutual information" + between the label assignments. -The expected value for the mutual information can be calculated using the -following equation [VEB2009]_. In this equation, :math:`a_i = |U_i|` (the number -of elements in :math:`U_i`) and :math:`b_j = |V_j|` (the number of elements in -:math:`V_j`). + The expected value for the mutual information can be calculated using the + following equation [VEB2009]_. In this equation, :math:`a_i = |U_i|` (the number + of elements in :math:`U_i`) and :math:`b_j = |V_j|` (the number of elements in + :math:`V_j`). -.. math:: E[\text{MI}(U,V)]=\sum_{i=1}^{|U|} \sum_{j=1}^{|V|} \sum_{n_{ij}=(a_i+b_j-N)^+ - }^{\min(a_i, b_j)} \frac{n_{ij}}{N}\log \left( \frac{ N.n_{ij}}{a_i b_j}\right) - \frac{a_i!b_j!(N-a_i)!(N-b_j)!}{N!n_{ij}!(a_i-n_{ij})!(b_j-n_{ij})! - (N-a_i-b_j+n_{ij})!} + .. 
math:: E[\text{MI}(U,V)]=\sum_{i=1}^{|U|} \sum_{j=1}^{|V|} \sum_{n_{ij}=(a_i+b_j-N)^+ + }^{\min(a_i, b_j)} \frac{n_{ij}}{N}\log \left( \frac{ N.n_{ij}}{a_i b_j}\right) + \frac{a_i!b_j!(N-a_i)!(N-b_j)!}{N!n_{ij}!(a_i-n_{ij})!(b_j-n_{ij})! + (N-a_i-b_j+n_{ij})!} -Using the expected value, the adjusted mutual information can then be calculated -using a similar form to that of the adjusted Rand index: + Using the expected value, the adjusted mutual information can then be calculated + using a similar form to that of the adjusted Rand index: -.. math:: \text{AMI} = \frac{\text{MI} - E[\text{MI}]}{\text{mean}(H(U), H(V)) - E[\text{MI}]} + .. math:: \text{AMI} = \frac{\text{MI} - E[\text{MI}]}{\text{mean}(H(U), H(V)) - E[\text{MI}]} -For normalized mutual information and adjusted mutual information, the -normalizing value is typically some *generalized* mean of the entropies of each -clustering. Various generalized means exist, and no firm rules exist for -preferring one over the others. The decision is largely a field-by-field basis; -for instance, in community detection, the arithmetic mean is most common. Each -normalizing method provides "qualitatively similar behaviours" [YAT2016]_. In -our implementation, this is controlled by the ``average_method`` parameter. + For normalized mutual information and adjusted mutual information, the + normalizing value is typically some *generalized* mean of the entropies of each + clustering. Various generalized means exist, and no firm rules exist for + preferring one over the others. The decision is largely a field-by-field basis; + for instance, in community detection, the arithmetic mean is most common. Each + normalizing method provides "qualitatively similar behaviours" [YAT2016]_. In + our implementation, this is controlled by the ``average_method`` parameter. -Vinh et al. (2010) named variants of NMI and AMI by their averaging method -[VEB2010]_. Their 'sqrt' and 'sum' averages are the geometric and arithmetic -means; we use these more broadly common names. + Vinh et al. (2010) named variants of NMI and AMI by their averaging method + [VEB2010]_. Their 'sqrt' and 'sum' averages are the geometric and arithmetic + means; we use these more broadly common names. -.. topic:: References: + .. rubric:: References - * Strehl, Alexander, and Joydeep Ghosh (2002). "Cluster ensembles – a + * Strehl, Alexander, and Joydeep Ghosh (2002). "Cluster ensembles - a knowledge reuse framework for combining multiple partitions". Journal of - Machine Learning Research 3: 583–617. `doi:10.1162/153244303321897735 + Machine Learning Research 3: 583-617. `doi:10.1162/153244303321897735 `_. * `Wikipedia entry for the (normalized) Mutual Information @@ -1696,7 +1622,6 @@ means; we use these more broadly common names. Reports 6: 30750. `doi:10.1038/srep30750 `_. -|details-end| .. _homogeneity_completeness: @@ -1724,16 +1649,16 @@ We can turn those concept as scores :func:`homogeneity_score` and >>> labels_pred = [0, 0, 1, 1, 2, 2] >>> metrics.homogeneity_score(labels_true, labels_pred) - 0.66... + 0.66 >>> metrics.completeness_score(labels_true, labels_pred) - 0.42... + 0.42 Their harmonic mean called **V-measure** is computed by :func:`v_measure_score`:: >>> metrics.v_measure_score(labels_true, labels_pred) - 0.51... + 0.516 This function's formula is as follows: @@ -1742,12 +1667,12 @@ This function's formula is as follows: `beta` defaults to a value of 1.0, but for using a value less than 1 for beta:: >>> metrics.v_measure_score(labels_true, labels_pred, beta=0.6) - 0.54... 
+ 0.547 more weight will be attributed to homogeneity, and using a value greater than 1:: >>> metrics.v_measure_score(labels_true, labels_pred, beta=1.8) - 0.48... + 0.48 more weight will be attributed to completeness. @@ -1758,14 +1683,14 @@ Homogeneity, completeness and V-measure can be computed at once using :func:`homogeneity_completeness_v_measure` as follows:: >>> metrics.homogeneity_completeness_v_measure(labels_true, labels_pred) - (0.66..., 0.42..., 0.51...) + (0.67, 0.42, 0.52) The following clustering assignment is slightly better, since it is homogeneous but not complete:: >>> labels_pred = [0, 0, 0, 1, 2, 2] >>> metrics.homogeneity_completeness_v_measure(labels_true, labels_pred) - (1.0, 0.68..., 0.81...) + (1.0, 0.68, 0.81) .. note:: @@ -1814,77 +1739,78 @@ homogeneous but not complete:: almost never available in practice or requires manual assignment by human annotators (as in the supervised learning setting). -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_cluster_plot_adjusted_for_chance_measures.py`: Analysis - of the impact of the dataset size on the value of clustering measures for - random assignments. +* :ref:`sphx_glr_auto_examples_cluster_plot_adjusted_for_chance_measures.py`: Analysis + of the impact of the dataset size on the value of clustering measures for + random assignments. +.. dropdown:: Mathematical formulation -|details-start| -**Mathematical formulation** -|details-split| + Homogeneity and completeness scores are formally given by: -Homogeneity and completeness scores are formally given by: + .. math:: h = 1 - \frac{H(C|K)}{H(C)} -.. math:: h = 1 - \frac{H(C|K)}{H(C)} + .. math:: c = 1 - \frac{H(K|C)}{H(K)} -.. math:: c = 1 - \frac{H(K|C)}{H(K)} + where :math:`H(C|K)` is the **conditional entropy of the classes given the + cluster assignments** and is given by: -where :math:`H(C|K)` is the **conditional entropy of the classes given the -cluster assignments** and is given by: + .. math:: H(C|K) = - \sum_{c=1}^{|C|} \sum_{k=1}^{|K|} \frac{n_{c,k}}{n} + \cdot \log\left(\frac{n_{c,k}}{n_k}\right) -.. math:: H(C|K) = - \sum_{c=1}^{|C|} \sum_{k=1}^{|K|} \frac{n_{c,k}}{n} - \cdot \log\left(\frac{n_{c,k}}{n_k}\right) + and :math:`H(C)` is the **entropy of the classes** and is given by: -and :math:`H(C)` is the **entropy of the classes** and is given by: + .. math:: H(C) = - \sum_{c=1}^{|C|} \frac{n_c}{n} \cdot \log\left(\frac{n_c}{n}\right) -.. math:: H(C) = - \sum_{c=1}^{|C|} \frac{n_c}{n} \cdot \log\left(\frac{n_c}{n}\right) + with :math:`n` the total number of samples, :math:`n_c` and :math:`n_k` the + number of samples respectively belonging to class :math:`c` and cluster + :math:`k`, and finally :math:`n_{c,k}` the number of samples from class + :math:`c` assigned to cluster :math:`k`. -with :math:`n` the total number of samples, :math:`n_c` and :math:`n_k` the -number of samples respectively belonging to class :math:`c` and cluster -:math:`k`, and finally :math:`n_{c,k}` the number of samples from class -:math:`c` assigned to cluster :math:`k`. + The **conditional entropy of clusters given class** :math:`H(K|C)` and the + **entropy of clusters** :math:`H(K)` are defined in a symmetric manner. -The **conditional entropy of clusters given class** :math:`H(K|C)` and the -**entropy of clusters** :math:`H(K)` are defined in a symmetric manner. 
+ Rosenberg and Hirschberg further define **V-measure** as the **harmonic mean of + homogeneity and completeness**: -Rosenberg and Hirschberg further define **V-measure** as the **harmonic mean of -homogeneity and completeness**: + .. math:: v = 2 \cdot \frac{h \cdot c}{h + c} -.. math:: v = 2 \cdot \frac{h \cdot c}{h + c} +.. rubric:: References -|details-end| +* `V-Measure: A conditional entropy-based external cluster evaluation measure + `_ Andrew Rosenberg and Julia + Hirschberg, 2007 -.. topic:: References: +.. [B2011] `Identification and Characterization of Events in Social Media + `_, Hila + Becker, PhD Thesis. - * `V-Measure: A conditional entropy-based external cluster evaluation measure - `_ Andrew Rosenberg and Julia - Hirschberg, 2007 - - .. [B2011] `Identification and Characterization of Events in Social Media - `_, Hila - Becker, PhD Thesis. .. _fowlkes_mallows_scores: Fowlkes-Mallows scores ---------------------- -The Fowlkes-Mallows index (:func:`sklearn.metrics.fowlkes_mallows_score`) can be -used when the ground truth class assignments of the samples is known. The -Fowlkes-Mallows score FMI is defined as the geometric mean of the -pairwise precision and recall: +The original Fowlkes-Mallows index (FMI) was intended to measure the similarity +between two clustering results, which is inherently an unsupervised comparison. +The supervised adaptation of the Fowlkes-Mallows index +(as implemented in :func:`sklearn.metrics.fowlkes_mallows_score`) can be used +when the ground truth class assignments of the samples are known. +The FMI is defined as the geometric mean of the pairwise precision and recall: .. math:: \text{FMI} = \frac{\text{TP}}{\sqrt{(\text{TP} + \text{FP}) (\text{TP} + \text{FN})}} -Where ``TP`` is the number of **True Positive** (i.e. the number of pair -of points that belong to the same clusters in both the true labels and the -predicted labels), ``FP`` is the number of **False Positive** (i.e. the number -of pair of points that belong to the same clusters in the true labels and not -in the predicted labels) and ``FN`` is the number of **False Negative** (i.e. the -number of pair of points that belongs in the same clusters in the predicted -labels and not in the true labels). +In the above formula: + +* ``TP`` (**True Positive**): The number of pairs of points that are clustered together + both in the true labels and in the predicted labels. + +* ``FP`` (**False Positive**): The number of pairs of points that are clustered together + in the predicted labels but not in the true labels. + +* ``FN`` (**False Negative**): The number of pairs of points that are clustered together + in the true labels but not in the predicted labels. The score ranges from 0 to 1. A high value indicates a good similarity between two clusters. @@ -1894,7 +1820,7 @@ between two clusters. >>> labels_pred = [0, 0, 1, 1, 2, 2] >>> metrics.fowlkes_mallows_score(labels_true, labels_pred) - 0.47140... + 0.47140 One can permute 0 and 1 in the predicted labels, rename 2 to 3 and get the same score:: @@ -1902,7 +1828,7 @@ the same score:: >>> labels_pred = [1, 1, 0, 0, 3, 3] >>> metrics.fowlkes_mallows_score(labels_true, labels_pred) - 0.47140... + 0.47140 Perfect labeling is scored 1.0:: @@ -1941,19 +1867,15 @@ Bad (e.g. independent labelings) have zero scores:: manual assignment by human annotators (as in the supervised learning setting). -|details-start| -**References** -|details-split| +.. dropdown:: References -* E. B. Fowkles and C. L. Mallows, 1983. 
"A method for comparing two - hierarchical clusterings". Journal of the American Statistical - Association. - https://www.tandfonline.com/doi/abs/10.1080/01621459.1983.10478008 + * E. B. Fowkles and C. L. Mallows, 1983. "A method for comparing two + hierarchical clusterings". Journal of the American Statistical Association. + https://www.tandfonline.com/doi/abs/10.1080/01621459.1983.10478008 -* `Wikipedia entry for the Fowlkes-Mallows Index - `_ + * `Wikipedia entry for the Fowlkes-Mallows Index + `_ -|details-end| .. _silhouette_coefficient: @@ -1995,8 +1917,7 @@ cluster analysis. >>> kmeans_model = KMeans(n_clusters=3, random_state=1).fit(X) >>> labels = kmeans_model.labels_ >>> metrics.silhouette_score(X, labels, metric='euclidean') - 0.55... - + 0.55 .. topic:: Advantages: @@ -2012,23 +1933,18 @@ cluster analysis. other concepts of clusters, such as density based clusters like those obtained through DBSCAN. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_silhouette_analysis.py` : In - this example the silhouette analysis is used to choose an optimal value for - n_clusters. +* :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_silhouette_analysis.py` : In + this example the silhouette analysis is used to choose an optimal value for + n_clusters. +.. dropdown:: References -|details-start| -**References** -|details-split| + * Peter J. Rousseeuw (1987). :doi:`"Silhouettes: a Graphical Aid to the + Interpretation and Validation of Cluster Analysis"<10.1016/0377-0427(87)90125-7>`. + Computational and Applied Mathematics 20: 53-65. -* Peter J. Rousseeuw (1987). :doi:`"Silhouettes: a Graphical Aid to the - Interpretation and Validation of Cluster - Analysis"<10.1016/0377-0427(87)90125-7>` . Computational and Applied - Mathematics 20: 53–65. - -|details-end| .. _calinski_harabasz_index: @@ -2058,7 +1974,7 @@ cluster analysis: >>> kmeans_model = KMeans(n_clusters=3, random_state=1).fit(X) >>> labels = kmeans_model.labels_ >>> metrics.calinski_harabasz_score(X, labels) - 561.59... + 561.59 .. topic:: Advantages: @@ -2074,42 +1990,35 @@ cluster analysis: other concepts of clusters, such as density based clusters like those obtained through DBSCAN. -|details-start| -**Mathematical formulation** -|details-split| - -For a set of data :math:`E` of size :math:`n_E` which has been clustered into -:math:`k` clusters, the Calinski-Harabasz score :math:`s` is defined as the -ratio of the between-clusters dispersion mean and the within-cluster -dispersion: +.. dropdown:: Mathematical formulation -.. math:: - s = \frac{\mathrm{tr}(B_k)}{\mathrm{tr}(W_k)} \times \frac{n_E - k}{k - 1} + For a set of data :math:`E` of size :math:`n_E` which has been clustered into + :math:`k` clusters, the Calinski-Harabasz score :math:`s` is defined as the + ratio of the between-clusters dispersion mean and the within-cluster + dispersion: -where :math:`\mathrm{tr}(B_k)` is trace of the between group dispersion matrix -and :math:`\mathrm{tr}(W_k)` is the trace of the within-cluster dispersion -matrix defined by: + .. math:: + s = \frac{\mathrm{tr}(B_k)}{\mathrm{tr}(W_k)} \times \frac{n_E - k}{k - 1} -.. math:: W_k = \sum_{q=1}^k \sum_{x \in C_q} (x - c_q) (x - c_q)^T + where :math:`\mathrm{tr}(B_k)` is trace of the between group dispersion matrix + and :math:`\mathrm{tr}(W_k)` is the trace of the within-cluster dispersion + matrix defined by: -.. math:: B_k = \sum_{q=1}^k n_q (c_q - c_E) (c_q - c_E)^T + .. 
math:: W_k = \sum_{q=1}^k \sum_{x \in C_q} (x - c_q) (x - c_q)^T -with :math:`C_q` the set of points in cluster :math:`q`, :math:`c_q` the -center of cluster :math:`q`, :math:`c_E` the center of :math:`E`, and -:math:`n_q` the number of points in cluster :math:`q`. + .. math:: B_k = \sum_{q=1}^k n_q (c_q - c_E) (c_q - c_E)^T -|details-end| + with :math:`C_q` the set of points in cluster :math:`q`, :math:`c_q` the + center of cluster :math:`q`, :math:`c_E` the center of :math:`E`, and + :math:`n_q` the number of points in cluster :math:`q`. -|details-start| -**References** -|details-split| +.. dropdown:: References -* Caliński, T., & Harabasz, J. (1974). `"A Dendrite Method for Cluster Analysis" - `_. - :doi:`Communications in Statistics-theory and Methods 3: 1-27 - <10.1080/03610927408827101>`. + * Caliński, T., & Harabasz, J. (1974). `"A Dendrite Method for Cluster Analysis" + `_. + :doi:`Communications in Statistics-theory and Methods 3: 1-27 + <10.1080/03610927408827101>`. -|details-end| .. _davies-bouldin_index: @@ -2139,7 +2048,7 @@ cluster analysis as follows: >>> kmeans = KMeans(n_clusters=3, random_state=1).fit(X) >>> labels = kmeans.labels_ >>> davies_bouldin_score(X, labels) - 0.666... + 0.666 .. topic:: Advantages: @@ -2150,55 +2059,47 @@ cluster analysis as follows: .. topic:: Drawbacks: - - The Davies-Boulding index is generally higher for convex clusters than other - concepts of clusters, such as density based clusters like those obtained - from DBSCAN. + - The Davies-Bouldin index is generally higher for convex clusters than other + concepts of clusters, such as density-based clusters like those + obtained from DBSCAN. - The usage of centroid distance limits the distance metric to Euclidean space. +.. dropdown:: Mathematical formulation -|details-start| -**Mathematical formulation** -|details-split| + The index is defined as the average similarity between each cluster :math:`C_i` + for :math:`i=1, ..., k` and its most similar one :math:`C_j`. In the context of + this index, similarity is defined as a measure :math:`R_{ij}` that trades off: -The index is defined as the average similarity between each cluster :math:`C_i` -for :math:`i=1, ..., k` and its most similar one :math:`C_j`. In the context of -this index, similarity is defined as a measure :math:`R_{ij}` that trades off: + - :math:`s_i`, the average distance between each point of cluster :math:`i` and + the centroid of that cluster -- also known as cluster diameter. + - :math:`d_{ij}`, the distance between cluster centroids :math:`i` and + :math:`j`. -- :math:`s_i`, the average distance between each point of cluster :math:`i` and - the centroid of that cluster -- also know as cluster diameter. -- :math:`d_{ij}`, the distance between cluster centroids :math:`i` and - :math:`j`. + A simple choice to construct :math:`R_{ij}` so that it is nonnegative and + symmetric is: -A simple choice to construct :math:`R_{ij}` so that it is nonnegative and -symmetric is: + .. math:: + R_{ij} = \frac{s_i + s_j}{d_{ij}} -.. math:: - R_{ij} = \frac{s_i + s_j}{d_{ij}} + Then the Davies-Bouldin index is defined as: -Then the Davies-Bouldin index is defined as: + .. math:: + DB = \frac{1}{k} \sum_{i=1}^k \max_{i \neq j} R_{ij} -.. math:: - DB = \frac{1}{k} \sum_{i=1}^k \max_{i \neq j} R_{ij} +.. dropdown:: References -|details-end| + * Davies, David L.; Bouldin, Donald W. (1979). :doi:`"A Cluster Separation + Measure" <10.1109/TPAMI.1979.4766909>` IEEE Transactions on Pattern Analysis + and Machine Intelligence. PAMI-1 (2): 224-227. 
-|details-start| -**References** -|details-split| + * Halkidi, Maria; Batistakis, Yannis; Vazirgiannis, Michalis (2001). :doi:`"On + Clustering Validation Techniques" <10.1023/A:1012801612483>` Journal of + Intelligent Information Systems, 17(2-3), 107-145. -* Davies, David L.; Bouldin, Donald W. (1979). :doi:`"A Cluster Separation - Measure" <10.1109/TPAMI.1979.4766909>` IEEE Transactions on Pattern Analysis - and Machine Intelligence. PAMI-1 (2): 224-227. + * `Wikipedia entry for Davies-Bouldin index + `_. -* Halkidi, Maria; Batistakis, Yannis; Vazirgiannis, Michalis (2001). :doi:`"On - Clustering Validation Techniques" <10.1023/A:1012801612483>` Journal of - Intelligent Information Systems, 17(2-3), 107-145. - -* `Wikipedia entry for Davies-Bouldin index - `_. - -|details-end| .. _contingency_matrix: @@ -2220,7 +2121,7 @@ Here is an example:: array([[2, 1, 0], [0, 1, 2]]) -The first row of output array indicates that there are three samples whose +The first row of the output array indicates that there are three samples whose true cluster is "a". Of them, two are in predicted cluster 0, one is in 1, and none is in 2. And the second row indicates that there are three samples whose true cluster is "b". Of them, none is in predicted cluster 0, one is in @@ -2248,15 +2149,11 @@ of classes. - It doesn't give a single metric to use as an objective for clustering optimisation. +.. dropdown:: References -|details-start| -**References** -|details-split| + * `Wikipedia entry for contingency matrix + `_ -* `Wikipedia entry for contingency matrix - `_ - -|details-end| .. _pair_confusion_matrix: @@ -2334,11 +2231,7 @@ diagonal entries:: array([[ 0, 0], [12, 0]]) -|details-start| -**References** -|details-split| - - * :doi:`"Comparing Partitions" <10.1007/BF01908075>` L. Hubert and P. Arabie, - Journal of Classification 1985 +.. dropdown:: References -|details-end| + * :doi:`"Comparing Partitions" <10.1007/BF01908075>` L. Hubert and P. Arabie, + Journal of Classification 1985 diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst index 28931cf52f283..3ef0d94236aa6 100644 --- a/doc/modules/compose.rst +++ b/doc/modules/compose.rst @@ -79,20 +79,16 @@ is an estimator object:: >>> pipe Pipeline(steps=[('reduce_dim', PCA()), ('clf', SVC())]) -|details-start| -**Shorthand version using :func:`make_pipeline`** -|details-split| +.. dropdown:: Shorthand version using :func:`make_pipeline` -The utility function :func:`make_pipeline` is a shorthand -for constructing pipelines; -it takes a variable number of estimators and returns a pipeline, -filling in the names automatically:: + The utility function :func:`make_pipeline` is a shorthand + for constructing pipelines; + it takes a variable number of estimators and returns a pipeline, + filling in the names automatically:: - >>> from sklearn.pipeline import make_pipeline - >>> make_pipeline(PCA(), SVC()) - Pipeline(steps=[('pca', PCA()), ('svc', SVC())]) - -|details-end| + >>> from sklearn.pipeline import make_pipeline + >>> make_pipeline(PCA(), SVC()) + Pipeline(steps=[('pca', PCA()), ('svc', SVC())]) Access pipeline steps ..................... @@ -108,27 +104,23 @@ permitted). This is convenient for performing only some of the transformations >>> pipe[-1:] Pipeline(steps=[('clf', SVC())]) -|details-start| -**Accessing a step by name or position** -|details-split| - -A specific step can also be accessed by index or name by indexing (with ``[idx]``) the -pipeline:: +.. 
dropdown:: Accessing a step by name or position - >>> pipe.steps[0] - ('reduce_dim', PCA()) - >>> pipe[0] - PCA() - >>> pipe['reduce_dim'] - PCA() + A specific step can also be accessed by index or name by indexing (with ``[idx]``) the + pipeline:: -`Pipeline`'s `named_steps` attribute allows accessing steps by name with tab -completion in interactive environments:: + >>> pipe.steps[0] + ('reduce_dim', PCA()) + >>> pipe[0] + PCA() + >>> pipe['reduce_dim'] + PCA() - >>> pipe.named_steps.reduce_dim is pipe['reduce_dim'] - True + `Pipeline`'s `named_steps` attribute allows accessing steps by name with tab + completion in interactive environments:: -|details-end| + >>> pipe.named_steps.reduce_dim is pipe['reduce_dim'] + True Tracking feature names in a pipeline .................................... @@ -149,17 +141,13 @@ pipeline slicing to get the feature names going into each step:: >>> pipe[:-1].get_feature_names_out() array(['x2', 'x3'], ...) -|details-start| -**Customize feature names** -|details-split| - -You can also provide custom feature names for the input data using -``get_feature_names_out``:: +.. dropdown:: Customize feature names - >>> pipe[:-1].get_feature_names_out(iris.feature_names) - array(['petal length (cm)', 'petal width (cm)'], ...) + You can also provide custom feature names for the input data using + ``get_feature_names_out``:: -|details-end| + >>> pipe[:-1].get_feature_names_out(iris.feature_names) + array(['petal length (cm)', 'petal width (cm)'], ...) .. _pipeline_nested_parameters: @@ -175,40 +163,37 @@ syntax:: >>> pipe.set_params(clf__C=10) Pipeline(steps=[('reduce_dim', PCA()), ('clf', SVC(C=10))]) -|details-start| -**When does it matter?** -|details-split| +.. dropdown:: When does it matter? -This is particularly important for doing grid searches:: + This is particularly important for doing grid searches:: - >>> from sklearn.model_selection import GridSearchCV - >>> param_grid = dict(reduce_dim__n_components=[2, 5, 10], - ... clf__C=[0.1, 10, 100]) - >>> grid_search = GridSearchCV(pipe, param_grid=param_grid) + >>> from sklearn.model_selection import GridSearchCV + >>> param_grid = dict(reduce_dim__n_components=[2, 5, 10], + ... clf__C=[0.1, 10, 100]) + >>> grid_search = GridSearchCV(pipe, param_grid=param_grid) -Individual steps may also be replaced as parameters, and non-final steps may be -ignored by setting them to ``'passthrough'``:: + Individual steps may also be replaced as parameters, and non-final steps may be + ignored by setting them to ``'passthrough'``:: - >>> param_grid = dict(reduce_dim=['passthrough', PCA(5), PCA(10)], - ... clf=[SVC(), LogisticRegression()], - ... clf__C=[0.1, 10, 100]) - >>> grid_search = GridSearchCV(pipe, param_grid=param_grid) + >>> param_grid = dict(reduce_dim=['passthrough', PCA(5), PCA(10)], + ... clf=[SVC(), LogisticRegression()], + ... clf__C=[0.1, 10, 100]) + >>> grid_search = GridSearchCV(pipe, param_grid=param_grid) -.. topic:: See Also: + .. seealso:: - * :ref:`composite_grid_search` + * :ref:`composite_grid_search` -|details-end| -.. topic:: Examples: +.. 
rubric:: Examples - * :ref:`sphx_glr_auto_examples_feature_selection_plot_feature_selection_pipeline.py` - * :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_text_feature_extraction.py` - * :ref:`sphx_glr_auto_examples_compose_plot_digits_pipe.py` - * :ref:`sphx_glr_auto_examples_miscellaneous_plot_kernel_approximation.py` - * :ref:`sphx_glr_auto_examples_svm_plot_svm_anova.py` - * :ref:`sphx_glr_auto_examples_compose_plot_compare_reduction.py` - * :ref:`sphx_glr_auto_examples_miscellaneous_plot_pipeline_display.py` +* :ref:`sphx_glr_auto_examples_feature_selection_plot_feature_selection_pipeline.py` +* :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_text_feature_extraction.py` +* :ref:`sphx_glr_auto_examples_compose_plot_digits_pipe.py` +* :ref:`sphx_glr_auto_examples_miscellaneous_plot_kernel_approximation.py` +* :ref:`sphx_glr_auto_examples_svm_plot_svm_anova.py` +* :ref:`sphx_glr_auto_examples_compose_plot_compare_reduction.py` +* :ref:`sphx_glr_auto_examples_miscellaneous_plot_pipeline_display.py` .. _pipeline_cache: @@ -245,53 +230,49 @@ object:: >>> # Clear the cache directory when you don't need it anymore >>> rmtree(cachedir) -|details-start| -**Warning: Side effect of caching transformers** -|details-split| - -Using a :class:`Pipeline` without cache enabled, it is possible to -inspect the original instance such as:: - - >>> from sklearn.datasets import load_digits - >>> X_digits, y_digits = load_digits(return_X_y=True) - >>> pca1 = PCA(n_components=10) - >>> svm1 = SVC() - >>> pipe = Pipeline([('reduce_dim', pca1), ('clf', svm1)]) - >>> pipe.fit(X_digits, y_digits) - Pipeline(steps=[('reduce_dim', PCA(n_components=10)), ('clf', SVC())]) - >>> # The pca instance can be inspected directly - >>> pca1.components_.shape - (10, 64) - - -Enabling caching triggers a clone of the transformers before fitting. -Therefore, the transformer instance given to the pipeline cannot be -inspected directly. -In following example, accessing the :class:`~sklearn.decomposition.PCA` -instance ``pca2`` will raise an ``AttributeError`` since ``pca2`` will be an -unfitted transformer. -Instead, use the attribute ``named_steps`` to inspect estimators within -the pipeline:: - - >>> cachedir = mkdtemp() - >>> pca2 = PCA(n_components=10) - >>> svm2 = SVC() - >>> cached_pipe = Pipeline([('reduce_dim', pca2), ('clf', svm2)], - ... memory=cachedir) - >>> cached_pipe.fit(X_digits, y_digits) - Pipeline(memory=..., - steps=[('reduce_dim', PCA(n_components=10)), ('clf', SVC())]) - >>> cached_pipe.named_steps['reduce_dim'].components_.shape - (10, 64) - >>> # Remove the cache directory - >>> rmtree(cachedir) - - -|details-end| - -.. topic:: Examples: - - * :ref:`sphx_glr_auto_examples_compose_plot_compare_reduction.py` +.. dropdown:: Side effect of caching transformers + :color: warning + + Using a :class:`Pipeline` without cache enabled, it is possible to + inspect the original instance such as:: + + >>> from sklearn.datasets import load_digits + >>> X_digits, y_digits = load_digits(return_X_y=True) + >>> pca1 = PCA(n_components=10) + >>> svm1 = SVC() + >>> pipe = Pipeline([('reduce_dim', pca1), ('clf', svm1)]) + >>> pipe.fit(X_digits, y_digits) + Pipeline(steps=[('reduce_dim', PCA(n_components=10)), ('clf', SVC())]) + >>> # The pca instance can be inspected directly + >>> pca1.components_.shape + (10, 64) + + Enabling caching triggers a clone of the transformers before fitting. + Therefore, the transformer instance given to the pipeline cannot be + inspected directly. 
+ In the following example, accessing the :class:`~sklearn.decomposition.PCA` + instance ``pca2`` will raise an ``AttributeError`` since ``pca2`` will be an + unfitted transformer. + Instead, use the attribute ``named_steps`` to inspect estimators within + the pipeline:: + + >>> cachedir = mkdtemp() + >>> pca2 = PCA(n_components=10) + >>> svm2 = SVC() + >>> cached_pipe = Pipeline([('reduce_dim', pca2), ('clf', svm2)], + ... memory=cachedir) + >>> cached_pipe.fit(X_digits, y_digits) + Pipeline(memory=..., + steps=[('reduce_dim', PCA(n_components=10)), ('clf', SVC())]) + >>> cached_pipe.named_steps['reduce_dim'].components_.shape + (10, 64) + >>> # Remove the cache directory + >>> rmtree(cachedir) + + +.. rubric:: Examples + +* :ref:`sphx_glr_auto_examples_compose_plot_compare_reduction.py` .. _transformed_target_regressor: @@ -364,9 +345,9 @@ each other. However, it is possible to bypass this checking by setting pair of functions ``func`` and ``inverse_func``. However, setting both options will raise an error. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_compose_plot_transformed_target.py` +* :ref:`sphx_glr_auto_examples_compose_plot_transformed_target.py` .. _feature_union: @@ -428,9 +409,9 @@ and ignored by setting to ``'drop'``:: FeatureUnion(transformer_list=[('linear_pca', PCA()), ('kernel_pca', 'drop')]) -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_compose_plot_feature_union.py` +* :ref:`sphx_glr_auto_examples_compose_plot_feature_union.py` .. _column_transformer: @@ -523,10 +504,10 @@ on data type or column name:: ... OneHotEncoder(), ... make_column_selector(pattern='city', dtype_include=object))]) >>> ct.fit_transform(X) - array([[ 0.904..., 0. , 1. , 0. , 0. ], - [-1.507..., 1.414..., 1. , 0. , 0. ], - [-0.301..., 0. , 0. , 1. , 0. ], - [ 0.904..., -1.414..., 0. , 0. , 1. ]]) + array([[ 0.904, 0. , 1. , 0. , 0. ], + [-1.507, 1.414, 1. , 0. , 0. ], + [-0.301, 0. , 0. , 1. , 0. ], + [ 0.904, -1.414, 0. , 0. , 1. ]]) Strings can reference columns if the input is a DataFrame, integers are always interpreted as the positional columns. @@ -590,9 +571,9 @@ will use the column names to select the columns:: >>> X_new = pd.DataFrame({"expert_rating": [5, 6, 1], ... "ignored_new_col": [1.2, 0.3, -0.1]}) >>> ct.transform(X_new) - array([[ 0.9...], - [ 2.1...], - [-3.9...]]) + array([[ 0.9], + [ 2.1], + [-3.9]]) .. _visualizing_composite_estimators: @@ -623,7 +604,7 @@ As an alternative, the HTML can be written to a file using >>> with open('my_estimator.html', 'w') as f: # doctest: +SKIP ... f.write(estimator_html_repr(clf)) -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_compose_plot_column_transformer.py` - * :ref:`sphx_glr_auto_examples_compose_plot_column_transformer_mixed_types.py` +* :ref:`sphx_glr_auto_examples_compose_plot_column_transformer.py` +* :ref:`sphx_glr_auto_examples_compose_plot_column_transformer_mixed_types.py` diff --git a/doc/modules/covariance.rst b/doc/modules/covariance.rst index 50927f9a677f6..0eadfa2c8c584 100644 --- a/doc/modules/covariance.rst +++ b/doc/modules/covariance.rst @@ -40,11 +40,10 @@ on whether the data are centered, so one may want to use the same mean vector as the training set. If not, both should be centered by the user, and ``assume_centered=True`` should be used. -.. topic:: Examples: +.. 
rubric:: Examples - * See :ref:`sphx_glr_auto_examples_covariance_plot_covariance_estimation.py` for - an example on how to fit an :class:`EmpiricalCovariance` object - to data. +* See :ref:`sphx_glr_auto_examples_covariance_plot_covariance_estimation.py` for + an example on how to fit an :class:`EmpiricalCovariance` object to data. .. _shrunk_covariance: @@ -77,18 +76,17 @@ smallest and the largest eigenvalues of the empirical covariance matrix. It can be done by simply shifting every eigenvalue according to a given offset, which is equivalent of finding the l2-penalized Maximum Likelihood Estimator of the covariance matrix. In practice, shrinkage -boils down to a simple a convex transformation : :math:`\Sigma_{\rm +boils down to a simple convex transformation : :math:`\Sigma_{\rm shrunk} = (1-\alpha)\hat{\Sigma} + \alpha\frac{{\rm Tr}\hat{\Sigma}}{p}\rm Id`. Choosing the amount of shrinkage, :math:`\alpha` amounts to setting a bias/variance trade-off, and is discussed below. -.. topic:: Examples: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_covariance_plot_covariance_estimation.py` for - an example on how to fit a :class:`ShrunkCovariance` object - to data. +* See :ref:`sphx_glr_auto_examples_covariance_plot_covariance_estimation.py` for + an example on how to fit a :class:`ShrunkCovariance` object to data. Ledoit-Wolf shrinkage @@ -109,30 +107,30 @@ fitting a :class:`LedoitWolf` object to the same sample. It is important to note that when the number of samples is much larger than the number of features, one would expect that no shrinkage would be necessary. The intuition behind this is that if the population covariance - is full rank, when the number of sample grows, the sample covariance will - also become positive definite. As a result, no shrinkage would necessary + is full rank, when the number of samples grows, the sample covariance will + also become positive definite. As a result, no shrinkage would be necessary and the method should automatically do this. This, however, is not the case in the Ledoit-Wolf procedure when the population covariance happens to be a multiple of the identity matrix. In this case, the Ledoit-Wolf shrinkage estimate approaches 1 as the number of samples increases. This indicates that the optimal estimate of the - covariance matrix in the Ledoit-Wolf sense is multiple of the identity. + covariance matrix in the Ledoit-Wolf sense is a multiple of the identity. Since the population covariance is already a multiple of the identity matrix, the Ledoit-Wolf solution is indeed a reasonable estimate. -.. topic:: Examples: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_covariance_plot_covariance_estimation.py` for - an example on how to fit a :class:`LedoitWolf` object to data and - for visualizing the performances of the Ledoit-Wolf estimator in - terms of likelihood. +* See :ref:`sphx_glr_auto_examples_covariance_plot_covariance_estimation.py` for + an example on how to fit a :class:`LedoitWolf` object to data and + for visualizing the performances of the Ledoit-Wolf estimator in + terms of likelihood. -.. topic:: References: +.. rubric:: References - .. [1] O. Ledoit and M. Wolf, "A Well-Conditioned Estimator for Large-Dimensional - Covariance Matrices", Journal of Multivariate Analysis, Volume 88, Issue 2, - February 2004, pages 365-411. +.. [1] O. Ledoit and M. Wolf, "A Well-Conditioned Estimator for Large-Dimensional + Covariance Matrices", Journal of Multivariate Analysis, Volume 88, Issue 2, + February 2004, pages 365-411. .. 
_oracle_approximating_shrinkage: @@ -158,22 +156,21 @@ object to the same sample. Bias-variance trade-off when setting the shrinkage: comparing the choices of Ledoit-Wolf and OAS estimators -.. topic:: References: +.. rubric:: References - .. [2] :arxiv:`"Shrinkage algorithms for MMSE covariance estimation.", - Chen, Y., Wiesel, A., Eldar, Y. C., & Hero, A. O. - IEEE Transactions on Signal Processing, 58(10), 5016-5029, 2010. - <0907.4698>` +.. [2] :arxiv:`"Shrinkage algorithms for MMSE covariance estimation.", + Chen, Y., Wiesel, A., Eldar, Y. C., & Hero, A. O. + IEEE Transactions on Signal Processing, 58(10), 5016-5029, 2010. + <0907.4698>` -.. topic:: Examples: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_covariance_plot_covariance_estimation.py` for - an example on how to fit an :class:`OAS` object - to data. +* See :ref:`sphx_glr_auto_examples_covariance_plot_covariance_estimation.py` for + an example on how to fit an :class:`OAS` object to data. - * See :ref:`sphx_glr_auto_examples_covariance_plot_lw_vs_oas.py` to visualize the - Mean Squared Error difference between a :class:`LedoitWolf` and - an :class:`OAS` estimator of the covariance. +* See :ref:`sphx_glr_auto_examples_covariance_plot_lw_vs_oas.py` to visualize the + Mean Squared Error difference between a :class:`LedoitWolf` and + an :class:`OAS` estimator of the covariance. .. figure:: ../auto_examples/covariance/images/sphx_glr_plot_lw_vs_oas_001.png @@ -254,20 +251,20 @@ problem is the GLasso algorithm, from the Friedman 2008 Biostatistics paper. It is the same algorithm as in the R ``glasso`` package. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_covariance_plot_sparse_cov.py`: example on synthetic - data showing some recovery of a structure, and comparing to other - covariance estimators. +* :ref:`sphx_glr_auto_examples_covariance_plot_sparse_cov.py`: example on synthetic + data showing some recovery of a structure, and comparing to other + covariance estimators. - * :ref:`sphx_glr_auto_examples_applications_plot_stock_market.py`: example on real - stock market data, finding which symbols are most linked. +* :ref:`sphx_glr_auto_examples_applications_plot_stock_market.py`: example on real + stock market data, finding which symbols are most linked. -.. topic:: References: +.. rubric:: References - * Friedman et al, `"Sparse inverse covariance estimation with the - graphical lasso" `_, - Biostatistics 9, pp 432, 2008 +* Friedman et al, `"Sparse inverse covariance estimation with the + graphical lasso" `_, + Biostatistics 9, pp 432, 2008 .. _robust_covariance: @@ -313,24 +310,24 @@ the same time. Raw estimates can be accessed as ``raw_location_`` and ``raw_covariance_`` attributes of a :class:`MinCovDet` robust covariance estimator object. -.. topic:: References: +.. rubric:: References - .. [3] P. J. Rousseeuw. Least median of squares regression. - J. Am Stat Ass, 79:871, 1984. - .. [4] A Fast Algorithm for the Minimum Covariance Determinant Estimator, - 1999, American Statistical Association and the American Society - for Quality, TECHNOMETRICS. +.. [3] P. J. Rousseeuw. Least median of squares regression. + J. Am Stat Ass, 79:871, 1984. +.. [4] A Fast Algorithm for the Minimum Covariance Determinant Estimator, + 1999, American Statistical Association and the American Society + for Quality, TECHNOMETRICS. -.. topic:: Examples: +.. 
rubric:: Examples - * See :ref:`sphx_glr_auto_examples_covariance_plot_robust_vs_empirical_covariance.py` for - an example on how to fit a :class:`MinCovDet` object to data and see how - the estimate remains accurate despite the presence of outliers. +* See :ref:`sphx_glr_auto_examples_covariance_plot_robust_vs_empirical_covariance.py` for + an example on how to fit a :class:`MinCovDet` object to data and see how + the estimate remains accurate despite the presence of outliers. - * See :ref:`sphx_glr_auto_examples_covariance_plot_mahalanobis_distances.py` to - visualize the difference between :class:`EmpiricalCovariance` and - :class:`MinCovDet` covariance estimators in terms of Mahalanobis distance - (so we get a better estimate of the precision matrix too). +* See :ref:`sphx_glr_auto_examples_covariance_plot_mahalanobis_distances.py` to + visualize the difference between :class:`EmpiricalCovariance` and + :class:`MinCovDet` covariance estimators in terms of Mahalanobis distance + (so we get a better estimate of the precision matrix too). .. |robust_vs_emp| image:: ../auto_examples/covariance/images/sphx_glr_plot_robust_vs_empirical_covariance_001.png :target: ../auto_examples/covariance/plot_robust_vs_empirical_covariance.html diff --git a/doc/modules/cross_decomposition.rst b/doc/modules/cross_decomposition.rst index 8f8d217f87144..01722cbd07ab6 100644 --- a/doc/modules/cross_decomposition.rst +++ b/doc/modules/cross_decomposition.rst @@ -30,7 +30,7 @@ the samples are first projected into a lower-dimensional subspace, and the targets `y` are predicted using `transformed(X)`. One issue with PCR is that the dimensionality reduction is unsupervised, and may lose some important variables: PCR would keep the features with the most variance, but it's -possible that features with a small variances are relevant from predicting +possible that features with small variances are relevant for predicting the target. In a way, PLS allows for the same kind of dimensionality reduction, but by taking into account the targets `y`. An illustration of this fact is given in the following example: @@ -88,46 +88,39 @@ Note that the scores matrices :math:`\Xi` and :math:`\Omega` correspond to the projections of the training data :math:`X` and :math:`Y`, respectively. Step *a)* may be performed in two ways: either by computing the whole SVD of -:math:`C` and only retain the singular vectors with the biggest singular +:math:`C` and only retaining the singular vectors with the biggest singular values, or by directly computing the singular vectors using the power method (cf section 11.3 in [1]_), which corresponds to the `'nipals'` option of the `algorithm` parameter. -|details-start| -**Transforming data** -|details-split| +.. dropdown:: Transforming data -To transform :math:`X` into :math:`\bar{X}`, we need to find a projection -matrix :math:`P` such that :math:`\bar{X} = XP`. We know that for the -training data, :math:`\Xi = XP`, and :math:`X = \Xi \Gamma^T`. Setting -:math:`P = U(\Gamma^T U)^{-1}` where :math:`U` is the matrix with the -:math:`u_k` in the columns, we have :math:`XP = X U(\Gamma^T U)^{-1} = \Xi -(\Gamma^T U) (\Gamma^T U)^{-1} = \Xi` as desired. The rotation matrix -:math:`P` can be accessed from the `x_rotations_` attribute. + To transform :math:`X` into :math:`\bar{X}`, we need to find a projection + matrix :math:`P` such that :math:`\bar{X} = XP`. We know that for the + training data, :math:`\Xi = XP`, and :math:`X = \Xi \Gamma^T`. 
Setting + :math:`P = U(\Gamma^T U)^{-1}` where :math:`U` is the matrix with the + :math:`u_k` in the columns, we have :math:`XP = X U(\Gamma^T U)^{-1} = \Xi + (\Gamma^T U) (\Gamma^T U)^{-1} = \Xi` as desired. The rotation matrix + :math:`P` can be accessed from the `x_rotations_` attribute. -Similarly, :math:`Y` can be transformed using the rotation matrix -:math:`V(\Delta^T V)^{-1}`, accessed via the `y_rotations_` attribute. -|details-end| + Similarly, :math:`Y` can be transformed using the rotation matrix + :math:`V(\Delta^T V)^{-1}`, accessed via the `y_rotations_` attribute. -|details-start| -**Predicting the targets Y** -|details-split| +.. dropdown:: Predicting the targets `Y` -To predict the targets of some data :math:`X`, we are looking for a -coefficient matrix :math:`\beta \in R^{d \times t}` such that :math:`Y = -X\beta`. + To predict the targets of some data :math:`X`, we are looking for a + coefficient matrix :math:`\beta \in R^{d \times t}` such that :math:`Y = + X\beta`. -The idea is to try to predict the transformed targets :math:`\Omega` as a -function of the transformed samples :math:`\Xi`, by computing :math:`\alpha -\in \mathbb{R}` such that :math:`\Omega = \alpha \Xi`. + The idea is to try to predict the transformed targets :math:`\Omega` as a + function of the transformed samples :math:`\Xi`, by computing :math:`\alpha + \in \mathbb{R}` such that :math:`\Omega = \alpha \Xi`. -Then, we have :math:`Y = \Omega \Delta^T = \alpha \Xi \Delta^T`, and since -:math:`\Xi` is the transformed training data we have that :math:`Y = X \alpha -P \Delta^T`, and as a result the coefficient matrix :math:`\beta = \alpha P -\Delta^T`. + Then, we have :math:`Y = \Omega \Delta^T = \alpha \Xi \Delta^T`, and since + :math:`\Xi` is the transformed training data we have that :math:`Y = X \alpha + P \Delta^T`, and as a result the coefficient matrix :math:`\beta = \alpha P + \Delta^T`. -:math:`\beta` can be accessed through the `coef_` attribute. - -|details-end| + :math:`\beta` can be accessed through the `coef_` attribute. PLSSVD ------ @@ -184,18 +177,13 @@ Since :class:`CCA` involves the inversion of :math:`X_k^TX_k` and :math:`Y_k^TY_k`, this estimator can be unstable if the number of features or targets is greater than the number of samples. -|details-start| -**Reference** -|details-split| - - .. [1] `A survey of Partial Least Squares (PLS) methods, with emphasis on - the two-block case - `_ - JA Wegelin +.. rubric:: References -|details-end| +.. [1] `A survey of Partial Least Squares (PLS) methods, with emphasis on the two-block + case `_, + JA Wegelin -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_cross_decomposition_plot_compare_cross_decomposition.py` - * :ref:`sphx_glr_auto_examples_cross_decomposition_plot_pcr_vs_pls.py` +* :ref:`sphx_glr_auto_examples_cross_decomposition_plot_compare_cross_decomposition.py` +* :ref:`sphx_glr_auto_examples_cross_decomposition_plot_pcr_vs_pls.py` diff --git a/doc/modules/cross_validation.rst b/doc/modules/cross_validation.rst index 34f14fe6846a2..bfdee6c8a043d 100644 --- a/doc/modules/cross_validation.rst +++ b/doc/modules/cross_validation.rst @@ -55,7 +55,7 @@ data for testing (evaluating) our classifier:: >>> clf = svm.SVC(kernel='linear', C=1).fit(X_train, y_train) >>> clf.score(X_test, y_test) - 0.96... 
+ 0.96 When evaluating different settings ("hyperparameters") for estimators, such as the ``C`` setting that must be manually set for an SVM, @@ -120,7 +120,7 @@ time):: >>> clf = svm.SVC(kernel='linear', C=1, random_state=42) >>> scores = cross_val_score(clf, X, y, cv=5) >>> scores - array([0.96..., 1. , 0.96..., 0.96..., 1. ]) + array([0.96, 1. , 0.96, 0.96, 1. ]) The mean score and the standard deviation are hence given by:: @@ -135,7 +135,7 @@ scoring parameter:: >>> scores = cross_val_score( ... clf, X, y, cv=5, scoring='f1_macro') >>> scores - array([0.96..., 1. ..., 0.96..., 0.96..., 1. ]) + array([0.96, 1., 0.96, 0.96, 1.]) See :ref:`scoring_parameter` for details. In the case of the Iris dataset, the samples are balanced across target @@ -153,7 +153,7 @@ validation iterator instead, for instance:: >>> n_samples = X.shape[0] >>> cv = ShuffleSplit(n_splits=5, test_size=0.3, random_state=0) >>> cross_val_score(clf, X, y, cv=cv) - array([0.977..., 0.977..., 1. ..., 0.955..., 1. ]) + array([0.977, 0.977, 1., 0.955, 1.]) Another option is to use an iterable yielding (train, test) splits as arrays of indices, for example:: @@ -168,38 +168,35 @@ indices, for example:: ... >>> custom_cv = custom_cv_2folds(X) >>> cross_val_score(clf, X, y, cv=custom_cv) - array([1. , 0.973...]) + array([1. , 0.973]) -|details-start| -**Data transformation with held out data** -|details-split| +.. dropdown:: Data transformation with held-out data - Just as it is important to test a predictor on data held-out from - training, preprocessing (such as standardization, feature selection, etc.) - and similar :ref:`data transformations ` similarly should - be learnt from a training set and applied to held-out data for prediction:: + Just as it is important to test a predictor on data held-out from + training, preprocessing (such as standardization, feature selection, etc.) + and similar :ref:`data transformations ` similarly should + be learnt from a training set and applied to held-out data for prediction:: - >>> from sklearn import preprocessing - >>> X_train, X_test, y_train, y_test = train_test_split( - ... X, y, test_size=0.4, random_state=0) - >>> scaler = preprocessing.StandardScaler().fit(X_train) - >>> X_train_transformed = scaler.transform(X_train) - >>> clf = svm.SVC(C=1).fit(X_train_transformed, y_train) - >>> X_test_transformed = scaler.transform(X_test) - >>> clf.score(X_test_transformed, y_test) - 0.9333... + >>> from sklearn import preprocessing + >>> X_train, X_test, y_train, y_test = train_test_split( + ... X, y, test_size=0.4, random_state=0) + >>> scaler = preprocessing.StandardScaler().fit(X_train) + >>> X_train_transformed = scaler.transform(X_train) + >>> clf = svm.SVC(C=1).fit(X_train_transformed, y_train) + >>> X_test_transformed = scaler.transform(X_test) + >>> clf.score(X_test_transformed, y_test) + 0.9333 - A :class:`Pipeline ` makes it easier to compose - estimators, providing this behavior under cross-validation:: + A :class:`Pipeline ` makes it easier to compose + estimators, providing this behavior under cross-validation:: - >>> from sklearn.pipeline import make_pipeline - >>> clf = make_pipeline(preprocessing.StandardScaler(), svm.SVC(C=1)) - >>> cross_val_score(clf, X, y, cv=cv) - array([0.977..., 0.933..., 0.955..., 0.933..., 0.977...]) + >>> from sklearn.pipeline import make_pipeline + >>> clf = make_pipeline(preprocessing.StandardScaler(), svm.SVC(C=1)) + >>> cross_val_score(clf, X, y, cv=cv) + array([0.977, 0.933, 0.955, 0.933, 0.977]) - See :ref:`combining_estimators`. 
+ See :ref:`combining_estimators`. -|details-end| .. _multimetric_cross_validation: @@ -240,7 +237,7 @@ predefined scorer names:: >>> sorted(scores.keys()) ['fit_time', 'score_time', 'test_precision_macro', 'test_recall_macro'] >>> scores['test_recall_macro'] - array([0.96..., 1. ..., 0.96..., 0.96..., 1. ]) + array([0.96, 1., 0.96, 0.96, 1.]) Or as a dict mapping scorer name to a predefined or custom scoring function:: @@ -253,7 +250,7 @@ Or as a dict mapping scorer name to a predefined or custom scoring function:: ['fit_time', 'score_time', 'test_prec_macro', 'test_rec_macro', 'train_prec_macro', 'train_rec_macro'] >>> scores['train_rec_macro'] - array([0.97..., 0.97..., 0.99..., 0.98..., 0.98...]) + array([0.97, 0.97, 0.99, 0.98, 0.98]) Here is an example of ``cross_validate`` using a single metric:: @@ -294,14 +291,14 @@ The function :func:`cross_val_predict` is appropriate for: The available cross validation iterators are introduced in the following section. -.. topic:: Examples +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_model_selection_plot_roc_crossval.py`, - * :ref:`sphx_glr_auto_examples_feature_selection_plot_rfe_with_cross_validation.py`, - * :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_digits.py`, - * :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_text_feature_extraction.py`, - * :ref:`sphx_glr_auto_examples_model_selection_plot_cv_predict.py`, - * :ref:`sphx_glr_auto_examples_model_selection_plot_nested_cross_validation_iris.py`. +* :ref:`sphx_glr_auto_examples_model_selection_plot_roc_crossval.py`, +* :ref:`sphx_glr_auto_examples_feature_selection_plot_rfe_with_cross_validation.py`, +* :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_digits.py`, +* :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_text_feature_extraction.py`, +* :ref:`sphx_glr_auto_examples_model_selection_plot_cv_predict.py`, +* :ref:`sphx_glr_auto_examples_model_selection_plot_nested_cross_validation_iris.py`. Cross validation iterators ========================== @@ -406,7 +403,7 @@ Leave One Out (LOO) :class:`LeaveOneOut` (or LOO) is a simple cross-validation. Each learning set is created by taking all the samples except one, the test set being the sample left out. Thus, for :math:`n` samples, we have :math:`n` different -training sets and :math:`n` different tests set. This cross-validation +training sets and :math:`n` different test sets. This cross-validation procedure does not waste much data as only one sample is removed from the training set:: @@ -442,23 +439,19 @@ then 5- or 10- fold cross validation can overestimate the generalization error. As a general rule, most authors, and empirical evidence, suggest that 5- or 10- fold cross validation should be preferred to LOO. -|details-start| -**References** -|details-split| +.. dropdown:: References - * ``_; - * T. Hastie, R. Tibshirani, J. Friedman, `The Elements of Statistical Learning - `_, Springer 2009 - * L. Breiman, P. Spector `Submodel selection and evaluation in regression: The X-random case - `_, International Statistical Review 1992; - * R. Kohavi, `A Study of Cross-Validation and Bootstrap for Accuracy Estimation and Model Selection - `_, Intl. Jnt. Conf. AI - * R. Bharat Rao, G. Fung, R. Rosales, `On the Dangers of Cross-Validation. An Experimental Evaluation - `_, SIAM 2008; - * G. James, D. Witten, T. Hastie, R Tibshirani, `An Introduction to - Statistical Learning `_, Springer 2013. - -|details-end| + * ``_; + * T. Hastie, R. Tibshirani, J. 
Friedman, `The Elements of Statistical Learning
+    `_, Springer 2009
+  * L. Breiman, P. Spector `Submodel selection and evaluation in regression: The X-random case
+    `_, International Statistical Review 1992;
+  * R. Kohavi, `A Study of Cross-Validation and Bootstrap for Accuracy Estimation and Model Selection
+    `_, Intl. Jnt. Conf. AI
+  * R. Bharat Rao, G. Fung, R. Rosales, `On the Dangers of Cross-Validation. An Experimental Evaluation
+    `_, SIAM 2008;
+  * G. James, D. Witten, T. Hastie, R. Tibshirani, `An Introduction to
+    Statistical Learning `_, Springer 2013.

 .. _leave_p_out:

@@ -530,12 +523,33 @@ the proportion of samples on each side of the train / test split.
 Cross-validation iterators with stratification based on class labels
 --------------------------------------------------------------------

-Some classification problems can exhibit a large imbalance in the distribution
-of the target classes: for instance there could be several times more negative
-samples than positive samples. In such cases it is recommended to use
-stratified sampling as implemented in :class:`StratifiedKFold` and
-:class:`StratifiedShuffleSplit` to ensure that relative class frequencies is
-approximately preserved in each train and validation fold.
+Some classification tasks can naturally exhibit rare classes: for instance,
+there could be orders of magnitude more negative observations than positive
+observations (e.g. medical screening, fraud detection, etc.). As a result,
+cross-validation splitting can generate train or validation folds without any
+occurrence of a particular class. This typically leads to undefined
+classification metrics (e.g. ROC AUC), exceptions raised when attempting to
+call :term:`fit` or missing columns in the output of the `predict_proba` or
+`decision_function` methods of multiclass classifiers trained on different
+folds.
+
+To mitigate such problems, splitters such as :class:`StratifiedKFold` and
+:class:`StratifiedShuffleSplit` implement stratified sampling to ensure that
+relative class frequencies are approximately preserved in each fold.
+
+.. note::
+
+  Stratified sampling was introduced in scikit-learn to work around the
+  aforementioned engineering problems rather than solve a statistical one.
+
+  Stratification makes cross-validation folds more homogeneous, and as a result
+  hides some of the variability inherent to fitting models with a limited
+  number of observations.
+
+  As a result, stratification can artificially shrink the spread of the metric
+  measured across cross-validation iterations: the inter-fold variability no
+  longer reflects the uncertainty in the performance of classifiers in the
+  presence of rare classes.

 .. _stratified_k_fold:

@@ -569,7 +583,7 @@ two unbalanced classes. We show the number of samples in each class and compare
    train - [34] | test - [11 5]

 We can see that :class:`StratifiedKFold` preserves the class ratios
-(approximately 1 / 10) in both train and test dataset.
+(approximately 1 / 10) in both train and test datasets.

 Here is a visualization of the cross-validation behavior.
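The effect described in the note above can be checked directly. The following
is a minimal sketch, not part of the patch itself: the dataset, the estimator
and all parameter values are purely illustrative. It compares the fold-to-fold
score spread of plain and stratified K-fold on a rare-class problem::

    >>> from sklearn.datasets import make_classification
    >>> from sklearn.linear_model import LogisticRegression
    >>> from sklearn.model_selection import KFold, StratifiedKFold, cross_val_score
    >>> # Toy data with a roughly 9:1 class imbalance
    >>> X, y = make_classification(n_samples=100, weights=[0.9], random_state=0)
    >>> cv_plain = KFold(n_splits=5, shuffle=True, random_state=0)
    >>> cv_strat = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
    >>> scores_plain = cross_val_score(LogisticRegression(max_iter=1000), X, y, cv=cv_plain)
    >>> scores_strat = cross_val_score(LogisticRegression(max_iter=1000), X, y, cv=cv_strat)
    >>> # Stratified folds tend (but are not guaranteed) to show a smaller
    >>> # standard deviation across folds, since each fold sees the same
    >>> # class ratio.
    >>> print(scores_plain.std(), scores_strat.std())  # doctest: +SKIP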
@@ -615,7 +629,7 @@ samples that are part of the validation set, and to -1 for all other samples. Cross-validation iterators for grouped data ------------------------------------------- -The i.i.d. assumption is broken if the underlying generative process yield +The i.i.d. assumption is broken if the underlying generative process yields groups of dependent samples. Such a grouping of data is domain specific. An example would be when there is @@ -672,9 +686,11 @@ Here is a visualization of the cross-validation behavior. :scale: 75% Similar to :class:`KFold`, the test sets from :class:`GroupKFold` will form a -complete partition of all the data. Unlike :class:`KFold`, :class:`GroupKFold` -is not randomized at all, whereas :class:`KFold` is randomized when -``shuffle=True``. +complete partition of all the data. + +While :class:`GroupKFold` attempts to place the same number of samples in each +fold when ``shuffle=False``, when ``shuffle=True`` it attempts to place an equal +number of distinct groups in each fold (but does not account for group sizes). .. _stratified_group_k_fold: @@ -700,30 +716,27 @@ Example:: [ 0 1 4 5 6 7 8 9 11 12 13 14] [ 2 3 10 15 16 17] [ 1 2 3 8 9 10 12 13 14 15 16 17] [ 0 4 5 6 7 11] -|details-start| -**Implementation notes** -|details-split| +.. dropdown:: Implementation notes -- With the current implementation full shuffle is not possible in most - scenarios. When shuffle=True, the following happens: + - With the current implementation full shuffle is not possible in most + scenarios. When shuffle=True, the following happens: - 1. All groups are shuffled. - 2. Groups are sorted by standard deviation of classes using stable sort. - 3. Sorted groups are iterated over and assigned to folds. + 1. All groups are shuffled. + 2. Groups are sorted by standard deviation of classes using stable sort. + 3. Sorted groups are iterated over and assigned to folds. - That means that only groups with the same standard deviation of class - distribution will be shuffled, which might be useful when each group has only - a single class. -- The algorithm greedily assigns each group to one of n_splits test sets, - choosing the test set that minimises the variance in class distribution - across test sets. Group assignment proceeds from groups with highest to - lowest variance in class frequency, i.e. large groups peaked on one or few - classes are assigned first. -- This split is suboptimal in a sense that it might produce imbalanced splits - even if perfect stratification is possible. If you have relatively close - distribution of classes in each group, using :class:`GroupKFold` is better. + That means that only groups with the same standard deviation of class + distribution will be shuffled, which might be useful when each group has only + a single class. + - The algorithm greedily assigns each group to one of n_splits test sets, + choosing the test set that minimises the variance in class distribution + across test sets. Group assignment proceeds from groups with highest to + lowest variance in class frequency, i.e. large groups peaked on one or few + classes are assigned first. + - This split is suboptimal in a sense that it might produce imbalanced splits + even if perfect stratification is possible. If you have relatively close + distribution of classes in each group, using :class:`GroupKFold` is better. -|details-end| Here is a visualization of cross-validation behavior for uneven groups: @@ -771,7 +784,7 @@ for cross-validation against time-based splits. 
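For a quick illustration of the group-aware splitting described above, here is
a minimal sketch on toy arrays (the data is purely illustrative): each group is
held out exactly once, and no group ever straddles a train/test split::

    >>> import numpy as np
    >>> from sklearn.model_selection import LeaveOneGroupOut
    >>> X = np.arange(6).reshape(6, 1)
    >>> y = np.array([0, 0, 1, 1, 0, 1])
    >>> groups = np.array([1, 1, 2, 2, 3, 3])
    >>> logo = LeaveOneGroupOut()
    >>> # One split per distinct group; the held-out group forms the test set
    >>> for train, test in logo.split(X, y, groups=groups):
    ...     print(train, test)
    [2 3 4 5] [0 1]
    [0 1 4 5] [2 3]
    [0 1 2 3] [4 5]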
Leave P Groups Out ^^^^^^^^^^^^^^^^^^ -:class:`LeavePGroupsOut` is similar as :class:`LeaveOneGroupOut`, but removes +:class:`LeavePGroupsOut` is similar to :class:`LeaveOneGroupOut`, but removes samples related to :math:`P` groups for each training/test set. All possible combinations of :math:`P` groups are left out, meaning test sets will overlap for :math:`P>1`. @@ -889,7 +902,8 @@ Also, it adds all surplus data to the first training partition, which is always used to train the model. This class can be used to cross-validate time series data samples -that are observed at fixed time intervals. +that are observed at fixed time intervals. Indeed, the folds must +represent the same duration, in order to have comparable metrics across folds. Example of 3-split time series cross-validation on a dataset with 6 samples:: @@ -917,8 +931,8 @@ A note on shuffling =================== If the data ordering is not arbitrary (e.g. samples with the same class label -are contiguous), shuffling it first may be essential to get a meaningful cross- -validation result. However, the opposite may be true if the samples are not +are contiguous), shuffling it first may be essential to get a meaningful +cross-validation result. However, the opposite may be true if the samples are not independently and identically distributed. For example, if samples correspond to news articles, and are ordered by their time of publication, then shuffling the data will likely lead to a model that is overfit and an inflated validation @@ -929,8 +943,8 @@ Some cross validation iterators, such as :class:`KFold`, have an inbuilt option to shuffle the data indices before splitting them. Note that: * This consumes less memory than shuffling the data directly. -* By default no shuffling occurs, including for the (stratified) K fold cross- - validation performed by specifying ``cv=some_integer`` to +* By default no shuffling occurs, including for the (stratified) K fold + cross-validation performed by specifying ``cv=some_integer`` to :func:`cross_val_score`, grid search, etc. Keep in mind that :func:`train_test_split` still returns a random split. * The ``random_state`` parameter defaults to ``None``, meaning that the @@ -955,60 +969,59 @@ Permutation test score ====================== :func:`~sklearn.model_selection.permutation_test_score` offers another way -to evaluate the performance of classifiers. It provides a permutation-based -p-value, which represents how likely an observed performance of the -classifier would be obtained by chance. The null hypothesis in this test is -that the classifier fails to leverage any statistical dependency between the -features and the labels to make correct predictions on left out data. +to evaluate the performance of a :term:`predictor`. It provides a +permutation-based p-value, which represents how likely an observed performance of the +estimator would be obtained by chance. The null hypothesis in this test is +that the estimator fails to leverage any statistical dependency between the +features and the targets to make correct predictions on left-out data. :func:`~sklearn.model_selection.permutation_test_score` generates a null distribution by calculating `n_permutations` different permutations of the -data. In each permutation the labels are randomly shuffled, thereby removing -any dependency between the features and the labels. 
The p-value output
-is the fraction of permutations for which the average cross-validation score
-obtained by the model is better than the cross-validation score obtained by
-the model using the original data. For reliable results ``n_permutations``
-should typically be larger than 100 and ``cv`` between 3-10 folds.
-
-A low p-value provides evidence that the dataset contains real dependency
-between features and labels and the classifier was able to utilize this
-to obtain good results. A high p-value could be due to a lack of dependency
-between features and labels (there is no difference in feature values between
-the classes) or because the classifier was not able to use the dependency in
-the data. In the latter case, using a more appropriate classifier that
-is able to utilize the structure in the data, would result in a lower
-p-value.
-
-Cross-validation provides information about how well a classifier generalizes,
-specifically the range of expected errors of the classifier. However, a
-classifier trained on a high dimensional dataset with no structure may still
+data. In each permutation the target values are randomly shuffled, thereby removing
+any dependency between the features and the targets. The p-value output is the fraction
+of permutations whose cross-validation score is better than or equal to the true score
+without permuting targets. For reliable results ``n_permutations`` should typically be
+larger than 100 and ``cv`` between 3-10 folds.
+
+A low p-value provides evidence that the dataset contains some real dependency between
+features and targets **and** that the estimator was able to utilize this dependency to
+obtain good results. A high p-value, conversely, could be due to either one of these:
+
+- a lack of dependency between features and targets (i.e., there is no systematic
+  relationship and any observed patterns are likely due to random chance)
+- **or** because the estimator was not able to use the dependency in the data (for
+  instance because it underfit).
+
+In the latter case, using a more appropriate estimator that is able to use the
+structure in the data would result in a lower p-value.
+
+Cross-validation provides information about how well an estimator generalizes
+by estimating the range of its expected scores. However, an
+estimator trained on a high dimensional dataset with no structure may still
 perform better than expected on cross-validation, just by chance. This
 can typically happen with small datasets with less than a few hundred
 samples.
 :func:`~sklearn.model_selection.permutation_test_score` provides information
-on whether the classifier has found a real class structure and can help in
-evaluating the performance of the classifier.
+on whether the estimator has found a real dependency between features and targets and
+can help in evaluating the performance of the estimator.

 It is important to note that this test has been shown to produce low
 p-values even if there is only weak structure in the data because in the
 corresponding permutated datasets there is absolutely no structure. This
-test is therefore only able to show when the model reliably outperforms
+test is therefore only able to show whether the model reliably outperforms
 random guessing.

 Finally, :func:`~sklearn.model_selection.permutation_test_score` is computed
 using brute force and internally fits ``(n_permutations + 1) * n_cv`` models.
 It is therefore only tractable with small datasets for which fitting an
-individual model is very fast.
+individual model is very fast. Using the `n_jobs` parameter parallelizes the
+computation and thus speeds it up.

-.. 
topic:: Examples +individual model is very fast. Using the `n_jobs` parameter parallelizes the +computation and thus speeds it up. - * :ref:`sphx_glr_auto_examples_model_selection_plot_permutation_tests_for_classification.py` +.. rubric:: Examples -|details-start| -**References** -|details-split| +* :ref:`sphx_glr_auto_examples_model_selection_plot_permutation_tests_for_classification.py` - * Ojala and Garriga. `Permutation Tests for Studying Classifier Performance - `_. - J. Mach. Learn. Res. 2010. +.. dropdown:: References -|details-end| + * Ojala and Garriga. `Permutation Tests for Studying Classifier Performance + `_. + J. Mach. Learn. Res. 2010. diff --git a/doc/modules/decomposition.rst b/doc/modules/decomposition.rst index e8241a92cfc3b..24fcd43a292c0 100644 --- a/doc/modules/decomposition.rst +++ b/doc/modules/decomposition.rst @@ -51,11 +51,11 @@ data based on the amount of variance it explains. As such it implements a :scale: 75% -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_decomposition_plot_pca_iris.py` - * :ref:`sphx_glr_auto_examples_decomposition_plot_pca_vs_lda.py` - * :ref:`sphx_glr_auto_examples_decomposition_plot_pca_vs_fa_model_selection.py` +* :ref:`sphx_glr_auto_examples_decomposition_plot_pca_iris.py` +* :ref:`sphx_glr_auto_examples_decomposition_plot_pca_vs_lda.py` +* :ref:`sphx_glr_auto_examples_decomposition_plot_pca_vs_fa_model_selection.py` .. _IncrementalPCA: @@ -79,7 +79,7 @@ out-of-core Principal Component Analysis either by: ``numpy.memmap``. :class:`IncrementalPCA` only stores estimates of component and noise variances, -in order update ``explained_variance_ratio_`` incrementally. This is why +in order to update ``explained_variance_ratio_`` incrementally. This is why memory usage depends on the number of samples per batch, rather than the number of samples to be processed in the dataset. @@ -97,9 +97,9 @@ input data for each feature before applying the SVD. :scale: 75% -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_decomposition_plot_incremental_pca.py` +* :ref:`sphx_glr_auto_examples_decomposition_plot_incremental_pca.py` .. _RandomizedPCA: @@ -120,7 +120,7 @@ pictures of human faces look somewhat alike. The samples lie on a manifold of much lower dimension (say around 200 for instance). The PCA algorithm can be used to linearly transform the data while both reducing the dimensionality -and preserve most of the explained variance at the same time. +and preserving most of the explained variance at the same time. The class :class:`PCA` used with the optional parameter ``svd_solver='randomized'`` is very useful in that case: since we are going @@ -160,20 +160,20 @@ Note: the implementation of ``inverse_transform`` in :class:`PCA` with ``transform`` even when ``whiten=False`` (default). -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_applications_plot_face_recognition.py` - * :ref:`sphx_glr_auto_examples_decomposition_plot_faces_decomposition.py` +* :ref:`sphx_glr_auto_examples_applications_plot_face_recognition.py` +* :ref:`sphx_glr_auto_examples_decomposition_plot_faces_decomposition.py` -.. topic:: References: +.. 
rubric:: References - * Algorithm 4.3 in - :arxiv:`"Finding structure with randomness: Stochastic algorithms for - constructing approximate matrix decompositions" <0909.4061>` - Halko, et al., 2009 +* Algorithm 4.3 in + :arxiv:`"Finding structure with randomness: Stochastic algorithms for + constructing approximate matrix decompositions" <0909.4061>` + Halko, et al., 2009 - * :arxiv:`"An implementation of a randomized algorithm for principal component - analysis" <1412.3510>` A. Szlam et al. 2014 +* :arxiv:`"An implementation of a randomized algorithm for principal component + analysis" <1412.3510>` A. Szlam et al. 2014 .. _SparsePCA: @@ -197,7 +197,7 @@ the real underlying components can be more naturally imagined as sparse vectors; for example in face recognition, components might naturally map to parts of faces. -Sparse principal components yields a more parsimonious, interpretable +Sparse principal components yield a more parsimonious, interpretable representation, clearly emphasizing which of the original features contribute to the differences between samples. @@ -229,7 +229,7 @@ problem solved is a PCA problem (dictionary learning) with an .. math:: (U^*, V^*) = \underset{U, V}{\operatorname{arg\,min\,}} & \frac{1}{2} ||X-UV||_{\text{Fro}}^2+\alpha||V||_{1,1} \\ - \text{subject to } & ||U_k||_2 <= 1 \text{ for all } + \text{subject to } & ||U_k||_2 \leq 1 \text{ for all } 0 \leq k < n_{components} :math:`||.||_{\text{Fro}}` stands for the Frobenius norm and :math:`||.||_{1,1}` @@ -248,18 +248,18 @@ factorization, while larger values shrink many coefficients to zero. the algorithm is online along the features direction, not the samples direction. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_decomposition_plot_faces_decomposition.py` +* :ref:`sphx_glr_auto_examples_decomposition_plot_faces_decomposition.py` -.. topic:: References: +.. rubric:: References - .. [Mrl09] `"Online Dictionary Learning for Sparse Coding" - `_ - J. Mairal, F. Bach, J. Ponce, G. Sapiro, 2009 - .. [Jen09] `"Structured Sparse Principal Component Analysis" - `_ - R. Jenatton, G. Obozinski, F. Bach, 2009 +.. [Mrl09] `"Online Dictionary Learning for Sparse Coding" + `_ + J. Mairal, F. Bach, J. Ponce, G. Sapiro, 2009 +.. [Jen09] `"Structured Sparse Principal Component Analysis" + `_ + R. Jenatton, G. Obozinski, F. Bach, 2009 .. _kernel_PCA: @@ -288,22 +288,23 @@ prediction (kernel dependency estimation). :class:`KernelPCA` supports both :meth:`KernelPCA.inverse_transform` is an approximation. See the example linked below for more details. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_decomposition_plot_kernel_pca.py` +* :ref:`sphx_glr_auto_examples_decomposition_plot_kernel_pca.py` +* :ref:`sphx_glr_auto_examples_applications_plot_digits_denoising.py` -.. topic:: References: +.. rubric:: References - .. [Scholkopf1997] Schölkopf, Bernhard, Alexander Smola, and Klaus-Robert Müller. - `"Kernel principal component analysis." - `_ - International conference on artificial neural networks. - Springer, Berlin, Heidelberg, 1997. +.. [Scholkopf1997] Schölkopf, Bernhard, Alexander Smola, and Klaus-Robert Müller. + `"Kernel principal component analysis." + `_ + International conference on artificial neural networks. + Springer, Berlin, Heidelberg, 1997. - .. [Bakir2003] Bakır, Gökhan H., Jason Weston, and Bernhard Schölkopf. - `"Learning to find pre-images." - `_ - Advances in neural information processing systems 16 (2003): 449-456. +.. 
[Bakir2003] Bakır, Gökhan H., Jason Weston, and Bernhard Schölkopf. + `"Learning to find pre-images." + `_ + Advances in neural information processing systems 16 (2003): 449-456. .. _kPCA_Solvers: @@ -321,36 +322,33 @@ is much smaller than its size. This is a situation where approximate eigensolvers can provide speedup with very low precision loss. -|details-start| -**Eigensolvers** -|details-split| +.. dropdown:: Eigensolvers -The optional parameter ``eigen_solver='randomized'`` can be used to -*significantly* reduce the computation time when the number of requested -``n_components`` is small compared with the number of samples. It relies on -randomized decomposition methods to find an approximate solution in a shorter -time. + The optional parameter ``eigen_solver='randomized'`` can be used to + *significantly* reduce the computation time when the number of requested + ``n_components`` is small compared with the number of samples. It relies on + randomized decomposition methods to find an approximate solution in a shorter + time. -The time complexity of the randomized :class:`KernelPCA` is -:math:`O(n_{\mathrm{samples}}^2 \cdot n_{\mathrm{components}})` -instead of :math:`O(n_{\mathrm{samples}}^3)` for the exact method -implemented with ``eigen_solver='dense'``. + The time complexity of the randomized :class:`KernelPCA` is + :math:`O(n_{\mathrm{samples}}^2 \cdot n_{\mathrm{components}})` + instead of :math:`O(n_{\mathrm{samples}}^3)` for the exact method + implemented with ``eigen_solver='dense'``. -The memory footprint of randomized :class:`KernelPCA` is also proportional to -:math:`2 \cdot n_{\mathrm{samples}} \cdot n_{\mathrm{components}}` instead of -:math:`n_{\mathrm{samples}}^2` for the exact method. + The memory footprint of randomized :class:`KernelPCA` is also proportional to + :math:`2 \cdot n_{\mathrm{samples}} \cdot n_{\mathrm{components}}` instead of + :math:`n_{\mathrm{samples}}^2` for the exact method. -Note: this technique is the same as in :ref:`RandomizedPCA`. + Note: this technique is the same as in :ref:`RandomizedPCA`. -In addition to the above two solvers, ``eigen_solver='arpack'`` can be used as -an alternate way to get an approximate decomposition. In practice, this method -only provides reasonable execution times when the number of components to find -is extremely small. It is enabled by default when the desired number of -components is less than 10 (strict) and the number of samples is more than 200 -(strict). See :class:`KernelPCA` for details. + In addition to the above two solvers, ``eigen_solver='arpack'`` can be used as + an alternate way to get an approximate decomposition. In practice, this method + only provides reasonable execution times when the number of components to find + is extremely small. It is enabled by default when the desired number of + components is less than 10 (strict) and the number of samples is more than 200 + (strict). See :class:`KernelPCA` for details. - -.. topic:: References: + .. rubric:: References * *dense* solver: `scipy.linalg.eigh documentation @@ -372,8 +370,6 @@ components is less than 10 (strict) and the number of samples is more than 200 `_ R. B. Lehoucq, D. C. Sorensen, and C. Yang, (1998) -|details-end| - .. _LSA: @@ -390,72 +386,67 @@ When the columnwise (per-feature) means of :math:`X` are subtracted from the feature values, truncated SVD on the resulting matrix is equivalent to PCA. 
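To make the preceding paragraph concrete, here is a minimal sketch of
:class:`TruncatedSVD` on sparse input (the random matrix is purely
illustrative): because the input is not centered, sparsity is preserved, which
is what makes the transformer practical for large term-document matrices::

    >>> from scipy.sparse import random as sparse_random
    >>> from sklearn.decomposition import TruncatedSVD
    >>> # A 100x100 sparse matrix with 1% non-zero entries
    >>> X = sparse_random(100, 100, density=0.01, random_state=42)
    >>> svd = TruncatedSVD(n_components=5, random_state=42)
    >>> X_reduced = svd.fit_transform(X)
    >>> X_reduced.shape
    (100, 5)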
-|details-start|
-**About truncated SVD and latent semantic analysis (LSA)**
-|details-split|
-
-When truncated SVD is applied to term-document matrices
-(as returned by :class:`~sklearn.feature_extraction.text.CountVectorizer` or
-:class:`~sklearn.feature_extraction.text.TfidfVectorizer`),
-this transformation is known as
-`latent semantic analysis `_
-(LSA), because it transforms such matrices
-to a "semantic" space of low dimensionality.
-In particular, LSA is known to combat the effects of synonymy and polysemy
-(both of which roughly mean there are multiple meanings per word),
-which cause term-document matrices to be overly sparse
-and exhibit poor similarity under measures such as cosine similarity.
+.. dropdown:: About truncated SVD and latent semantic analysis (LSA)

-.. note::
-    LSA is also known as latent semantic indexing, LSI,
-    though strictly that refers to its use in persistent indexes
-    for information retrieval purposes.
+  When truncated SVD is applied to term-document matrices
+  (as returned by :class:`~sklearn.feature_extraction.text.CountVectorizer` or
+  :class:`~sklearn.feature_extraction.text.TfidfVectorizer`),
+  this transformation is known as
+  `latent semantic analysis `_
+  (LSA), because it transforms such matrices
+  to a "semantic" space of low dimensionality.
+  In particular, LSA is known to combat the effects of synonymy and polysemy
+  (both of which roughly mean there are multiple meanings per word),
+  which cause term-document matrices to be overly sparse
+  and exhibit poor similarity under measures such as cosine similarity.

-Mathematically, truncated SVD applied to training samples :math:`X`
-produces a low-rank approximation :math:`X`:
-
-.. math::
-    X \approx X_k = U_k \Sigma_k V_k^\top
+  .. note::
+      LSA is also known as latent semantic indexing, LSI,
+      though strictly that refers to its use in persistent indexes
+      for information retrieval purposes.

-After this operation, :math:`U_k \Sigma_k`
-is the transformed training set with :math:`k` features
-(called ``n_components`` in the API).
+  Mathematically, truncated SVD applied to training samples :math:`X`
+  produces a low-rank approximation of :math:`X`:

-To also transform a test set :math:`X`, we multiply it with :math:`V_k`:
+  .. math::
+      X \approx X_k = U_k \Sigma_k V_k^\top

-.. math::
-    X' = X V_k
+  After this operation, :math:`U_k \Sigma_k`
+  is the transformed training set with :math:`k` features
+  (called ``n_components`` in the API).

-.. note::
-    Most treatments of LSA in the natural language processing (NLP)
-    and information retrieval (IR) literature
-    swap the axes of the matrix :math:`X` so that it has shape
-    ``n_features`` × ``n_samples``.
-    We present LSA in a different way that matches the scikit-learn API better,
-    but the singular values found are the same.
+  To also transform a test set :math:`X`, we multiply it by :math:`V_k`:

+  .. math::
+      X' = X V_k

-While the :class:`TruncatedSVD` transformer
-works with any feature matrix,
-using it on tf–idf matrices is recommended over raw frequency counts
-in an LSA/document processing setting.
-In particular, sublinear scaling and inverse document frequency
-should be turned on (``sublinear_tf=True, use_idf=True``)
-to bring the feature values closer to a Gaussian distribution,
-compensating for LSA's erroneous assumptions about textual data.
+  .. 
note:: + Most treatments of LSA in the natural language processing (NLP) + and information retrieval (IR) literature + swap the axes of the matrix :math:`X` so that it has shape + ``(n_features, n_samples)``. + We present LSA in a different way that matches the scikit-learn API better, + but the singular values found are the same. -|details-end| + While the :class:`TruncatedSVD` transformer + works with any feature matrix, + using it on tf-idf matrices is recommended over raw frequency counts + in an LSA/document processing setting. + In particular, sublinear scaling and inverse document frequency + should be turned on (``sublinear_tf=True, use_idf=True``) + to bring the feature values closer to a Gaussian distribution, + compensating for LSA's erroneous assumptions about textual data. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_text_plot_document_clustering.py` +* :ref:`sphx_glr_auto_examples_text_plot_document_clustering.py` -.. topic:: References: +.. rubric:: References - * Christopher D. Manning, Prabhakar Raghavan and Hinrich Schütze (2008), - *Introduction to Information Retrieval*, Cambridge University Press, - chapter 18: `Matrix decompositions & latent semantic indexing - `_ +* Christopher D. Manning, Prabhakar Raghavan and Hinrich Schütze (2008), + *Introduction to Information Retrieval*, Cambridge University Press, + chapter 18: `Matrix decompositions & latent semantic indexing + `_ @@ -509,9 +500,9 @@ the split code is filled with the negative part of the code vector, only with a positive sign. Therefore, the split_code is non-negative. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_decomposition_plot_sparse_coding.py` +* :ref:`sphx_glr_auto_examples_decomposition_plot_sparse_coding.py` Generic dictionary learning @@ -534,7 +525,7 @@ dictionary fixed, and then updating the dictionary to best fit the sparse code. .. math:: (U^*, V^*) = \underset{U, V}{\operatorname{arg\,min\,}} & \frac{1}{2} ||X-UV||_{\text{Fro}}^2+\alpha||U||_{1,1} \\ - \text{subject to } & ||V_k||_2 <= 1 \text{ for all } + \text{subject to } & ||V_k||_2 \leq 1 \text{ for all } 0 \leq k < n_{\mathrm{atoms}} @@ -591,16 +582,16 @@ extracted from part of the image of a raccoon face looks like. :scale: 50% -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_decomposition_plot_image_denoising.py` +* :ref:`sphx_glr_auto_examples_decomposition_plot_image_denoising.py` -.. topic:: References: +.. rubric:: References - * `"Online dictionary learning for sparse coding" - `_ - J. Mairal, F. Bach, J. Ponce, G. Sapiro, 2009 +* `"Online dictionary learning for sparse coding" + `_ + J. Mairal, F. Bach, J. Ponce, G. Sapiro, 2009 .. _MiniBatchDictionaryLearning: @@ -619,7 +610,7 @@ implement a stopping condition. The estimator also implements ``partial_fit``, which updates the dictionary by iterating only once over a mini-batch. This can be used for online learning when the data is not readily available from the start, or for when the data -does not fit into the memory. +does not fit into memory. .. currentmodule:: sklearn.cluster @@ -731,10 +722,10 @@ Varimax rotation maximizes the sum of the variances of the squared loadings, i.e., it tends to produce sparser factors, which are influenced by only a few features each (the "simple structure"). See e.g., the first example below. -.. topic:: Examples: +.. 
rubric:: Examples - * :ref:`sphx_glr_auto_examples_decomposition_plot_varimax_fa.py` - * :ref:`sphx_glr_auto_examples_decomposition_plot_pca_vs_fa_model_selection.py` +* :ref:`sphx_glr_auto_examples_decomposition_plot_varimax_fa.py` +* :ref:`sphx_glr_auto_examples_decomposition_plot_pca_vs_fa_model_selection.py` .. _ICA: @@ -748,7 +739,7 @@ implemented in scikit-learn using the :class:`Fast ICA ` algorithm. Typically, ICA is not used for reducing dimensionality but for separating superimposed signals. Since the ICA model does not include a noise term, for the model to be correct, whitening must be applied. -This can be done internally using the whiten argument or manually using one +This can be done internally using the `whiten` argument or manually using one of the PCA variants. It is classically used to separate mixed signals (a problem known as @@ -773,11 +764,11 @@ components with some sparsity: .. centered:: |pca_img4| |ica_img4| -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_decomposition_plot_ica_blind_source_separation.py` - * :ref:`sphx_glr_auto_examples_decomposition_plot_ica_vs_pca.py` - * :ref:`sphx_glr_auto_examples_decomposition_plot_faces_decomposition.py` +* :ref:`sphx_glr_auto_examples_decomposition_plot_ica_blind_source_separation.py` +* :ref:`sphx_glr_auto_examples_decomposition_plot_ica_vs_pca.py` +* :ref:`sphx_glr_auto_examples_decomposition_plot_faces_decomposition.py` .. _NMF: @@ -886,7 +877,7 @@ Or, the Itakura-Saito (IS) divergence: d_{IS}(X, Y) = \sum_{i,j} (\frac{X_{ij}}{Y_{ij}} - \log(\frac{X_{ij}}{Y_{ij}}) - 1) These three distances are special cases of the beta-divergence family, with -:math:`\beta = 2, 1, 0` respectively [6]_. The beta-divergence are +:math:`\beta = 2, 1, 0` respectively [6]_. The beta-divergence is defined by : .. math:: @@ -900,24 +891,20 @@ Note that this definition is not valid if :math:`\beta \in (0; 1)`, yet it can be continuously extended to the definitions of :math:`d_{KL}` and :math:`d_{IS}` respectively. -|details-start| -**NMF implemented solvers** -|details-split| - -:class:`NMF` implements two solvers, using Coordinate Descent ('cd') [5]_, and -Multiplicative Update ('mu') [6]_. The 'mu' solver can optimize every -beta-divergence, including of course the Frobenius norm (:math:`\beta=2`), the -(generalized) Kullback-Leibler divergence (:math:`\beta=1`) and the -Itakura-Saito divergence (:math:`\beta=0`). Note that for -:math:`\beta \in (1; 2)`, the 'mu' solver is significantly faster than for other -values of :math:`\beta`. Note also that with a negative (or 0, i.e. -'itakura-saito') :math:`\beta`, the input matrix cannot contain zero values. +.. dropdown:: NMF implemented solvers -The 'cd' solver can only optimize the Frobenius norm. Due to the -underlying non-convexity of NMF, the different solvers may converge to -different minima, even when optimizing the same distance function. + :class:`NMF` implements two solvers, using Coordinate Descent ('cd') [5]_, and + Multiplicative Update ('mu') [6]_. The 'mu' solver can optimize every + beta-divergence, including of course the Frobenius norm (:math:`\beta=2`), the + (generalized) Kullback-Leibler divergence (:math:`\beta=1`) and the + Itakura-Saito divergence (:math:`\beta=0`). Note that for + :math:`\beta \in (1; 2)`, the 'mu' solver is significantly faster than for other + values of :math:`\beta`. Note also that with a negative (or 0, i.e. + 'itakura-saito') :math:`\beta`, the input matrix cannot contain zero values. 
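As a minimal, hypothetical sketch of picking the 'mu' solver together with a
beta-divergence (the toy matrix and parameter values below are illustrative
only)::

    >>> import numpy as np
    >>> from sklearn.decomposition import NMF
    >>> rng = np.random.RandomState(0)
    >>> X = rng.uniform(low=0.1, high=1.0, size=(6, 5))  # strictly positive, no zeros
    >>> model = NMF(n_components=2, solver='mu', beta_loss='kullback-leibler',
    ...             init='nndsvda', max_iter=1000, random_state=0)
    >>> W = model.fit_transform(X)  # W has shape (6, 2)
    >>> H = model.components_      # H has shape (2, 5)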
-|details-end| + The 'cd' solver can only optimize the Frobenius norm. Due to the + underlying non-convexity of NMF, the different solvers may converge to + different minima, even when optimizing the same distance function. NMF is best used with the ``fit_transform`` method, which returns the matrix W. The matrix H is stored into the fitted model in the ``components_`` attribute; @@ -935,10 +922,10 @@ stored components:: -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_decomposition_plot_faces_decomposition.py` - * :ref:`sphx_glr_auto_examples_applications_plot_topics_extraction_with_nmf_lda.py` +* :ref:`sphx_glr_auto_examples_decomposition_plot_faces_decomposition.py` +* :ref:`sphx_glr_auto_examples_applications_plot_topics_extraction_with_nmf_lda.py` .. _MiniBatchNMF: @@ -956,40 +943,40 @@ the size of the batches. In order to speed up the mini-batch algorithm it is also possible to scale past batches, giving them less importance than newer batches. This is done -introducing a so-called forgetting factor controlled by the ``forget_factor`` +by introducing a so-called forgetting factor controlled by the ``forget_factor`` parameter. The estimator also implements ``partial_fit``, which updates ``H`` by iterating only once over a mini-batch. This can be used for online learning when the data is not readily available from the start, or when the data does not fit into memory. -.. topic:: References: +.. rubric:: References - .. [1] `"Learning the parts of objects by non-negative matrix factorization" - `_ - D. Lee, S. Seung, 1999 +.. [1] `"Learning the parts of objects by non-negative matrix factorization" + `_ + D. Lee, S. Seung, 1999 - .. [2] `"Non-negative Matrix Factorization with Sparseness Constraints" - `_ - P. Hoyer, 2004 +.. [2] `"Non-negative Matrix Factorization with Sparseness Constraints" + `_ + P. Hoyer, 2004 - .. [4] `"SVD based initialization: A head start for nonnegative - matrix factorization" - `_ - C. Boutsidis, E. Gallopoulos, 2008 +.. [4] `"SVD based initialization: A head start for nonnegative + matrix factorization" + `_ + C. Boutsidis, E. Gallopoulos, 2008 - .. [5] `"Fast local algorithms for large scale nonnegative matrix and tensor - factorizations." - `_ - A. Cichocki, A. Phan, 2009 +.. [5] `"Fast local algorithms for large scale nonnegative matrix and tensor + factorizations." + `_ + A. Cichocki, A. Phan, 2009 - .. [6] :arxiv:`"Algorithms for nonnegative matrix factorization with - the beta-divergence" <1010.1763>` - C. Fevotte, J. Idier, 2011 +.. [6] :arxiv:`"Algorithms for nonnegative matrix factorization with + the beta-divergence" <1010.1763>` + C. Fevotte, J. Idier, 2011 - .. [7] :arxiv:`"Online algorithms for nonnegative matrix factorization with the - Itakura-Saito divergence" <1106.4198>` - A. Lefevre, F. Bach, C. Fevotte, 2011 +.. [7] :arxiv:`"Online algorithms for nonnegative matrix factorization with the + Itakura-Saito divergence" <1106.4198>` + A. Lefevre, F. Bach, C. Fevotte, 2011 .. _LatentDirichletAllocation: @@ -997,7 +984,7 @@ Latent Dirichlet Allocation (LDA) ================================= Latent Dirichlet Allocation is a generative probabilistic model for collections of -discrete dataset such as text corpora. It is also a topic model that is used for +discrete datasets such as text corpora. It is also a topic model that is used for discovering abstract topics from a collection of documents. 
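Before turning to the model details, a minimal usage sketch (the toy corpus
below is illustrative only)::

    >>> from sklearn.decomposition import LatentDirichletAllocation
    >>> from sklearn.feature_extraction.text import CountVectorizer
    >>> docs = ["apples and oranges", "bananas and oranges",
    ...         "cats chase mice", "dogs chase cats"]
    >>> X_counts = CountVectorizer().fit_transform(docs)  # raw term counts, not tf-idf
    >>> lda = LatentDirichletAllocation(n_components=2, random_state=0)
    >>> doc_topics = lda.fit_transform(X_counts)  # each row is a per-document topic mixture
    >>> doc_topics.shape
    (4, 2)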
The graphical model of LDA is a three-level generative model:

@@ -1021,51 +1008,48 @@ of topics in the corpus and the distribution of words in the documents.
The goal of LDA is to use the observed words to infer the hidden topic
structure.

-|details-start|
-**Details on modeling text corpora**
-|details-split|
+.. dropdown:: Details on modeling text corpora

-When modeling text corpora, the model assumes the following generative process
-for a corpus with :math:`D` documents and :math:`K` topics, with :math:`K`
-corresponding to `n_components` in the API:
+  When modeling text corpora, the model assumes the following generative process
+  for a corpus with :math:`D` documents and :math:`K` topics, with :math:`K`
+  corresponding to `n_components` in the API:

-1. For each topic :math:`k \in K`, draw :math:`\beta_k \sim
-   \mathrm{Dirichlet}(\eta)`. This provides a distribution over the words,
-   i.e. the probability of a word appearing in topic :math:`k`.
-   :math:`\eta` corresponds to `topic_word_prior`.
+  1. For each topic :math:`k \in K`, draw :math:`\beta_k \sim
+     \mathrm{Dirichlet}(\eta)`. This provides a distribution over the words,
+     i.e. the probability of a word appearing in topic :math:`k`.
+     :math:`\eta` corresponds to `topic_word_prior`.

-2. For each document :math:`d \in D`, draw the topic proportions
-   :math:`\theta_d \sim \mathrm{Dirichlet}(\alpha)`. :math:`\alpha`
-   corresponds to `doc_topic_prior`.
+  2. For each document :math:`d \in D`, draw the topic proportions
+     :math:`\theta_d \sim \mathrm{Dirichlet}(\alpha)`. :math:`\alpha`
+     corresponds to `doc_topic_prior`.

-3. For each word :math:`i` in document :math:`d`:
+  3. For each word :math:`i` in document :math:`d`:

-   a. Draw the topic assignment :math:`z_{di} \sim \mathrm{Multinomial}
-      (\theta_d)`
-   b. Draw the observed word :math:`w_{ij} \sim \mathrm{Multinomial}
-      (\beta_{z_{di}})`
+    a. Draw the topic assignment :math:`z_{di} \sim \mathrm{Multinomial}
+       (\theta_d)`
+    b. Draw the observed word :math:`w_{ij} \sim \mathrm{Multinomial}
+       (\beta_{z_{di}})`

-For parameter estimation, the posterior distribution is:
+  For parameter estimation, the posterior distribution is:

-.. math::
-  p(z, \theta, \beta |w, \alpha, \eta) =
-    \frac{p(z, \theta, \beta|\alpha, \eta)}{p(w|\alpha, \eta)}
+  .. math::
+    p(z, \theta, \beta |w, \alpha, \eta) =
+      \frac{p(z, \theta, \beta|\alpha, \eta)}{p(w|\alpha, \eta)}

-Since the posterior is intractable, variational Bayesian method
-uses a simpler distribution :math:`q(z,\theta,\beta | \lambda, \phi, \gamma)`
-to approximate it, and those variational parameters :math:`\lambda`,
-:math:`\phi`, :math:`\gamma` are optimized to maximize the Evidence
-Lower Bound (ELBO):
+  Since the posterior is intractable, the variational Bayesian method
+  uses a simpler distribution :math:`q(z,\theta,\beta | \lambda, \phi, \gamma)`
+  to approximate it, and these variational parameters :math:`\lambda`,
+  :math:`\phi`, :math:`\gamma` are optimized to maximize the Evidence
+  Lower Bound (ELBO):

-.. math::
-  \log\: P(w | \alpha, \eta) \geq L(w,\phi,\gamma,\lambda) \overset{\triangle}{=}
-    E_{q}[\log\:p(w,z,\theta,\beta|\alpha,\eta)] - E_{q}[\log\:q(z, \theta, \beta)]
+  .. math::
+    \log\: P(w | \alpha, \eta) \geq L(w,\phi,\gamma,\lambda) \overset{\triangle}{=}
+      E_{q}[\log\:p(w,z,\theta,\beta|\alpha,\eta)] - E_{q}[\log\:q(z, \theta, \beta)]

-Maximizing ELBO is equivalent to minimizing the Kullback-Leibler(KL) divergence
-between :math:`q(z,\theta,\beta)` and the true posterior
-:math:`p(z, \theta, \beta |w, \alpha, \eta)`. 
+ Maximizing ELBO is equivalent to minimizing the Kullback-Leibler(KL) divergence + between :math:`q(z,\theta,\beta)` and the true posterior + :math:`p(z, \theta, \beta |w, \alpha, \eta)`. -|details-end| :class:`LatentDirichletAllocation` implements the online variational Bayes algorithm and supports both online and batch update methods. @@ -1087,27 +1071,27 @@ can be calculated from ``transform`` method. :class:`LatentDirichletAllocation` also implements ``partial_fit`` method. This is used when data can be fetched sequentially. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_applications_plot_topics_extraction_with_nmf_lda.py` +* :ref:`sphx_glr_auto_examples_applications_plot_topics_extraction_with_nmf_lda.py` -.. topic:: References: +.. rubric:: References - * `"Latent Dirichlet Allocation" - `_ - D. Blei, A. Ng, M. Jordan, 2003 +* `"Latent Dirichlet Allocation" + `_ + D. Blei, A. Ng, M. Jordan, 2003 - * `"Online Learning for Latent Dirichlet Allocation” - `_ - M. Hoffman, D. Blei, F. Bach, 2010 +* `"Online Learning for Latent Dirichlet Allocation” + `_ + M. Hoffman, D. Blei, F. Bach, 2010 - * `"Stochastic Variational Inference" - `_ - M. Hoffman, D. Blei, C. Wang, J. Paisley, 2013 +* `"Stochastic Variational Inference" + `_ + M. Hoffman, D. Blei, C. Wang, J. Paisley, 2013 - * `"The varimax criterion for analytic rotation in factor analysis" - `_ - H. F. Kaiser, 1958 +* `"The varimax criterion for analytic rotation in factor analysis" + `_ + H. F. Kaiser, 1958 See also :ref:`nca_dim_reduction` for dimensionality reduction with Neighborhood Components Analysis. diff --git a/doc/modules/density.rst b/doc/modules/density.rst index 5a9b456010aa3..16c73bd5349a2 100644 --- a/doc/modules/density.rst +++ b/doc/modules/density.rst @@ -101,7 +101,7 @@ smooth (i.e. high-bias) density distribution. A small bandwidth leads to an unsmooth (i.e. high-variance) density distribution. The parameter `bandwidth` controls this smoothing. One can either set -manually this parameter or use Scott's and Silvermann's estimation +manually this parameter or use Scott's and Silverman's estimation methods. :class:`~sklearn.neighbors.KernelDensity` implements several common kernel @@ -113,37 +113,34 @@ forms, which are shown in the following figure: .. centered:: |kde_kernels| -|details-start| -**kernels' mathematical expressions** -|details-split| +.. 
dropdown:: Kernels' mathematical expressions -The form of these kernels is as follows: + The form of these kernels is as follows: -* Gaussian kernel (``kernel = 'gaussian'``) + * Gaussian kernel (``kernel = 'gaussian'``) - :math:`K(x; h) \propto \exp(- \frac{x^2}{2h^2} )` + :math:`K(x; h) \propto \exp(- \frac{x^2}{2h^2} )` -* Tophat kernel (``kernel = 'tophat'``) + * Tophat kernel (``kernel = 'tophat'``) - :math:`K(x; h) \propto 1` if :math:`x < h` + :math:`K(x; h) \propto 1` if :math:`x < h` -* Epanechnikov kernel (``kernel = 'epanechnikov'``) + * Epanechnikov kernel (``kernel = 'epanechnikov'``) - :math:`K(x; h) \propto 1 - \frac{x^2}{h^2}` + :math:`K(x; h) \propto 1 - \frac{x^2}{h^2}` -* Exponential kernel (``kernel = 'exponential'``) + * Exponential kernel (``kernel = 'exponential'``) - :math:`K(x; h) \propto \exp(-x/h)` + :math:`K(x; h) \propto \exp(-x/h)` -* Linear kernel (``kernel = 'linear'``) + * Linear kernel (``kernel = 'linear'``) - :math:`K(x; h) \propto 1 - x/h` if :math:`x < h` + :math:`K(x; h) \propto 1 - x/h` if :math:`x < h` -* Cosine kernel (``kernel = 'cosine'``) + * Cosine kernel (``kernel = 'cosine'``) - :math:`K(x; h) \propto \cos(\frac{\pi x}{2h})` if :math:`x < h` + :math:`K(x; h) \propto \cos(\frac{\pi x}{2h})` if :math:`x < h` -|details-end| The kernel density estimator can be used with any of the valid distance metrics (see :class:`~sklearn.metrics.DistanceMetric` for a list of @@ -177,14 +174,14 @@ on a PCA projection of the data: The "new" data consists of linear combinations of the input data, with weights probabilistically drawn given the KDE model. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_neighbors_plot_kde_1d.py`: computation of simple kernel - density estimates in one dimension. +* :ref:`sphx_glr_auto_examples_neighbors_plot_kde_1d.py`: computation of simple kernel + density estimates in one dimension. - * :ref:`sphx_glr_auto_examples_neighbors_plot_digits_kde_sampling.py`: an example of using - Kernel Density estimation to learn a generative model of the hand-written - digits data, and drawing new samples from this model. +* :ref:`sphx_glr_auto_examples_neighbors_plot_digits_kde_sampling.py`: an example of using + Kernel Density estimation to learn a generative model of the hand-written + digits data, and drawing new samples from this model. - * :ref:`sphx_glr_auto_examples_neighbors_plot_species_kde.py`: an example of Kernel Density - estimation using the Haversine distance metric to visualize geospatial data +* :ref:`sphx_glr_auto_examples_neighbors_plot_species_kde.py`: an example of Kernel Density + estimation using the Haversine distance metric to visualize geospatial data diff --git a/doc/modules/ensemble.rst b/doc/modules/ensemble.rst index 4237d023973f7..f0f14c60e4867 100644 --- a/doc/modules/ensemble.rst +++ b/doc/modules/ensemble.rst @@ -18,10 +18,6 @@ trees, in averaging methods such as :ref:`Bagging methods `, :ref:`model stacking `, or :ref:`Voting `, or in boosting, as :ref:`AdaBoost `. -.. contents:: - :local: - :depth: 1 - .. _gradient_boosting: Gradient-boosted trees @@ -47,7 +43,7 @@ classification, in particular for tabular data. imputation. :class:`GradientBoostingClassifier` and - :class:`GradientBoostingRegressor`, might be preferred for small sample + :class:`GradientBoostingRegressor` might be preferred for small sample sizes since binning may lead to split points that are too approximate in this setting. 
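As a minimal sketch of the native missing-value support mentioned in the note
above (toy data, illustrative only)::

    >>> import numpy as np
    >>> from sklearn.ensemble import HistGradientBoostingClassifier
    >>> X = np.array([[1.0], [2.0], [np.nan], [4.0], [5.0]])  # NaN is passed through as-is
    >>> y = [0, 0, 1, 1, 1]
    >>> clf = HistGradientBoostingClassifier(min_samples_leaf=1).fit(X, y)
    >>> pred = clf.predict(np.array([[np.nan], [1.5]]))  # no imputation step needed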
@@ -78,10 +74,10 @@ estimators is slightly different, and some of the features from :class:`GradientBoostingClassifier` and :class:`GradientBoostingRegressor` are not yet supported, for instance some loss functions. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_inspection_plot_partial_dependence.py` - * :ref:`sphx_glr_auto_examples_ensemble_plot_forest_hist_grad_boosting_comparison.py` +* :ref:`sphx_glr_auto_examples_inspection_plot_partial_dependence.py` +* :ref:`sphx_glr_auto_examples_ensemble_plot_forest_hist_grad_boosting_comparison.py` Usage ^^^^^ @@ -102,14 +98,21 @@ controls the number of iterations of the boosting process:: >>> clf.score(X_test, y_test) 0.8965 -Available losses for regression are 'squared_error', -'absolute_error', which is less sensitive to outliers, and -'poisson', which is well suited to model counts and frequencies. For -classification, 'log_loss' is the only option. For binary classification it uses the -binary log loss, also known as binomial deviance or binary cross-entropy. For -`n_classes >= 3`, it uses the multi-class log loss function, with multinomial deviance -and categorical cross-entropy as alternative names. The appropriate loss version is -selected based on :term:`y` passed to :term:`fit`. +Available losses for **regression** are: + +- 'squared_error', which is the default loss; +- 'absolute_error', which is less sensitive to outliers than the squared error; +- 'gamma', which is well suited to model strictly positive outcomes; +- 'poisson', which is well suited to model counts and frequencies; +- 'quantile', which allows for estimating a conditional quantile that can later + be used to obtain prediction intervals. + +For **classification**, 'log_loss' is the only option. For binary classification +it uses the binary log loss, also known as binomial deviance or binary +cross-entropy. For `n_classes >= 3`, it uses the multi-class log loss function, +with multinomial deviance and categorical cross-entropy as alternative names. +The appropriate loss version is selected based on :term:`y` passed to +:term:`fit`. The size of the trees can be controlled through the ``max_leaf_nodes``, ``max_depth``, and ``min_samples_leaf`` parameters. @@ -126,43 +129,40 @@ in [XGBoost]_): \mathcal{L}(\phi) = \sum_i l(\hat{y}_i, y_i) + \frac12 \sum_k \lambda ||w_k||^2 -|details-start| -**Details on l2 regularization**: -|details-split| - -It is important to notice that the loss term :math:`l(\hat{y}_i, y_i)` describes -only half of the actual loss function except for the pinball loss and absolute -error. - -The index :math:`k` refers to the k-th tree in the ensemble of trees. In the -case of regression and binary classification, gradient boosting models grow one -tree per iteration, then :math:`k` runs up to `max_iter`. In the case of -multiclass classification problems, the maximal value of the index :math:`k` is -`n_classes` :math:`\times` `max_iter`. - -If :math:`T_k` denotes the number of leaves in the k-th tree, then :math:`w_k` -is a vector of length :math:`T_k`, which contains the leaf values of the form `w -= -sum_gradient / (sum_hessian + l2_regularization)` (see equation (5) in -[XGBoost]_). - -The leaf values :math:`w_k` are derived by dividing the sum of the gradients of -the loss function by the combined sum of hessians. Adding the regularization to -the denominator penalizes the leaves with small hessians (flat regions), -resulting in smaller updates. 
Those :math:`w_k` values contribute then to the -model's prediction for a given input that ends up in the corresponding leaf. The -final prediction is the sum of the base prediction and the contributions from -each tree. The result of that sum is then transformed by the inverse link -function depending on the choice of the loss function (see -:ref:`gradient_boosting_formulation`). - -Notice that the original paper [XGBoost]_ introduces a term :math:`\gamma\sum_k -T_k` that penalizes the number of leaves (making it a smooth version of -`max_leaf_nodes`) not presented here as it is not implemented in scikit-learn; -whereas :math:`\lambda` penalizes the magnitude of the individual tree -predictions before being rescaled by the learning rate, see -:ref:`gradient_boosting_shrinkage`. - -|details-end| +.. dropdown:: Details on l2 regularization + + It is important to notice that the loss term :math:`l(\hat{y}_i, y_i)` describes + only half of the actual loss function except for the pinball loss and absolute + error. + + The index :math:`k` refers to the k-th tree in the ensemble of trees. In the + case of regression and binary classification, gradient boosting models grow one + tree per iteration, then :math:`k` runs up to `max_iter`. In the case of + multiclass classification problems, the maximal value of the index :math:`k` is + `n_classes` :math:`\times` `max_iter`. + + If :math:`T_k` denotes the number of leaves in the k-th tree, then :math:`w_k` + is a vector of length :math:`T_k`, which contains the leaf values of the form `w + = -sum_gradient / (sum_hessian + l2_regularization)` (see equation (5) in + [XGBoost]_). + + The leaf values :math:`w_k` are derived by dividing the sum of the gradients of + the loss function by the combined sum of hessians. Adding the regularization to + the denominator penalizes the leaves with small hessians (flat regions), + resulting in smaller updates. Those :math:`w_k` values contribute then to the + model's prediction for a given input that ends up in the corresponding leaf. The + final prediction is the sum of the base prediction and the contributions from + each tree. The result of that sum is then transformed by the inverse link + function depending on the choice of the loss function (see + :ref:`gradient_boosting_formulation`). + + Notice that the original paper [XGBoost]_ introduces a term :math:`\gamma\sum_k + T_k` that penalizes the number of leaves (making it a smooth version of + `max_leaf_nodes`) not presented here as it is not implemented in scikit-learn; + whereas :math:`\lambda` penalizes the magnitude of the individual tree + predictions before being rescaled by the learning rate, see + :ref:`gradient_boosting_shrinkage`. + Note that **early-stopping is enabled by default if the number of samples is larger than 10,000**. The early-stopping behaviour is controlled via the @@ -213,9 +213,9 @@ If no missing values were encountered for a given feature during training, then samples with missing values are mapped to whichever child has the most samples. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_ensemble_plot_hgbt_regression.py` +* :ref:`sphx_glr_auto_examples_ensemble_plot_hgbt_regression.py` .. _sw_hgbdt: @@ -241,7 +241,7 @@ The following toy example demonstrates that samples with a sample weight of zero >>> gb.predict([[1, 0]]) array([1]) >>> gb.predict_proba([[1, 0]])[0, 1] - 0.99... 
+    np.float64(0.999)

As you can see, the `[1, 0]` is comfortably classified as `1` since the first
two samples are ignored due to their sample weights.

@@ -302,30 +302,25 @@ the most samples (just like for continuous features). When predicting,
categories that were not seen during fit time will be treated as missing
values.

-|details-start|
-**Split finding with categorical features**:
-|details-split|
+.. dropdown:: Split finding with categorical features

-The canonical way of considering
-categorical splits in a tree is to consider
-all of the :math:`2^{K - 1} - 1` partitions, where :math:`K` is the number of
-categories. This can quickly become prohibitive when :math:`K` is large.
-Fortunately, since gradient boosting trees are always regression trees (even
-for classification problems), there exist a faster strategy that can yield
-equivalent splits. First, the categories of a feature are sorted according to
-the variance of the target, for each category `k`. Once the categories are
-sorted, one can consider *continuous partitions*, i.e. treat the categories
-as if they were ordered continuous values (see Fisher [Fisher1958]_ for a
-formal proof). As a result, only :math:`K - 1` splits need to be considered
-instead of :math:`2^{K - 1} - 1`. The initial sorting is a
-:math:`\mathcal{O}(K \log(K))` operation, leading to a total complexity of
-:math:`\mathcal{O}(K \log(K) + K)`, instead of :math:`\mathcal{O}(2^K)`.
+  The canonical way of considering categorical splits in a tree is to consider
+  all of the :math:`2^{K - 1} - 1` partitions, where :math:`K` is the number of
+  categories. This can quickly become prohibitive when :math:`K` is large.
+  Fortunately, since gradient boosting trees are always regression trees (even
+  for classification problems), there exists a faster strategy that can yield
+  equivalent splits. First, the categories of a feature are sorted according to
+  the variance of the target, for each category `k`. Once the categories are
+  sorted, one can consider *continuous partitions*, i.e. treat the categories
+  as if they were ordered continuous values (see Fisher [Fisher1958]_ for a
+  formal proof). As a result, only :math:`K - 1` splits need to be considered
+  instead of :math:`2^{K - 1} - 1`. The initial sorting is a
+  :math:`\mathcal{O}(K \log(K))` operation, leading to a total complexity of
+  :math:`\mathcal{O}(K \log(K) + K)`, instead of :math:`\mathcal{O}(2^K)`.

-|details-end|
+.. rubric:: Examples

-.. topic:: Examples:
-
-  * :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_categorical.py`
+* :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_categorical.py`

.. _monotonic_cst_gbdt:

@@ -378,10 +373,10 @@ Also, monotonic constraints are not supported for multiclass classification.
Since categories are unordered quantities, it is not possible to enforce
monotonic constraints on categorical features.

-.. topic:: Examples:
+.. rubric:: Examples

-  * :ref:`sphx_glr_auto_examples_ensemble_plot_monotonic_constraints.py`
-  * :ref:`sphx_glr_auto_examples_ensemble_plot_hgbt_regression.py`
+* :ref:`sphx_glr_auto_examples_ensemble_plot_monotonic_constraints.py`
+* :ref:`sphx_glr_auto_examples_ensemble_plot_hgbt_regression.py`

.. _interaction_cst_hgbt:

@@ -396,7 +391,7 @@ done by the parameter ``interaction_cst``, where one can specify the indices
of features that are allowed to interact.
For instance, with 3 features in total, ``interaction_cst=[{0}, {1}, {2}]``
forbids all interactions.
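As a minimal sketch of passing such constraints (the data below is
illustrative; here feature 0 is kept isolated while features 1 and 2 may
interact)::

    >>> import numpy as np
    >>> from sklearn.ensemble import HistGradientBoostingRegressor
    >>> rng = np.random.RandomState(0)
    >>> X = rng.randn(100, 3)
    >>> y = X[:, 0] + X[:, 1] * X[:, 2]  # true signal couples features 1 and 2
    >>> est = HistGradientBoostingRegressor(interaction_cst=[{0}, {1, 2}])
    >>> est = est.fit(X, y)  # no branch mixes feature 0 with features 1 or 2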
-The constraints ``[{0, 1}, {1, 2}]`` specifies two groups of possibly +The constraints ``[{0, 1}, {1, 2}]`` specify two groups of possibly interacting features. Features 0 and 1 may interact with each other, as well as features 1 and 2. But note that features 0 and 2 are forbidden to interact. The following depicts a tree and the possible splits of the tree: @@ -414,16 +409,16 @@ Note that features not listed in ``interaction_cst`` are automatically assigned an interaction group for themselves. With again 3 features, this means that ``[{0}]`` is equivalent to ``[{0}, {1, 2}]``. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_inspection_plot_partial_dependence.py` +* :ref:`sphx_glr_auto_examples_inspection_plot_partial_dependence.py` -.. topic:: References +.. rubric:: References - .. [Mayer2022] M. Mayer, S.C. Bourassa, M. Hoesli, and D.F. Scognamiglio. - 2022. :doi:`Machine Learning Applications to Land and Structure Valuation - <10.3390/jrfm15050193>`. - Journal of Risk and Financial Management 15, no. 5: 193 +.. [Mayer2022] M. Mayer, S.C. Bourassa, M. Hoesli, and D.F. Scognamiglio. + 2022. :doi:`Machine Learning Applications to Land and Structure Valuation + <10.3390/jrfm15050193>`. + Journal of Risk and Financial Management 15, no. 5: 193 Low-level parallelism ^^^^^^^^^^^^^^^^^^^^^ @@ -479,18 +474,18 @@ Finally, many parts of the implementation of :class:`HistGradientBoostingClassifier` and :class:`HistGradientBoostingRegressor` are parallelized. -.. topic:: References +.. rubric:: References - .. [XGBoost] Tianqi Chen, Carlos Guestrin, :arxiv:`"XGBoost: A Scalable Tree - Boosting System" <1603.02754>` +.. [XGBoost] Tianqi Chen, Carlos Guestrin, :arxiv:`"XGBoost: A Scalable Tree + Boosting System" <1603.02754>` - .. [LightGBM] Ke et. al. `"LightGBM: A Highly Efficient Gradient - BoostingDecision Tree" `_ +.. [LightGBM] Ke et. al. `"LightGBM: A Highly Efficient Gradient + BoostingDecision Tree" `_ - .. [Fisher1958] Fisher, W.D. (1958). `"On Grouping for Maximum Homogeneity" - `_ - Journal of the American Statistical Association, 53, 789-798. +.. [Fisher1958] Fisher, W.D. (1958). `"On Grouping for Maximum Homogeneity" + `_ + Journal of the American Statistical Association, 53, 789-798. @@ -501,96 +496,88 @@ The usage and the parameters of :class:`GradientBoostingClassifier` and :class:`GradientBoostingRegressor` are described below. The 2 most important parameters of these estimators are `n_estimators` and `learning_rate`. -|details-start| -**Classification** -|details-split| - -:class:`GradientBoostingClassifier` supports both binary and multi-class -classification. -The following example shows how to fit a gradient boosting classifier -with 100 decision stumps as weak learners:: - - >>> from sklearn.datasets import make_hastie_10_2 - >>> from sklearn.ensemble import GradientBoostingClassifier - - >>> X, y = make_hastie_10_2(random_state=0) - >>> X_train, X_test = X[:2000], X[2000:] - >>> y_train, y_test = y[:2000], y[2000:] - - >>> clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, - ... max_depth=1, random_state=0).fit(X_train, y_train) - >>> clf.score(X_test, y_test) - 0.913... - -The number of weak learners (i.e. regression trees) is controlled by the -parameter ``n_estimators``; :ref:`The size of each tree -` can be controlled either by setting the tree -depth via ``max_depth`` or by setting the number of leaf nodes via -``max_leaf_nodes``. 
The ``learning_rate`` is a hyper-parameter in the range -(0.0, 1.0] that controls overfitting via :ref:`shrinkage -` . - -.. note:: - - Classification with more than 2 classes requires the induction - of ``n_classes`` regression trees at each iteration, - thus, the total number of induced trees equals - ``n_classes * n_estimators``. For datasets with a large number - of classes we strongly recommend to use - :class:`HistGradientBoostingClassifier` as an alternative to - :class:`GradientBoostingClassifier` . - -|details-end| - -|details-start| -**Regression** -|details-split| - -:class:`GradientBoostingRegressor` supports a number of -:ref:`different loss functions ` -for regression which can be specified via the argument -``loss``; the default loss function for regression is squared error -(``'squared_error'``). - -:: - - >>> import numpy as np - >>> from sklearn.metrics import mean_squared_error - >>> from sklearn.datasets import make_friedman1 - >>> from sklearn.ensemble import GradientBoostingRegressor - - >>> X, y = make_friedman1(n_samples=1200, random_state=0, noise=1.0) - >>> X_train, X_test = X[:200], X[200:] - >>> y_train, y_test = y[:200], y[200:] - >>> est = GradientBoostingRegressor( - ... n_estimators=100, learning_rate=0.1, max_depth=1, random_state=0, - ... loss='squared_error' - ... ).fit(X_train, y_train) - >>> mean_squared_error(y_test, est.predict(X_test)) - 5.00... - -The figure below shows the results of applying :class:`GradientBoostingRegressor` -with least squares loss and 500 base learners to the diabetes dataset -(:func:`sklearn.datasets.load_diabetes`). -The plot shows the train and test error at each iteration. -The train error at each iteration is stored in the -`train_score_` attribute of the gradient boosting model. -The test error at each iterations can be obtained -via the :meth:`~GradientBoostingRegressor.staged_predict` method which returns a -generator that yields the predictions at each stage. Plots like these can be used -to determine the optimal number of trees (i.e. ``n_estimators``) by early stopping. - -.. figure:: ../auto_examples/ensemble/images/sphx_glr_plot_gradient_boosting_regression_001.png - :target: ../auto_examples/ensemble/plot_gradient_boosting_regression.html - :align: center - :scale: 75 - -|details-end| +.. dropdown:: Classification + + :class:`GradientBoostingClassifier` supports both binary and multi-class + classification. + The following example shows how to fit a gradient boosting classifier + with 100 decision stumps as weak learners:: + + >>> from sklearn.datasets import make_hastie_10_2 + >>> from sklearn.ensemble import GradientBoostingClassifier + + >>> X, y = make_hastie_10_2(random_state=0) + >>> X_train, X_test = X[:2000], X[2000:] + >>> y_train, y_test = y[:2000], y[2000:] + + >>> clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, + ... max_depth=1, random_state=0).fit(X_train, y_train) + >>> clf.score(X_test, y_test) + 0.913 + + The number of weak learners (i.e. regression trees) is controlled by the + parameter ``n_estimators``; :ref:`The size of each tree + ` can be controlled either by setting the tree + depth via ``max_depth`` or by setting the number of leaf nodes via + ``max_leaf_nodes``. The ``learning_rate`` is a hyper-parameter in the range + (0.0, 1.0] that controls overfitting via :ref:`shrinkage + ` . + + .. 
note:: + + Classification with more than 2 classes requires the induction + of ``n_classes`` regression trees at each iteration, + thus, the total number of induced trees equals + ``n_classes * n_estimators``. For datasets with a large number + of classes we strongly recommend to use + :class:`HistGradientBoostingClassifier` as an alternative to + :class:`GradientBoostingClassifier` . + +.. dropdown:: Regression + + :class:`GradientBoostingRegressor` supports a number of + :ref:`different loss functions ` + for regression which can be specified via the argument + ``loss``; the default loss function for regression is squared error + (``'squared_error'``). + + :: + + >>> import numpy as np + >>> from sklearn.metrics import mean_squared_error + >>> from sklearn.datasets import make_friedman1 + >>> from sklearn.ensemble import GradientBoostingRegressor + + >>> X, y = make_friedman1(n_samples=1200, random_state=0, noise=1.0) + >>> X_train, X_test = X[:200], X[200:] + >>> y_train, y_test = y[:200], y[200:] + >>> est = GradientBoostingRegressor( + ... n_estimators=100, learning_rate=0.1, max_depth=1, random_state=0, + ... loss='squared_error' + ... ).fit(X_train, y_train) + >>> mean_squared_error(y_test, est.predict(X_test)) + 5.00 + + The figure below shows the results of applying :class:`GradientBoostingRegressor` + with least squares loss and 500 base learners to the diabetes dataset + (:func:`sklearn.datasets.load_diabetes`). + The plot shows the train and test error at each iteration. + The train error at each iteration is stored in the + `train_score_` attribute of the gradient boosting model. + The test error at each iteration can be obtained + via the :meth:`~GradientBoostingRegressor.staged_predict` method which returns a + generator that yields the predictions at each stage. Plots like these can be used + to determine the optimal number of trees (i.e. ``n_estimators``) by early stopping. + + .. figure:: ../auto_examples/ensemble/images/sphx_glr_plot_gradient_boosting_regression_001.png + :target: ../auto_examples/ensemble/plot_gradient_boosting_regression.html + :align: center + :scale: 75 -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_regression.py` - * :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_oob.py` +* :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_regression.py` +* :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_oob.py` .. _gradient_boosting_warm_start: @@ -603,10 +590,25 @@ fitted model. :: - >>> _ = est.set_params(n_estimators=200, warm_start=True) # set warm_start and new nr of trees + >>> import numpy as np + >>> from sklearn.metrics import mean_squared_error + >>> from sklearn.datasets import make_friedman1 + >>> from sklearn.ensemble import GradientBoostingRegressor + + >>> X, y = make_friedman1(n_samples=1200, random_state=0, noise=1.0) + >>> X_train, X_test = X[:200], X[200:] + >>> y_train, y_test = y[:200], y[200:] + >>> est = GradientBoostingRegressor( + ... n_estimators=100, learning_rate=0.1, max_depth=1, random_state=0, + ... loss='squared_error' + ... ) + >>> est = est.fit(X_train, y_train) # fit with 100 trees + >>> mean_squared_error(y_test, est.predict(X_test)) + 5.00 + >>> _ = est.set_params(n_estimators=200, warm_start=True) # set warm_start and increase num of trees >>> _ = est.fit(X_train, y_train) # fit additional 100 trees to est >>> mean_squared_error(y_test, est.predict(X_test)) - 3.84... + 3.84 .. 
_gradient_boosting_tree_size: @@ -645,116 +647,108 @@ Mathematical formulation We first present GBRT for regression, and then detail the classification case. -|details-start| -**Regression** -|details-split| - -GBRT regressors are additive models whose prediction :math:`\hat{y}_i` for a -given input :math:`x_i` is of the following form: +.. dropdown:: Regression -.. math:: + GBRT regressors are additive models whose prediction :math:`\hat{y}_i` for a + given input :math:`x_i` is of the following form: - \hat{y}_i = F_M(x_i) = \sum_{m=1}^{M} h_m(x_i) - -where the :math:`h_m` are estimators called *weak learners* in the context -of boosting. Gradient Tree Boosting uses :ref:`decision tree regressors -` of fixed size as weak learners. The constant M corresponds to the -`n_estimators` parameter. + .. math:: -Similar to other boosting algorithms, a GBRT is built in a greedy fashion: + \hat{y}_i = F_M(x_i) = \sum_{m=1}^{M} h_m(x_i) -.. math:: + where the :math:`h_m` are estimators called *weak learners* in the context + of boosting. Gradient Tree Boosting uses :ref:`decision tree regressors + ` of fixed size as weak learners. The constant M corresponds to the + `n_estimators` parameter. - F_m(x) = F_{m-1}(x) + h_m(x), + Similar to other boosting algorithms, a GBRT is built in a greedy fashion: -where the newly added tree :math:`h_m` is fitted in order to minimize a sum -of losses :math:`L_m`, given the previous ensemble :math:`F_{m-1}`: + .. math:: -.. math:: + F_m(x) = F_{m-1}(x) + h_m(x), - h_m = \arg\min_{h} L_m = \arg\min_{h} \sum_{i=1}^{n} - l(y_i, F_{m-1}(x_i) + h(x_i)), + where the newly added tree :math:`h_m` is fitted in order to minimize a sum + of losses :math:`L_m`, given the previous ensemble :math:`F_{m-1}`: -where :math:`l(y_i, F(x_i))` is defined by the `loss` parameter, detailed -in the next section. + .. math:: -By default, the initial model :math:`F_{0}` is chosen as the constant that -minimizes the loss: for a least-squares loss, this is the empirical mean of -the target values. The initial model can also be specified via the ``init`` -argument. + h_m = \arg\min_{h} L_m = \arg\min_{h} \sum_{i=1}^{n} + l(y_i, F_{m-1}(x_i) + h(x_i)), -Using a first-order Taylor approximation, the value of :math:`l` can be -approximated as follows: + where :math:`l(y_i, F(x_i))` is defined by the `loss` parameter, detailed + in the next section. -.. math:: + By default, the initial model :math:`F_{0}` is chosen as the constant that + minimizes the loss: for a least-squares loss, this is the empirical mean of + the target values. The initial model can also be specified via the ``init`` + argument. - l(y_i, F_{m-1}(x_i) + h_m(x_i)) \approx - l(y_i, F_{m-1}(x_i)) - + h_m(x_i) - \left[ \frac{\partial l(y_i, F(x_i))}{\partial F(x_i)} \right]_{F=F_{m - 1}}. + Using a first-order Taylor approximation, the value of :math:`l` can be + approximated as follows: -.. note:: + .. math:: - Briefly, a first-order Taylor approximation says that - :math:`l(z) \approx l(a) + (z - a) \frac{\partial l}{\partial z}(a)`. - Here, :math:`z` corresponds to :math:`F_{m - 1}(x_i) + h_m(x_i)`, and - :math:`a` corresponds to :math:`F_{m-1}(x_i)` + l(y_i, F_{m-1}(x_i) + h_m(x_i)) \approx + l(y_i, F_{m-1}(x_i)) + + h_m(x_i) + \left[ \frac{\partial l(y_i, F(x_i))}{\partial F(x_i)} \right]_{F=F_{m - 1}}. -The quantity :math:`\left[ \frac{\partial l(y_i, F(x_i))}{\partial F(x_i)} -\right]_{F=F_{m - 1}}` is the derivative of the loss with respect to its -second parameter, evaluated at :math:`F_{m-1}(x)`. 
It is easy to compute for -any given :math:`F_{m - 1}(x_i)` in a closed form since the loss is -differentiable. We will denote it by :math:`g_i`. + .. note:: -Removing the constant terms, we have: + Briefly, a first-order Taylor approximation says that + :math:`l(z) \approx l(a) + (z - a) \frac{\partial l}{\partial z}(a)`. + Here, :math:`z` corresponds to :math:`F_{m - 1}(x_i) + h_m(x_i)`, and + :math:`a` corresponds to :math:`F_{m-1}(x_i)` -.. math:: + The quantity :math:`\left[ \frac{\partial l(y_i, F(x_i))}{\partial F(x_i)} + \right]_{F=F_{m - 1}}` is the derivative of the loss with respect to its + second parameter, evaluated at :math:`F_{m-1}(x)`. It is easy to compute for + any given :math:`F_{m - 1}(x_i)` in a closed form since the loss is + differentiable. We will denote it by :math:`g_i`. - h_m \approx \arg\min_{h} \sum_{i=1}^{n} h(x_i) g_i + Removing the constant terms, we have: -This is minimized if :math:`h(x_i)` is fitted to predict a value that is -proportional to the negative gradient :math:`-g_i`. Therefore, at each -iteration, **the estimator** :math:`h_m` **is fitted to predict the negative -gradients of the samples**. The gradients are updated at each iteration. -This can be considered as some kind of gradient descent in a functional -space. + .. math:: -.. note:: + h_m \approx \arg\min_{h} \sum_{i=1}^{n} h(x_i) g_i - For some losses, e.g. ``'absolute_error'`` where the gradients - are :math:`\pm 1`, the values predicted by a fitted :math:`h_m` are not - accurate enough: the tree can only output integer values. As a result, the - leaves values of the tree :math:`h_m` are modified once the tree is - fitted, such that the leaves values minimize the loss :math:`L_m`. The - update is loss-dependent: for the absolute error loss, the value of - a leaf is updated to the median of the samples in that leaf. + This is minimized if :math:`h(x_i)` is fitted to predict a value that is + proportional to the negative gradient :math:`-g_i`. Therefore, at each + iteration, **the estimator** :math:`h_m` **is fitted to predict the negative + gradients of the samples**. The gradients are updated at each iteration. + This can be considered as some kind of gradient descent in a functional + space. -|details-end| + .. note:: -|details-start| -**Classification** -|details-split| + For some losses, e.g. ``'absolute_error'`` where the gradients + are :math:`\pm 1`, the values predicted by a fitted :math:`h_m` are not + accurate enough: the tree can only output integer values. As a result, the + leaves values of the tree :math:`h_m` are modified once the tree is + fitted, such that the leaves values minimize the loss :math:`L_m`. The + update is loss-dependent: for the absolute error loss, the value of + a leaf is updated to the median of the samples in that leaf. -Gradient boosting for classification is very similar to the regression case. -However, the sum of the trees :math:`F_M(x_i) = \sum_m h_m(x_i)` is not -homogeneous to a prediction: it cannot be a class, since the trees predict -continuous values. +.. dropdown:: Classification -The mapping from the value :math:`F_M(x_i)` to a class or a probability is -loss-dependent. For the log-loss, the probability that -:math:`x_i` belongs to the positive class is modeled as :math:`p(y_i = 1 | -x_i) = \sigma(F_M(x_i))` where :math:`\sigma` is the sigmoid or expit function. + Gradient boosting for classification is very similar to the regression case. 
+ However, the sum of the trees :math:`F_M(x_i) = \sum_m h_m(x_i)` is not + homogeneous to a prediction: it cannot be a class, since the trees predict + continuous values. -For multiclass classification, K trees (for K classes) are built at each of -the :math:`M` iterations. The probability that :math:`x_i` belongs to class -k is modeled as a softmax of the :math:`F_{M,k}(x_i)` values. + The mapping from the value :math:`F_M(x_i)` to a class or a probability is + loss-dependent. For the log-loss, the probability that + :math:`x_i` belongs to the positive class is modeled as :math:`p(y_i = 1 | + x_i) = \sigma(F_M(x_i))` where :math:`\sigma` is the sigmoid or expit function. -Note that even for a classification task, the :math:`h_m` sub-estimator is -still a regressor, not a classifier. This is because the sub-estimators are -trained to predict (negative) *gradients*, which are always continuous -quantities. + For multiclass classification, K trees (for K classes) are built at each of + the :math:`M` iterations. The probability that :math:`x_i` belongs to class + k is modeled as a softmax of the :math:`F_{M,k}(x_i)` values. -|details-end| + Note that even for a classification task, the :math:`h_m` sub-estimator is + still a regressor, not a classifier. This is because the sub-estimators are + trained to predict (negative) *gradients*, which are always continuous + quantities. .. _gradient_boosting_loss: @@ -764,9 +758,7 @@ Loss Functions The following loss functions are supported and can be specified using the parameter ``loss``: -|details-start| -**Regression** -|details-split| +.. dropdown:: Regression * Squared error (``'squared_error'``): The natural choice for regression due to its superior computational properties. The initial model is @@ -783,12 +775,7 @@ the parameter ``loss``: can be used to create prediction intervals (see :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_quantile.py`). -|details-end| - - -|details-start| -**Classification** -|details-split| +.. dropdown:: Classification * Binary log-loss (``'log-loss'``): The binomial negative log-likelihood loss function for binary classification. It provides @@ -806,8 +793,6 @@ the parameter ``loss``: examples than ``'log-loss'``; can only be used for binary classification. -|details-end| - .. _gradient_boosting_shrinkage: Shrinkage via learning rate @@ -821,7 +806,7 @@ the contribution of each weak learner by a constant factor :math:`\nu`: F_m(x) = F_{m-1}(x) + \nu h_m(x) The parameter :math:`\nu` is also called the **learning rate** because -it scales the step length the gradient descent procedure; it can +it scales the step length of the gradient descent procedure; it can be set via the ``learning_rate`` parameter. The parameter ``learning_rate`` strongly interacts with the parameter @@ -874,11 +859,11 @@ the optimal number of iterations. OOB estimates are usually very pessimistic thu we recommend to use cross-validation instead and only use OOB if cross-validation is too time consuming. -.. topic:: Examples: +.. 
rubric:: Examples - * :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_regularization.py` - * :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_oob.py` - * :ref:`sphx_glr_auto_examples_ensemble_plot_ensemble_oob.py` +* :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_regularization.py` +* :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_oob.py` +* :ref:`sphx_glr_auto_examples_ensemble_plot_ensemble_oob.py` Interpretation with feature importance ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -894,7 +879,7 @@ Often features do not contribute equally to predict the target response; in many situations the majority of the features are in fact irrelevant. When interpreting a model, the first question usually is: what are -those important features and how do they contributing in predicting +those important features and how do they contribute in predicting the target response? Individual decision trees intrinsically perform feature selection by selecting @@ -915,28 +900,29 @@ accessed via the ``feature_importances_`` property:: >>> clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, ... max_depth=1, random_state=0).fit(X, y) >>> clf.feature_importances_ - array([0.10..., 0.10..., 0.11..., ... + array([0.107, 0.105, 0.113, 0.0987, 0.0947, + 0.107, 0.0916, 0.0972, 0.0958, 0.0906]) Note that this computation of feature importance is based on entropy, and it is distinct from :func:`sklearn.inspection.permutation_importance` which is based on permutation of the features. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_regression.py` +* :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_regression.py` -.. topic:: References +.. rubric:: References - .. [Friedman2001] Friedman, J.H. (2001). :doi:`Greedy function approximation: A gradient - boosting machine <10.1214/aos/1013203451>`. - Annals of Statistics, 29, 1189-1232. +.. [Friedman2001] Friedman, J.H. (2001). :doi:`Greedy function approximation: A gradient + boosting machine <10.1214/aos/1013203451>`. + Annals of Statistics, 29, 1189-1232. - .. [Friedman2002] Friedman, J.H. (2002). `Stochastic gradient boosting. - `_. - Computational Statistics & Data Analysis, 38, 367-378. +.. [Friedman2002] Friedman, J.H. (2002). `Stochastic gradient boosting. + `_. + Computational Statistics & Data Analysis, 38, 367-378. - .. [R2007] G. Ridgeway (2006). `Generalized Boosted Models: A guide to the gbm - package `_ +.. [R2007] G. Ridgeway (2006). `Generalized Boosted Models: A guide to the gbm + package `_ .. _forest: @@ -1020,9 +1006,9 @@ characteristics of the dataset and the modeling task. It's a good idea to try both models and compare their performance and computational efficiency on your specific problem to determine which model is the best fit. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_ensemble_plot_forest_hist_grad_boosting_comparison.py` +* :ref:`sphx_glr_auto_examples_ensemble_plot_forest_hist_grad_boosting_comparison.py` Extremely Randomized Trees -------------------------- @@ -1050,19 +1036,19 @@ in bias:: ... random_state=0) >>> scores = cross_val_score(clf, X, y, cv=5) >>> scores.mean() - 0.98... + np.float64(0.98) >>> clf = RandomForestClassifier(n_estimators=10, max_depth=None, ... min_samples_split=2, random_state=0) >>> scores = cross_val_score(clf, X, y, cv=5) >>> scores.mean() - 0.999... + np.float64(0.999) >>> clf = ExtraTreesClassifier(n_estimators=10, max_depth=None, ... 
min_samples_split=2, random_state=0) >>> scores = cross_val_score(clf, X, y, cv=5) >>> scores.mean() > 0.999 - True + np.True_ .. figure:: ../auto_examples/ensemble/images/sphx_glr_plot_forest_iris_001.png :target: ../auto_examples/ensemble/plot_forest_iris.html @@ -1119,20 +1105,19 @@ fast). Significant speedup can still be achieved though when building a large number of trees, or when building a single tree requires a fair amount of time (e.g., on large datasets). -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_ensemble_plot_forest_iris.py` - * :ref:`sphx_glr_auto_examples_ensemble_plot_forest_importances_faces.py` - * :ref:`sphx_glr_auto_examples_miscellaneous_plot_multioutput_face_completion.py` +* :ref:`sphx_glr_auto_examples_ensemble_plot_forest_iris.py` +* :ref:`sphx_glr_auto_examples_miscellaneous_plot_multioutput_face_completion.py` -.. topic:: References +.. rubric:: References - .. [B2001] L. Breiman, "Random Forests", Machine Learning, 45(1), 5-32, 2001. +.. [B2001] L. Breiman, "Random Forests", Machine Learning, 45(1), 5-32, 2001. - .. [B1998] L. Breiman, "Arcing Classifiers", Annals of Statistics 1998. +.. [B1998] L. Breiman, "Arcing Classifiers", Annals of Statistics 1998. - * P. Geurts, D. Ernst., and L. Wehenkel, "Extremely randomized - trees", Machine Learning, 63(1), 3-42, 2006. +* P. Geurts, D. Ernst., and L. Wehenkel, "Extremely randomized + trees", Machine Learning, 63(1), 3-42, 2006. .. _random_forest_feature_importance: @@ -1169,31 +1154,21 @@ evaluation with Random Forests. obtaining feature importance are explored in: :ref:`sphx_glr_auto_examples_inspection_plot_permutation_importance.py`. -The following example shows a color-coded representation of the relative -importances of each individual pixel for a face recognition task using -a :class:`ExtraTreesClassifier` model. - -.. figure:: ../auto_examples/ensemble/images/sphx_glr_plot_forest_importances_faces_001.png - :target: ../auto_examples/ensemble/plot_forest_importances_faces.html - :align: center - :scale: 75 - In practice those estimates are stored as an attribute named ``feature_importances_`` on the fitted model. This is an array with shape ``(n_features,)`` whose values are positive and sum to 1.0. The higher the value, the more important is the contribution of the matching feature to the prediction function. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_ensemble_plot_forest_importances_faces.py` - * :ref:`sphx_glr_auto_examples_ensemble_plot_forest_importances.py` +* :ref:`sphx_glr_auto_examples_ensemble_plot_forest_importances.py` -.. topic:: References +.. rubric:: References - .. [L2014] G. Louppe, :arxiv:`"Understanding Random Forests: From Theory to - Practice" <1407.7502>`, - PhD Thesis, U. of Liege, 2014. +.. [L2014] G. Louppe, :arxiv:`"Understanding Random Forests: From Theory to + Practice" <1407.7502>`, + PhD Thesis, U. of Liege, 2014. .. _random_trees_embedding: @@ -1216,15 +1191,15 @@ As neighboring data points are more likely to lie within the same leaf of a tree, the transformation performs an implicit, non-parametric density estimation. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_ensemble_plot_random_forest_embedding.py` +* :ref:`sphx_glr_auto_examples_ensemble_plot_random_forest_embedding.py` - * :ref:`sphx_glr_auto_examples_manifold_plot_lle_digits.py` compares non-linear - dimensionality reduction techniques on handwritten digits. 
+* :ref:`sphx_glr_auto_examples_manifold_plot_lle_digits.py` compares non-linear
+  dimensionality reduction techniques on handwritten digits.

-  * :ref:`sphx_glr_auto_examples_ensemble_plot_feature_transformation.py` compares
-    supervised and unsupervised tree based feature transformations.
+* :ref:`sphx_glr_auto_examples_ensemble_plot_feature_transformation.py` compares
+  supervised and unsupervised tree based feature transformations.

 .. seealso::

@@ -1232,6 +1207,43 @@ estimation.
    representations of feature space, also these approaches focus also on
    dimensionality reduction.

+.. _tree_ensemble_warm_start:
+
+Fitting additional trees
+------------------------
+
+RandomForest, Extra-Trees and :class:`RandomTreesEmbedding` estimators all support
+``warm_start=True`` which allows you to add more trees to an already fitted model.
+
+::
+
+  >>> from sklearn.datasets import make_classification
+  >>> from sklearn.ensemble import RandomForestClassifier
+
+  >>> X, y = make_classification(n_samples=100, random_state=1)
+  >>> clf = RandomForestClassifier(n_estimators=10)
+  >>> clf = clf.fit(X, y)  # fit with 10 trees
+  >>> len(clf.estimators_)
+  10
+  >>> # set warm_start and increase the number of estimators
+  >>> _ = clf.set_params(n_estimators=20, warm_start=True)
+  >>> _ = clf.fit(X, y)  # fit 10 additional trees
+  >>> len(clf.estimators_)
+  20
+
+When ``random_state`` is also set, the internal random state is preserved
+between ``fit`` calls. This means that training a model once with ``n`` estimators is
+the same as building the model iteratively via multiple ``fit`` calls, where the
+final number of estimators is equal to ``n``.
+
+::
+
+  >>> clf = RandomForestClassifier(n_estimators=20)  # set `n_estimators` to 10 + 10
+  >>> _ = clf.fit(X, y)  # fit; `estimators_` will be the same as `clf` above
+
+Note that this differs from the usual behavior of :term:`random_state` in that it does
+*not* yield the same result across different calls.
+
 .. _bagging:

 Bagging meta-estimator
@@ -1283,24 +1295,23 @@ subsets of 50% of the samples and 50% of the features.

     >>> bagging = BaggingClassifier(KNeighborsClassifier(),
     ...                             max_samples=0.5, max_features=0.5)

-.. topic:: Examples:
+.. rubric:: Examples

- * :ref:`sphx_glr_auto_examples_ensemble_plot_bias_variance.py`
+* :ref:`sphx_glr_auto_examples_ensemble_plot_bias_variance.py`

-.. topic:: References
+.. rubric:: References

- .. [B1999] L. Breiman, "Pasting small votes for classification in large
-   databases and on-line", Machine Learning, 36(1), 85-103, 1999.
+.. [B1999] L. Breiman, "Pasting small votes for classification in large
+  databases and on-line", Machine Learning, 36(1), 85-103, 1999.

- .. [B1996] L. Breiman, "Bagging predictors", Machine Learning, 24(2),
-   123-140, 1996.
+.. [B1996] L. Breiman, "Bagging predictors", Machine Learning, 24(2),
+  123-140, 1996.

- .. [H1998] T. Ho, "The random subspace method for constructing decision
-   forests", Pattern Analysis and Machine Intelligence, 20(8), 832-844,
-   1998.
+.. [H1998] T. Ho, "The random subspace method for constructing decision
+  forests", Pattern Analysis and Machine Intelligence, 20(8), 832-844, 1998.

- .. [LG2012] G. Louppe and P. Geurts, "Ensembles on Random Patches",
-   Machine Learning and Knowledge Discovery in Databases, 346-361, 2012.
+.. [LG2012] G. Louppe and P. Geurts, "Ensembles on Random Patches",
+  Machine Learning and Knowledge Discovery in Databases, 346-361, 2012.


@@ -1385,7 +1396,7 @@ and averaged.
The final class label is then derived from the class label with the highest average probability. To illustrate this with a simple example, let's assume we have 3 -classifiers and a 3-class classification problems where we assign +classifiers and a 3-class classification problem where we assign equal weights to all classifiers: w1=1, w2=1, w3=1. The weighted average probabilities for a sample would then be @@ -1394,44 +1405,23 @@ calculated as follows: ================ ========== ========== ========== classifier class 1 class 2 class 3 ================ ========== ========== ========== -classifier 1 w1 * 0.2 w1 * 0.5 w1 * 0.3 -classifier 2 w2 * 0.6 w2 * 0.3 w2 * 0.1 +classifier 1 w1 * 0.2 w1 * 0.5 w1 * 0.3 +classifier 2 w2 * 0.6 w2 * 0.3 w2 * 0.1 classifier 3 w3 * 0.3 w3 * 0.4 w3 * 0.3 -weighted average 0.37 0.4 0.23 +weighted average 0.37 0.4 0.23 ================ ========== ========== ========== -Here, the predicted class label is 2, since it has the -highest average probability. - -The following example illustrates how the decision regions may change -when a soft :class:`VotingClassifier` is used based on a linear Support -Vector Machine, a Decision Tree, and a K-nearest neighbor classifier:: - - >>> from sklearn import datasets - >>> from sklearn.tree import DecisionTreeClassifier - >>> from sklearn.neighbors import KNeighborsClassifier - >>> from sklearn.svm import SVC - >>> from itertools import product - >>> from sklearn.ensemble import VotingClassifier +Here, the predicted class label is 2, since it has the highest average +predicted probability. See the example on +:ref:`sphx_glr_auto_examples_ensemble_plot_voting_decision_regions.py` for a +demonstration of how the predicted class label can be obtained from the weighted +average of predicted probabilities. - >>> # Loading some example data - >>> iris = datasets.load_iris() - >>> X = iris.data[:, [0, 2]] - >>> y = iris.target +The following figure illustrates how the decision regions may change when +a soft :class:`VotingClassifier` is trained with weights on three linear +models: - >>> # Training classifiers - >>> clf1 = DecisionTreeClassifier(max_depth=4) - >>> clf2 = KNeighborsClassifier(n_neighbors=7) - >>> clf3 = SVC(kernel='rbf', probability=True) - >>> eclf = VotingClassifier(estimators=[('dt', clf1), ('knn', clf2), ('svc', clf3)], - ... voting='soft', weights=[2, 1, 2]) - - >>> clf1 = clf1.fit(X, y) - >>> clf2 = clf2.fit(X, y) - >>> clf3 = clf3.fit(X, y) - >>> eclf = eclf.fit(X, y) - -.. figure:: ../auto_examples/ensemble/images/sphx_glr_plot_voting_decision_regions_001.png +.. figure:: ../auto_examples/ensemble/images/sphx_glr_plot_voting_decision_regions_002.png :target: ../auto_examples/ensemble/plot_voting_decision_regions.html :align: center :scale: 75% @@ -1455,29 +1445,25 @@ Optionally, weights can be provided for the individual classifiers:: ... voting='soft', weights=[2,5,1] ... ) -|details-start| -**Using the `VotingClassifier` with `GridSearchCV`** -|details-split| +.. 
dropdown:: Using the :class:`VotingClassifier` with :class:`~sklearn.model_selection.GridSearchCV` -The :class:`VotingClassifier` can also be used together with -:class:`~sklearn.model_selection.GridSearchCV` in order to tune the -hyperparameters of the individual estimators:: + The :class:`VotingClassifier` can also be used together with + :class:`~sklearn.model_selection.GridSearchCV` in order to tune the + hyperparameters of the individual estimators:: - >>> from sklearn.model_selection import GridSearchCV - >>> clf1 = LogisticRegression(random_state=1) - >>> clf2 = RandomForestClassifier(random_state=1) - >>> clf3 = GaussianNB() - >>> eclf = VotingClassifier( - ... estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)], - ... voting='soft' - ... ) - - >>> params = {'lr__C': [1.0, 100.0], 'rf__n_estimators': [20, 200]} + >>> from sklearn.model_selection import GridSearchCV + >>> clf1 = LogisticRegression(random_state=1) + >>> clf2 = RandomForestClassifier(random_state=1) + >>> clf3 = GaussianNB() + >>> eclf = VotingClassifier( + ... estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)], + ... voting='soft' + ... ) - >>> grid = GridSearchCV(estimator=eclf, param_grid=params, cv=5) - >>> grid = grid.fit(iris.data, iris.target) + >>> params = {'lr__C': [1.0, 100.0], 'rf__n_estimators': [20, 200]} -|details-end| + >>> grid = GridSearchCV(estimator=eclf, param_grid=params, cv=5) + >>> grid = grid.fit(iris.data, iris.target) .. _voting_regressor: @@ -1515,9 +1501,9 @@ The following example shows how to fit the VotingRegressor:: :align: center :scale: 75% -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_ensemble_plot_voting_regressor.py` +* :ref:`sphx_glr_auto_examples_ensemble_plot_voting_regressor.py` .. _stacking: @@ -1581,8 +1567,8 @@ availability, tested in the order of preference: `predict_proba`, `decision_function` and `predict`. A :class:`StackingRegressor` and :class:`StackingClassifier` can be used as -any other regressor or classifier, exposing a `predict`, `predict_proba`, and -`decision_function` methods, e.g.:: +any other regressor or classifier, exposing a `predict`, `predict_proba`, or +`decision_function` method, e.g.:: >>> y_pred = reg.predict(X_test) >>> from sklearn.metrics import r2_score @@ -1593,11 +1579,11 @@ Note that it is also possible to get the output of the stacked `estimators` using the `transform` method:: >>> reg.transform(X_test[:5]) - array([[142..., 138..., 146...], - [179..., 182..., 151...], - [139..., 132..., 158...], - [286..., 292..., 225...], - [126..., 124..., 164...]]) + array([[142, 138, 146], + [179, 182, 151], + [139, 132, 158], + [286, 292, 225], + [126, 124, 164]]) In practice, a stacking predictor predicts as good as the best predictor of the base layer and even sometimes outperforms it by combining the different @@ -1636,10 +1622,14 @@ computationally expensive. ... .format(multi_layer_regressor.score(X_test, y_test))) R2 score: 0.53 -.. topic:: References +.. rubric:: Examples + +* :ref:`sphx_glr_auto_examples_ensemble_plot_stack_predictors.py` + +.. rubric:: References - .. [W1992] Wolpert, David H. "Stacked generalization." Neural networks 5.2 - (1992): 241-259. +.. [W1992] Wolpert, David H. "Stacked generalization." Neural networks 5.2 + (1992): 241-259. 
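Putting the stacking API described above together end to end, the following
minimal sketch (the diabetes dataset and the particular base estimators are
arbitrary choices for illustration, not part of the patch above) builds and
evaluates a two-level stacked regressor::

    >>> from sklearn.datasets import load_diabetes
    >>> from sklearn.ensemble import RandomForestRegressor, StackingRegressor
    >>> from sklearn.linear_model import RidgeCV
    >>> from sklearn.model_selection import train_test_split
    >>> X, y = load_diabetes(return_X_y=True)
    >>> X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
    >>> reg = StackingRegressor(
    ...     # base estimators are fitted on cross-validated folds internally
    ...     estimators=[('ridge', RidgeCV()),
    ...                 ('rf', RandomForestRegressor(random_state=42))],
    ...     # the final estimator combines the base predictions
    ...     final_estimator=RidgeCV())
    >>> r2 = reg.fit(X_train, y_train).score(X_test, y_test)

Swapping `RidgeCV` for any other regressor, or adding more named base
estimators, follows the same pattern.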
@@ -1692,10 +1682,10 @@ learners:: >>> from sklearn.ensemble import AdaBoostClassifier >>> X, y = load_iris(return_X_y=True) - >>> clf = AdaBoostClassifier(n_estimators=100, algorithm="SAMME",) + >>> clf = AdaBoostClassifier(n_estimators=100) >>> scores = cross_val_score(clf, X, y, cv=5) >>> scores.mean() - 0.9... + np.float64(0.95) The number of weak learners is controlled by the parameter ``n_estimators``. The ``learning_rate`` parameter controls the contribution of the weak learners in @@ -1705,27 +1695,26 @@ The main parameters to tune to obtain good results are ``n_estimators`` and the complexity of the base estimators (e.g., its depth ``max_depth`` or minimum required number of samples to consider a split ``min_samples_split``). -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_multiclass.py` shows the performance - of AdaBoost on a multi-class problem. +* :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_multiclass.py` shows the performance + of AdaBoost on a multi-class problem. - * :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_twoclass.py` shows the decision boundary - and decision function values for a non-linearly separable two-class problem - using AdaBoost-SAMME. +* :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_twoclass.py` shows the decision boundary + and decision function values for a non-linearly separable two-class problem + using AdaBoost-SAMME. - * :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_regression.py` demonstrates regression - with the AdaBoost.R2 algorithm. +* :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_regression.py` demonstrates regression + with the AdaBoost.R2 algorithm. -.. topic:: References +.. rubric:: References - .. [FS1995] Y. Freund, and R. Schapire, "A Decision-Theoretic Generalization of - On-Line Learning and an Application to Boosting", 1997. +.. [FS1995] Y. Freund, and R. Schapire, "A Decision-Theoretic Generalization of + On-Line Learning and an Application to Boosting", 1997. - .. [ZZRH2009] J. Zhu, H. Zou, S. Rosset, T. Hastie. "Multi-class AdaBoost", - 2009. +.. [ZZRH2009] J. Zhu, H. Zou, S. Rosset, T. Hastie. "Multi-class AdaBoost", 2009. - .. [D1997] H. Drucker. "Improving Regressors using Boosting Techniques", 1997. +.. [D1997] H. Drucker. "Improving Regressors using Boosting Techniques", 1997. - .. [HTF] T. Hastie, R. Tibshirani and J. Friedman, "Elements of - Statistical Learning Ed. 2", Springer, 2009. +.. [HTF] T. Hastie, R. Tibshirani and J. Friedman, "Elements of Statistical Learning + Ed. 2", Springer, 2009. diff --git a/doc/modules/feature_extraction.rst b/doc/modules/feature_extraction.rst index 7ac538a89849b..42bcf18e1d572 100644 --- a/doc/modules/feature_extraction.rst +++ b/doc/modules/feature_extraction.rst @@ -13,9 +13,9 @@ consisting of formats such as text and image. .. note:: Feature extraction is very different from :ref:`feature_selection`: - the former consists in transforming arbitrary data, such as text or + the former consists of transforming arbitrary data, such as text or images, into numerical features usable for machine learning. The latter - is a machine learning technique applied on these features. + is a machine learning technique applied to these features. .. _dict_feature_extraction: @@ -59,7 +59,7 @@ is a traditional numerical feature:: :class:`DictVectorizer` accepts multiple string values for one feature, like, e.g., multiple categories for a movie. 
-Assume a database classifies each movie using some categories (not mandatories)
+Assume a database classifies each movie using some categories (not mandatory)
 and its year of release.

 >>> movie_entry = [{'category': ['thriller', 'drama'], 'year': 2003},
@@ -106,8 +106,8 @@ suitable for feeding into a classifier (maybe after being piped into a
 >>> vec = DictVectorizer()
 >>> pos_vectorized = vec.fit_transform(pos_window)
 >>> pos_vectorized
- <1x6 sparse matrix of type '<... 'numpy.float64'>'
-   with 6 stored elements in Compressed Sparse ... format>
+ <Compressed Sparse Row sparse matrix of dtype 'float64'
+   with 6 stored elements and shape (1, 6)>
 >>> pos_vectorized.toarray()
 array([[1., 1., 1., 1., 1., 1.]])
 >>> vec.get_feature_names_out()
@@ -158,7 +158,7 @@ feature selectors that expect non-negative inputs.
 (like Python's ``dict`` and its variants in the ``collections`` module),
 ``(feature, value)`` pairs, or strings,
 depending on the constructor parameter ``input_type``.
-Mapping are treated as lists of ``(feature, value)`` pairs,
+Mappings are treated as lists of ``(feature, value)`` pairs,
 while single strings have an implicit value of 1,
 so ``['feat1', 'feat2', 'feat3']`` is interpreted as
 ``[('feat1', 1), ('feat2', 1), ('feat3', 1)]``.

@@ -206,35 +206,32 @@ Note the use of a generator comprehension,
 which introduces laziness into the feature extraction:
 tokens are only processed on demand from the hasher.

-|details-start|
-**Implementation details**
-|details-split|
+.. dropdown:: Implementation details

-:class:`FeatureHasher` uses the signed 32-bit variant of MurmurHash3.
-As a result (and because of limitations in ``scipy.sparse``),
-the maximum number of features supported is currently :math:`2^{31} - 1`.
+  :class:`FeatureHasher` uses the signed 32-bit variant of MurmurHash3.
+  As a result (and because of limitations in ``scipy.sparse``),
+  the maximum number of features supported is currently :math:`2^{31} - 1`.

-The original formulation of the hashing trick by Weinberger et al.
-used two separate hash functions :math:`h` and :math:`\xi`
-to determine the column index and sign of a feature, respectively.
-The present implementation works under the assumption
-that the sign bit of MurmurHash3 is independent of its other bits.
+  The original formulation of the hashing trick by Weinberger et al.
+  used two separate hash functions :math:`h` and :math:`\xi`
+  to determine the column index and sign of a feature, respectively.
+  The present implementation works under the assumption
+  that the sign bit of MurmurHash3 is independent of its other bits.

-Since a simple modulo is used to transform the hash function to a column index,
-it is advisable to use a power of two as the ``n_features`` parameter;
-otherwise the features will not be mapped evenly to the columns.
+  Since a simple modulo is used to transform the hash function to a column index,
+  it is advisable to use a power of two as the ``n_features`` parameter;
+  otherwise the features will not be mapped evenly to the columns.

-.. topic:: References:
+  .. rubric:: References

   * `MurmurHash3 `_.

-|details-end|

-.. topic:: References:
+.. rubric:: References

- * Kilian Weinberger, Anirban Dasgupta, John Langford, Alex Smola and
-   Josh Attenberg (2009). `Feature hashing for large scale multitask learning
-   `_. Proc. ICML.
+* Kilian Weinberger, Anirban Dasgupta, John Langford, Alex Smola and
+  Josh Attenberg (2009). `Feature hashing for large scale multitask learning
+  `_. Proc. ICML.

 .. _text_feature_extraction:

@@ -248,7 +245,7 @@ The Bag of Words representation
 -------------------------------

 Text Analysis is a major application field for machine learning
-algorithms. However the raw data, a sequence of symbols cannot be fed
+algorithms. However the raw data, a sequence of symbols, cannot be fed
 directly to the algorithms themselves as most of them expect numerical
 feature vectors with a fixed size rather than the raw text documents
 with variable length.
@@ -310,7 +307,7 @@ counting in a single class::

 This model has many parameters, however the default values are quite
 reasonable (please see the :ref:`reference documentation
-` for the details)::
+` for the details)::

 >>> vectorizer = CountVectorizer()
 >>> vectorizer
@@ -327,8 +324,8 @@ corpus of text documents::
 ...     ]
 >>> X = vectorizer.fit_transform(corpus)
 >>> X
- <4x9 sparse matrix of type '<... 'numpy.int64'>'
-   with 19 stored elements in Compressed Sparse ... format>
+ <Compressed Sparse Row sparse matrix of dtype 'int64'
+   with 19 stored elements and shape (4, 9)>

 The default configuration tokenizes the string by extracting words of
 at least 2 letters. The specific function that does this step can be
@@ -403,7 +400,7 @@ Using stop words

 Stop words are words like "and", "the", "him", which are presumed to be
 uninformative in representing the content of a text, and which may be
-removed to avoid them being construed as signal for prediction. Sometimes,
+removed to avoid them being construed as informative for prediction. Sometimes,
 however, similar words are useful for prediction, such as in classifying
 writing style or personality.
@@ -422,12 +419,12 @@ tokenizer, so if *we've* is in ``stop_words``, but *ve* is not, *ve* will
 be retained from *we've* in transformed text. Our vectorizers will try to
 identify and warn about some kinds of inconsistencies.

-.. topic:: References
+.. rubric:: References

- .. [NQY18] J. Nothman, H. Qin and R. Yurchak (2018).
-    `"Stop Word Lists in Free Open-source Software Packages"
-    `__.
-    In *Proc. Workshop for NLP Open Source Software*.
+.. [NQY18] J. Nothman, H. Qin and R. Yurchak (2018).
+   `"Stop Word Lists in Free Open-source Software Packages"
+   `__.
+   In *Proc. Workshop for NLP Open Source Software*.

 .. _tfidf:

@@ -492,132 +489,126 @@ class::

 TfidfTransformer(smooth_idf=False)

 Again please see the :ref:`reference documentation
-` for the details on all the parameters.
-
-|details-start|
-**Numeric example of a tf-idf matrix**
-|details-split|
-
-Let's take an example with the following counts. The first term is present
-100% of the time hence not very interesting. The two other features only
-in less than 50% of the time hence probably more representative of the
-content of the documents::
-
-    >>> counts = [[3, 0, 1],
-    ...           [2, 0, 0],
-    ...           [3, 0, 0],
-    ...           [4, 0, 0],
-    ...           [3, 2, 0],
-    ...           [3, 0, 2]]
-    ...
-    >>> tfidf = transformer.fit_transform(counts)
-    >>> tfidf
-    <6x3 sparse matrix of type '<... 'numpy.float64'>'
-        with 9 stored elements in Compressed Sparse ... format>
+` for the details on all the parameters.

-    >>> tfidf.toarray()
-    array([[0.81940995, 0.        , 0.57320793],
-           [1.        , 0.        , 0.        ],
-           [1.        , 0.        , 0.        ],
-           [1.        , 0.        , 0.        ],
-           [0.47330339, 0.88089948, 0.        ],
-           [0.58149261, 0.        , 0.81355169]])
+.. dropdown:: Numeric example of a tf-idf matrix

-Each row is normalized to have unit Euclidean norm:
+  Let's take an example with the following counts. The first term is present
+  100% of the time hence not very interesting. The two other features only
+  in less than 50% of the time hence probably more representative of the
+  content of the documents::

-:math:`v_{norm} = \frac{v}{||v||_2} = \frac{v}{\sqrt{v{_1}^2 +
-v{_2}^2 + \dots + v{_n}^2}}`
+    >>> counts = [[3, 0, 1],
+    ...           [2, 0, 0],
+    ...           [3, 0, 0],
+    ...           [4, 0, 0],
+    ...           [3, 2, 0],
+    ...           [3, 0, 2]]
+    ...
+    >>> tfidf = transformer.fit_transform(counts)
+    >>> tfidf
+    <Compressed Sparse Row sparse matrix of dtype 'float64'
+        with 9 stored elements and shape (6, 3)>

-For example, we can compute the tf-idf of the first term in the first
-document in the `counts` array as follows:
+    >>> tfidf.toarray()
+    array([[0.81940995, 0.        , 0.57320793],
+           [1.        , 0.        , 0.        ],
+           [1.        , 0.        , 0.        ],
+           [1.        , 0.        , 0.        ],
+           [0.47330339, 0.88089948, 0.        ],
+           [0.58149261, 0.        , 0.81355169]])

-:math:`n = 6`
+  Each row is normalized to have unit Euclidean norm:

-:math:`\text{df}(t)_{\text{term1}} = 6`
+  :math:`v_{norm} = \frac{v}{||v||_2} = \frac{v}{\sqrt{v{_1}^2 +
+  v{_2}^2 + \dots + v{_n}^2}}`

-:math:`\text{idf}(t)_{\text{term1}} =
-\log \frac{n}{\text{df}(t)} + 1 = \log(1)+1 = 1`
+  For example, we can compute the tf-idf of the first term in the first
+  document in the `counts` array as follows:

-:math:`\text{tf-idf}_{\text{term1}} = \text{tf} \times \text{idf} = 3 \times 1 = 3`
+  :math:`n = 6`

-Now, if we repeat this computation for the remaining 2 terms in the document,
-we get
+  :math:`\text{df}(t)_{\text{term1}} = 6`

-:math:`\text{tf-idf}_{\text{term2}} = 0 \times (\log(6/1)+1) = 0`
+  :math:`\text{idf}(t)_{\text{term1}} =
+  \log \frac{n}{\text{df}(t)} + 1 = \log(1)+1 = 1`

-:math:`\text{tf-idf}_{\text{term3}} = 1 \times (\log(6/2)+1) \approx 2.0986`
+  :math:`\text{tf-idf}_{\text{term1}} = \text{tf} \times \text{idf} = 3 \times 1 = 3`

-and the vector of raw tf-idfs:
+  Now, if we repeat this computation for the remaining 2 terms in the document,
+  we get

-:math:`\text{tf-idf}_{\text{raw}} = [3, 0, 2.0986].`
+  :math:`\text{tf-idf}_{\text{term2}} = 0 \times (\log(6/1)+1) = 0`

+  :math:`\text{tf-idf}_{\text{term3}} = 1 \times (\log(6/2)+1) \approx 2.0986`

-Then, applying the Euclidean (L2) norm, we obtain the following tf-idfs
-for document 1:
+  and the vector of raw tf-idfs:

-:math:`\frac{[3, 0, 2.0986]}{\sqrt{\big(3^2 + 0^2 + 2.0986^2\big)}}
-= [ 0.819, 0, 0.573].`
+  :math:`\text{tf-idf}_{\text{raw}} = [3, 0, 2.0986].`

-Furthermore, the default parameter ``smooth_idf=True`` adds "1" to the numerator
-and denominator as if an extra document was seen containing every term in the
-collection exactly once, which prevents zero divisions:
-:math:`\text{idf}(t) = \log{\frac{1 + n}{1+\text{df}(t)}} + 1`
+  Then, applying the Euclidean (L2) norm, we obtain the following tf-idfs
+  for document 1:

-Using this modification, the tf-idf of the third term in document 1 changes to
-1.8473:
+  :math:`\frac{[3, 0, 2.0986]}{\sqrt{\big(3^2 + 0^2 + 2.0986^2\big)}}
+  = [ 0.819, 0, 0.573].`

-:math:`\text{tf-idf}_{\text{term3}} = 1 \times \log(7/3)+1 \approx 1.8473`
+  Furthermore, the default parameter ``smooth_idf=True`` adds "1" to the numerator
+  and denominator as if an extra document was seen containing every term in the
+  collection exactly once, which prevents zero divisions:

-And the L2-normalized tf-idf changes to
+  :math:`\text{idf}(t) = \log{\frac{1 + n}{1+\text{df}(t)}} + 1`

-:math:`\frac{[3, 0, 1.8473]}{\sqrt{\big(3^2 + 0^2 + 1.8473^2\big)}}
-= [0.8515, 0, 0.5243]`::
+  Using this modification, the tf-idf of the third term in document 1 changes to
+  1.8473:

-    >>> transformer = TfidfTransformer()
-    >>> transformer.fit_transform(counts).toarray()
-    array([[0.85151335, 0.        , 0.52433293],
-           [1.        , 0.        , 0.        ],
-           [1.        , 0.        , 0.        ],
-           [1.        , 0.        , 0.        ],
-           [0.55422893, 0.83236428, 0.        ],
-           [0.63035731, 0.        , 0.77630514]])
+  :math:`\text{tf-idf}_{\text{term3}} = 1 \times \log(7/3)+1 \approx 1.8473`

-The weights of each
-feature computed by the ``fit`` method call are stored in a model
-attribute::
+  And the L2-normalized tf-idf changes to

-    >>> transformer.idf_
-    array([1. ..., 2.25..., 1.84...])
+  :math:`\frac{[3, 0, 1.8473]}{\sqrt{\big(3^2 + 0^2 + 1.8473^2\big)}}
+  = [0.8515, 0, 0.5243]`::

+    >>> transformer = TfidfTransformer()
+    >>> transformer.fit_transform(counts).toarray()
+    array([[0.85151335, 0.        , 0.52433293],
+           [1.        , 0.        , 0.        ],
+           [1.        , 0.        , 0.        ],
+           [1.        , 0.        , 0.        ],
+           [0.55422893, 0.83236428, 0.        ],
+           [0.63035731, 0.        , 0.77630514]])

+  The weights of each
+  feature computed by the ``fit`` method call are stored in a model
+  attribute::

+    >>> transformer.idf_
+    array([1., 2.25, 1.84])

-As tf–idf is very often used for text features, there is also another
-class called :class:`TfidfVectorizer` that combines all the options of
-:class:`CountVectorizer` and :class:`TfidfTransformer` in a single model::
+  As tf-idf is very often used for text features, there is also another
+  class called :class:`TfidfVectorizer` that combines all the options of
+  :class:`CountVectorizer` and :class:`TfidfTransformer` in a single model::

-    >>> from sklearn.feature_extraction.text import TfidfVectorizer
-    >>> vectorizer = TfidfVectorizer()
-    >>> vectorizer.fit_transform(corpus)
-    <4x9 sparse matrix of type '<... 'numpy.float64'>'
-        with 19 stored elements in Compressed Sparse ... format>
+    >>> from sklearn.feature_extraction.text import TfidfVectorizer
+    >>> vectorizer = TfidfVectorizer()
+    >>> vectorizer.fit_transform(corpus)
+    <Compressed Sparse Row sparse matrix of dtype 'float64'
+        with 19 stored elements and shape (4, 9)>

-While the tf–idf normalization is often very useful, there might
-be cases where the binary occurrence markers might offer better
-features. This can be achieved by using the ``binary`` parameter
-of :class:`CountVectorizer`. In particular, some estimators such as
-:ref:`bernoulli_naive_bayes` explicitly model discrete boolean random
-variables. Also, very short texts are likely to have noisy tf–idf values
-while the binary occurrence info is more stable.
+  While the tf-idf normalization is often very useful, there might
+  be cases where the binary occurrence markers might offer better
+  features. This can be achieved by using the ``binary`` parameter
+  of :class:`CountVectorizer`. In particular, some estimators such as
+  :ref:`bernoulli_naive_bayes` explicitly model discrete boolean random
+  variables. Also, very short texts are likely to have noisy tf-idf values
+  while the binary occurrence info is more stable.

-As usual the best way to adjust the feature extraction parameters
-is to use a cross-validated grid search, for instance by pipelining the
-feature extractor with a classifier:
+  As usual the best way to adjust the feature extraction parameters
+  is to use a cross-validated grid search, for instance by pipelining the
+  feature extractor with a classifier:

-* :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_text_feature_extraction.py`
+  * :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_text_feature_extraction.py`

-|details-end|

 Decoding text files
 -------------------
@@ -646,64 +637,60 @@ or ``"replace"``. See the documentation for the Python function
 ``bytes.decode`` for more details (type ``help(bytes.decode)`` at the
 Python prompt).
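When raw bytes of uncertain encoding must be vectorized anyway, one option is
to trade fidelity for robustness and let the vectorizer substitute undecodable
bytes rather than raise. A small sketch, assuming an input byte string that is
valid latin-1 but invalid UTF-8 (chosen here purely for illustration)::

    >>> from sklearn.feature_extraction.text import CountVectorizer
    >>> v = CountVectorizer(decode_error='replace')  # default 'strict' would raise
    >>> # the latin-1 byte 0xe9 is invalid UTF-8 and becomes U+FFFD
    >>> v.fit_transform([b'caf\xe9 latte']).shape
    (1, 2)

The replacement character is not a word character, so only the two surviving
tokens are counted; whether that information loss is acceptable depends on the
application.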
-|details-start| -**Troubleshooting decoding text** -|details-split| - -If you are having trouble decoding text, here are some things to try: - -- Find out what the actual encoding of the text is. The file might come - with a header or README that tells you the encoding, or there might be some - standard encoding you can assume based on where the text comes from. - -- You may be able to find out what kind of encoding it is in general - using the UNIX command ``file``. The Python ``chardet`` module comes with - a script called ``chardetect.py`` that will guess the specific encoding, - though you cannot rely on its guess being correct. - -- You could try UTF-8 and disregard the errors. You can decode byte - strings with ``bytes.decode(errors='replace')`` to replace all - decoding errors with a meaningless character, or set - ``decode_error='replace'`` in the vectorizer. This may damage the - usefulness of your features. - -- Real text may come from a variety of sources that may have used different - encodings, or even be sloppily decoded in a different encoding than the - one it was encoded with. This is common in text retrieved from the Web. - The Python package `ftfy`_ can automatically sort out some classes of - decoding errors, so you could try decoding the unknown text as ``latin-1`` - and then using ``ftfy`` to fix errors. - -- If the text is in a mish-mash of encodings that is simply too hard to sort - out (which is the case for the 20 Newsgroups dataset), you can fall back on - a simple single-byte encoding such as ``latin-1``. Some text may display - incorrectly, but at least the same sequence of bytes will always represent - the same feature. - -For example, the following snippet uses ``chardet`` -(not shipped with scikit-learn, must be installed separately) -to figure out the encoding of three texts. -It then vectorizes the texts and prints the learned vocabulary. -The output is not shown here. - - >>> import chardet # doctest: +SKIP - >>> text1 = b"Sei mir gegr\xc3\xbc\xc3\x9ft mein Sauerkraut" - >>> text2 = b"holdselig sind deine Ger\xfcche" - >>> text3 = b"\xff\xfeA\x00u\x00f\x00 \x00F\x00l\x00\xfc\x00g\x00e\x00l\x00n\x00 \x00d\x00e\x00s\x00 \x00G\x00e\x00s\x00a\x00n\x00g\x00e\x00s\x00,\x00 \x00H\x00e\x00r\x00z\x00l\x00i\x00e\x00b\x00c\x00h\x00e\x00n\x00,\x00 \x00t\x00r\x00a\x00g\x00 \x00i\x00c\x00h\x00 \x00d\x00i\x00c\x00h\x00 \x00f\x00o\x00r\x00t\x00" - >>> decoded = [x.decode(chardet.detect(x)['encoding']) - ... for x in (text1, text2, text3)] # doctest: +SKIP - >>> v = CountVectorizer().fit(decoded).vocabulary_ # doctest: +SKIP - >>> for term in v: print(v) # doctest: +SKIP - -(Depending on the version of ``chardet``, it might get the first one wrong.) - -For an introduction to Unicode and character encodings in general, -see Joel Spolsky's `Absolute Minimum Every Software Developer Must Know -About Unicode `_. - -.. _`ftfy`: https://github.com/LuminosoInsight/python-ftfy - -|details-end| +.. dropdown:: Troubleshooting decoding text + + If you are having trouble decoding text, here are some things to try: + + - Find out what the actual encoding of the text is. The file might come + with a header or README that tells you the encoding, or there might be some + standard encoding you can assume based on where the text comes from. + + - You may be able to find out what kind of encoding it is in general + using the UNIX command ``file``. 
The Python ``chardet`` module comes with
+    a script called ``chardetect.py`` that will guess the specific encoding,
+    though you cannot rely on its guess being correct.
+
+  - You could try UTF-8 and disregard the errors. You can decode byte
+    strings with ``bytes.decode(errors='replace')`` to replace all
+    decoding errors with a meaningless character, or set
+    ``decode_error='replace'`` in the vectorizer. This may damage the
+    usefulness of your features.
+
+  - Real text may come from a variety of sources that may have used different
+    encodings, or even be sloppily decoded in a different encoding than the
+    one it was encoded with. This is common in text retrieved from the Web.
+    The Python package `ftfy <https://github.com/LuminosoInsight/python-ftfy>`__
+    can automatically sort out some classes of
+    decoding errors, so you could try decoding the unknown text as ``latin-1``
+    and then using ``ftfy`` to fix errors.
+
+  - If the text is in a mish-mash of encodings that is simply too hard to sort
+    out (which is the case for the 20 Newsgroups dataset), you can fall back on
+    a simple single-byte encoding such as ``latin-1``. Some text may display
+    incorrectly, but at least the same sequence of bytes will always represent
+    the same feature.
+
+  For example, the following snippet uses ``chardet``
+  (not shipped with scikit-learn, must be installed separately)
+  to figure out the encoding of three texts.
+  It then vectorizes the texts and prints the learned vocabulary.
+  The output is not shown here.
+
+    >>> import chardet    # doctest: +SKIP
+    >>> text1 = b"Sei mir gegr\xc3\xbc\xc3\x9ft mein Sauerkraut"
+    >>> text2 = b"holdselig sind deine Ger\xfcche"
+    >>> text3 = b"\xff\xfeA\x00u\x00f\x00 \x00F\x00l\x00\xfc\x00g\x00e\x00l\x00n\x00 \x00d\x00e\x00s\x00 \x00G\x00e\x00s\x00a\x00n\x00g\x00e\x00s\x00,\x00 \x00H\x00e\x00r\x00z\x00l\x00i\x00e\x00b\x00c\x00h\x00e\x00n\x00,\x00 \x00t\x00r\x00a\x00g\x00 \x00i\x00c\x00h\x00 \x00d\x00i\x00c\x00h\x00 \x00f\x00o\x00r\x00t\x00"
+    >>> decoded = [x.decode(chardet.detect(x)['encoding'])
+    ...            for x in (text1, text2, text3)]        # doctest: +SKIP
+    >>> v = CountVectorizer().fit(decoded).vocabulary_    # doctest: +SKIP
+    >>> for term in v: print(v)                           # doctest: +SKIP
+
+  (Depending on the version of ``chardet``, it might get the first one wrong.)
+
+  For an introduction to Unicode and character encodings in general,
+  see Joel Spolsky's `Absolute Minimum Every Software Developer Must Know
+  About Unicode `_.

 Applications and examples
 -------------------------

@@ -768,15 +755,16 @@ span across words::

 >>> ngram_vectorizer = CountVectorizer(analyzer='char_wb', ngram_range=(5, 5))
 >>> ngram_vectorizer.fit_transform(['jumpy fox'])
- <1x4 sparse matrix of type '<... 'numpy.int64'>'
-    with 4 stored elements in Compressed Sparse ... format>
+ <Compressed Sparse Row sparse matrix of dtype 'int64'
+    with 4 stored elements and shape (1, 4)>

 >>> ngram_vectorizer.get_feature_names_out()
 array([' fox ', ' jump', 'jumpy', 'umpy '], ...)

 >>> ngram_vectorizer = CountVectorizer(analyzer='char', ngram_range=(5, 5))
 >>> ngram_vectorizer.fit_transform(['jumpy fox'])
- <1x5 sparse matrix of type '<... 'numpy.int64'>'
-    with 5 stored elements in Compressed Sparse ... format>
+ <Compressed Sparse Row sparse matrix of dtype 'int64'
+    with 5 stored elements and shape (1, 5)>
 >>> ngram_vectorizer.get_feature_names_out()
 array(['jumpy', 'mpy f', 'py fo', 'umpy ', 'y fox'], ...)

@@ -804,9 +792,9 @@ problems which are currently outside of the scope of scikit-learn.

 Vectorizing a large text corpus with the hashing trick
 ------------------------------------------------------

-The above vectorization scheme is simple but the fact that it holds an **in-
-memory mapping from the string tokens to the integer feature indices** (the
-``vocabulary_`` attribute) causes several **problems when dealing with large
+The above vectorization scheme is simple but the fact that it holds an
+**in-memory mapping from the string tokens to the integer feature indices**
+(the ``vocabulary_`` attribute) causes several **problems when dealing with large
 datasets**:

 - the larger the corpus, the larger the vocabulary will grow and hence the
@@ -825,7 +813,7 @@ datasets**:
 - it is not easily possible to split the vectorization work into concurrent
   sub tasks as the ``vocabulary_`` attribute would have to be a shared state
   with a fine grained synchronization barrier: the mapping from token string to
-  feature index is dependent on ordering of the first occurrence of each token
+  feature index is dependent on the ordering of the first occurrence of each token
   hence would have to be shared, potentially harming the concurrent workers'
   performance to the point of making them slower than the sequential variant.

 It is possible to overcome those limitations by combining the "hashing trick"
@@ -834,7 +822,7 @@ It is possible to overcome those limitations by combining the "hashing trick"
 :class:`~sklearn.feature_extraction.FeatureHasher` class and the text
 preprocessing and tokenization features of the :class:`CountVectorizer`.

-This combination is implementing in :class:`HashingVectorizer`,
+This combination is implemented in :class:`HashingVectorizer`,
 a transformer class that is mostly API compatible with :class:`CountVectorizer`.
 :class:`HashingVectorizer` is stateless,
 meaning that you don't have to call ``fit`` on it::

 >>> from sklearn.feature_extraction.text import HashingVectorizer
 >>> hv = HashingVectorizer(n_features=10)
 >>> hv.transform(corpus)
- <4x10 sparse matrix of type '<... 'numpy.float64'>'
-    with 16 stored elements in Compressed Sparse ... format>
+ <Compressed Sparse Row sparse matrix of dtype 'float64'
+    with 16 stored elements and shape (4, 10)>

 You can see that 16 non-zero feature tokens were extracted in the vector
 output: this is less than the 19 non-zeros extracted previously by the
@@ -866,8 +854,8 @@ Let's try again with the default setting::

 >>> hv = HashingVectorizer()
 >>> hv.transform(corpus)
- <4x1048576 sparse matrix of type '<... 'numpy.float64'>'
-    with 19 stored elements in Compressed Sparse ... format>
+ <Compressed Sparse Row sparse matrix of dtype 'float64'
+    with 19 stored elements and shape (4, 1048576)>

 We no longer get the collisions, but this comes at the expense of a much larger
 dimensionality of the output space.
@@ -884,28 +872,25 @@ The :class:`HashingVectorizer` also comes with the following limitations:

   model. A :class:`TfidfTransformer` can be appended to it in a pipeline if
   required.

-|details-start|
-**Performing out-of-core scaling with HashingVectorizer**
-|details-split|
+.. dropdown:: Performing out-of-core scaling with HashingVectorizer

-An interesting development of using a :class:`HashingVectorizer` is the ability
-to perform `out-of-core`_ scaling. This means that we can learn from data that
-does not fit into the computer's main memory.
+  An interesting development of using a :class:`HashingVectorizer` is the ability
+  to perform `out-of-core`_ scaling. This means that we can learn from data that
+  does not fit into the computer's main memory.

-.. _out-of-core: https://en.wikipedia.org/wiki/Out-of-core_algorithm
+  .. 
_out-of-core: https://en.wikipedia.org/wiki/Out-of-core_algorithm -A strategy to implement out-of-core scaling is to stream data to the estimator -in mini-batches. Each mini-batch is vectorized using :class:`HashingVectorizer` -so as to guarantee that the input space of the estimator has always the same -dimensionality. The amount of memory used at any time is thus bounded by the -size of a mini-batch. Although there is no limit to the amount of data that can -be ingested using such an approach, from a practical point of view the learning -time is often limited by the CPU time one wants to spend on the task. + A strategy to implement out-of-core scaling is to stream data to the estimator + in mini-batches. Each mini-batch is vectorized using :class:`HashingVectorizer` + so as to guarantee that the input space of the estimator has always the same + dimensionality. The amount of memory used at any time is thus bounded by the + size of a mini-batch. Although there is no limit to the amount of data that can + be ingested using such an approach, from a practical point of view the learning + time is often limited by the CPU time one wants to spend on the task. -For a full-fledged example of out-of-core scaling in a text classification -task see :ref:`sphx_glr_auto_examples_applications_plot_out_of_core_classification.py`. + For a full-fledged example of out-of-core scaling in a text classification + task see :ref:`sphx_glr_auto_examples_applications_plot_out_of_core_classification.py`. -|details-end| Customizing the vectorizer classes ---------------------------------- @@ -945,65 +930,58 @@ parameters it is possible to derive from the class and override the ``build_preprocessor``, ``build_tokenizer`` and ``build_analyzer`` factory methods instead of passing custom functions. -|details-start| -**Tips and tricks** -|details-split| - -Some tips and tricks: - -* If documents are pre-tokenized by an external package, then store them in - files (or strings) with the tokens separated by whitespace and pass - ``analyzer=str.split`` -* Fancy token-level analysis such as stemming, lemmatizing, compound - splitting, filtering based on part-of-speech, etc. are not included in the - scikit-learn codebase, but can be added by customizing either the - tokenizer or the analyzer. - Here's a ``CountVectorizer`` with a tokenizer and lemmatizer using - `NLTK `_:: - - >>> from nltk import word_tokenize # doctest: +SKIP - >>> from nltk.stem import WordNetLemmatizer # doctest: +SKIP - >>> class LemmaTokenizer: - ... def __init__(self): - ... self.wnl = WordNetLemmatizer() - ... def __call__(self, doc): - ... return [self.wnl.lemmatize(t) for t in word_tokenize(doc)] - ... - >>> vect = CountVectorizer(tokenizer=LemmaTokenizer()) # doctest: +SKIP - - (Note that this will not filter out punctuation.) - - - The following example will, for instance, transform some British spelling - to American spelling:: - - >>> import re - >>> def to_british(tokens): - ... for t in tokens: - ... t = re.sub(r"(...)our$", r"\1or", t) - ... t = re.sub(r"([bt])re$", r"\1er", t) - ... t = re.sub(r"([iy])s(e$|ing|ation)", r"\1z\2", t) - ... t = re.sub(r"ogue$", "og", t) - ... yield t - ... - >>> class CustomVectorizer(CountVectorizer): - ... def build_tokenizer(self): - ... tokenize = super().build_tokenizer() - ... return lambda doc: list(to_british(tokenize(doc))) - ... 
- >>> print(CustomVectorizer().build_analyzer()(u"color colour")) - [...'color', ...'color'] - - for other styles of preprocessing; examples include stemming, lemmatization, - or normalizing numerical tokens, with the latter illustrated in: - - * :ref:`sphx_glr_auto_examples_bicluster_plot_bicluster_newsgroups.py` - - -Customizing the vectorizer can also be useful when handling Asian languages -that do not use an explicit word separator such as whitespace. - -|details-end| +.. dropdown:: Tips and tricks + :color: success + + * If documents are pre-tokenized by an external package, then store them in + files (or strings) with the tokens separated by whitespace and pass + ``analyzer=str.split`` + * Fancy token-level analysis such as stemming, lemmatizing, compound + splitting, filtering based on part-of-speech, etc. are not included in the + scikit-learn codebase, but can be added by customizing either the + tokenizer or the analyzer. + Here's a ``CountVectorizer`` with a tokenizer and lemmatizer using + `NLTK `_:: + + >>> from nltk import word_tokenize # doctest: +SKIP + >>> from nltk.stem import WordNetLemmatizer # doctest: +SKIP + >>> class LemmaTokenizer: + ... def __init__(self): + ... self.wnl = WordNetLemmatizer() + ... def __call__(self, doc): + ... return [self.wnl.lemmatize(t) for t in word_tokenize(doc)] + ... + >>> vect = CountVectorizer(tokenizer=LemmaTokenizer()) # doctest: +SKIP + + (Note that this will not filter out punctuation.) + + The following example will, for instance, transform some British spelling + to American spelling:: + + >>> import re + >>> def to_british(tokens): + ... for t in tokens: + ... t = re.sub(r"(...)our$", r"\1or", t) + ... t = re.sub(r"([bt])re$", r"\1er", t) + ... t = re.sub(r"([iy])s(e$|ing|ation)", r"\1z\2", t) + ... t = re.sub(r"ogue$", "og", t) + ... yield t + ... + >>> class CustomVectorizer(CountVectorizer): + ... def build_tokenizer(self): + ... tokenize = super().build_tokenizer() + ... return lambda doc: list(to_british(tokenize(doc))) + ... + >>> print(CustomVectorizer().build_analyzer()(u"color colour")) + [...'color', ...'color'] + + for other styles of preprocessing; examples include stemming, lemmatization, + or normalizing numerical tokens, with the latter illustrated in: + + * :ref:`sphx_glr_auto_examples_bicluster_plot_bicluster_newsgroups.py` + + Customizing the vectorizer can also be useful when handling Asian languages + that do not use an explicit word separator such as whitespace. .. _image_feature_extraction: @@ -1063,10 +1041,12 @@ implemented as a scikit-learn transformer, so it can be used in pipelines. See:: >>> patches.shape (45, 2, 2, 3) +.. _connectivity_graph_image: + Connectivity graph of an image ------------------------------- -Several estimators in the scikit-learn can use connectivity information between +Several estimators in scikit-learn can use connectivity information between features or samples. For instance Ward clustering (:ref:`hierarchical_clustering`) can cluster together only neighboring pixels of an image, thus forming contiguous patches: @@ -1080,8 +1060,8 @@ For this purpose, the estimators use a 'connectivity' matrix, giving which samples are connected. The function :func:`img_to_graph` returns such a matrix from a 2D or 3D -image. Similarly, :func:`grid_to_graph` build a connectivity matrix for -images given the shape of these image. +image. Similarly, :func:`grid_to_graph` builds a connectivity matrix for +images given the shape of these images. 
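For instance, a minimal sketch (the 3x3 grid size is an arbitrary choice for
illustration) builds such a connectivity matrix directly from the grid shape::

    >>> from sklearn.feature_extraction.image import grid_to_graph
    >>> connectivity = grid_to_graph(n_x=3, n_y=3)  # one graph node per pixel
    >>> connectivity.shape
    (9, 9)

Each non-zero entry marks a pair of neighboring pixels, as discussed next.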
These matrices can be used to impose connectivity in estimators that use connectivity information, such as Ward clustering diff --git a/doc/modules/feature_selection.rst b/doc/modules/feature_selection.rst index 1b5ce57b0074f..ffee801f34ccc 100644 --- a/doc/modules/feature_selection.rst +++ b/doc/modules/feature_selection.rst @@ -114,11 +114,11 @@ applied to non-negative features, such as frequencies. feature selection as well. One needs to provide a `score_func` where `y=None`. The `score_func` should use internally `X` to compute the scores. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_feature_selection_plot_feature_selection.py` +* :ref:`sphx_glr_auto_examples_feature_selection_plot_feature_selection.py` - * :ref:`sphx_glr_auto_examples_feature_selection_plot_f_test_vs_mi.py` +* :ref:`sphx_glr_auto_examples_feature_selection_plot_f_test_vs_mi.py` .. _rfe: @@ -131,7 +131,7 @@ is to select features by recursively considering smaller and smaller sets of features. First, the estimator is trained on the initial set of features and the importance of each feature is obtained either through any specific attribute (such as ``coef_``, ``feature_importances_``) or callable. Then, the least important -features are pruned from current set of features. That procedure is recursively +features are pruned from the current set of features. That procedure is recursively repeated on the pruned set until the desired number of features to select is eventually reached. @@ -139,19 +139,19 @@ eventually reached. number of features. In more details, the number of features selected is tuned automatically by fitting an :class:`RFE` selector on the different cross-validation splits (provided by the `cv` parameter). The performance -of the :class:`RFE` selector are evaluated using `scorer` for different number +of the :class:`RFE` selector is evaluated using `scorer` for different numbers of selected features and aggregated together. Finally, the scores are averaged across folds and the number of features selected is set to the number of features that maximize the cross-validation score. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_feature_selection_plot_rfe_digits.py`: A recursive feature elimination example - showing the relevance of pixels in a digit classification task. +* :ref:`sphx_glr_auto_examples_feature_selection_plot_rfe_digits.py`: A recursive feature elimination example + showing the relevance of pixels in a digit classification task. - * :ref:`sphx_glr_auto_examples_feature_selection_plot_rfe_with_cross_validation.py`: A recursive feature - elimination example with automatic tuning of the number of features - selected with cross-validation. +* :ref:`sphx_glr_auto_examples_feature_selection_plot_rfe_with_cross_validation.py`: A recursive feature + elimination example with automatic tuning of the number of features + selected with cross-validation. .. _select_from_model: @@ -162,7 +162,7 @@ Feature selection using SelectFromModel estimator that assigns importance to each feature through a specific attribute (such as ``coef_``, ``feature_importances_``) or via an `importance_getter` callable after fitting. The features are considered unimportant and removed if the corresponding -importance of the feature values are below the provided +importance of the feature values is below the provided ``threshold`` parameter. Apart from specifying the threshold numerically, there are built-in heuristics for finding a threshold using a string argument. 
Available heuristics are "mean", "median" and float multiples of these like @@ -171,9 +171,9 @@ Available heuristics are "mean", "median" and float multiples of these like For examples on how it is to be used refer to the sections below. -.. topic:: Examples +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_feature_selection_plot_select_from_model_diabetes.py` +* :ref:`sphx_glr_auto_examples_feature_selection_plot_select_from_model_diabetes.py` .. _l1_feature_selection: @@ -203,46 +203,46 @@ for classification:: >>> X_new.shape (150, 3) -With SVMs and logistic-regression, the parameter C controls the sparsity: +With SVMs and logistic regression, the parameter C controls the sparsity: the smaller C the fewer features selected. With Lasso, the higher the alpha parameter, the fewer features selected. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_dense_vs_sparse_data.py`. +* :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_dense_vs_sparse_data.py`. .. _compressive_sensing: -|details-start| -**L1-recovery and compressive sensing** -|details-split| - -For a good choice of alpha, the :ref:`lasso` can fully recover the -exact set of non-zero variables using only few observations, provided -certain specific conditions are met. In particular, the number of -samples should be "sufficiently large", or L1 models will perform at -random, where "sufficiently large" depends on the number of non-zero -coefficients, the logarithm of the number of features, the amount of -noise, the smallest absolute value of non-zero coefficients, and the -structure of the design matrix X. In addition, the design matrix must -display certain specific properties, such as not being too correlated. - -There is no general rule to select an alpha parameter for recovery of -non-zero coefficients. It can by set by cross-validation -(:class:`~sklearn.linear_model.LassoCV` or -:class:`~sklearn.linear_model.LassoLarsCV`), though this may lead to -under-penalized models: including a small number of non-relevant variables -is not detrimental to prediction score. BIC -(:class:`~sklearn.linear_model.LassoLarsIC`) tends, on the opposite, to set -high values of alpha. - -.. topic:: Reference - - Richard G. Baraniuk "Compressive Sensing", IEEE Signal - Processing Magazine [120] July 2007 - http://users.isr.ist.utl.pt/~aguiar/CS_notes.pdf - -|details-end| +.. dropdown:: L1-recovery and compressive sensing + + For a good choice of alpha, the :ref:`lasso` can fully recover the + exact set of non-zero variables using only few observations, provided + certain specific conditions are met. In particular, the number of + samples should be "sufficiently large", or L1 models will perform at + random, where "sufficiently large" depends on the number of non-zero + coefficients, the logarithm of the number of features, the amount of + noise, the smallest absolute value of non-zero coefficients, and the + structure of the design matrix X. In addition, the design matrix must + display certain specific properties, such as not being too correlated. + On the use of Lasso for sparse signal recovery, see this example on + compressive sensing: + :ref:`sphx_glr_auto_examples_applications_plot_tomography_l1_reconstruction.py`. + + There is no general rule to select an alpha parameter for recovery of + non-zero coefficients. 
It can be set by cross-validation + (:class:`~sklearn.linear_model.LassoCV` or + :class:`~sklearn.linear_model.LassoLarsCV`), though this may lead to + under-penalized models: including a small number of non-relevant variables + is not detrimental to prediction score. BIC + (:class:`~sklearn.linear_model.LassoLarsIC`) tends, on the opposite, to set + high values of alpha. + + .. rubric:: References + + Richard G. Baraniuk "Compressive Sensing", IEEE Signal + Processing Magazine [120] July 2007 + http://users.isr.ist.utl.pt/~aguiar/CS_notes.pdf + Tree-based feature selection ---------------------------- @@ -262,20 +262,20 @@ meta-transformer):: >>> clf = ExtraTreesClassifier(n_estimators=50) >>> clf = clf.fit(X, y) >>> clf.feature_importances_ # doctest: +SKIP - array([ 0.04..., 0.05..., 0.4..., 0.4...]) + array([ 0.04, 0.05, 0.4, 0.4]) >>> model = SelectFromModel(clf, prefit=True) >>> X_new = model.transform(X) >>> X_new.shape # doctest: +SKIP (150, 2) -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_ensemble_plot_forest_importances.py`: example on - synthetic data showing the recovery of the actually meaningful - features. +* :ref:`sphx_glr_auto_examples_ensemble_plot_forest_importances.py`: example on + synthetic data showing the recovery of the actually meaningful features. - * :ref:`sphx_glr_auto_examples_ensemble_plot_forest_importances_faces.py`: example - on face recognition data. +* :ref:`sphx_glr_auto_examples_inspection_plot_permutation_importance.py`: example + discussing the caveats of using impurity-based feature importances as a proxy for + feature relevance. .. _sequential_feature_selection: @@ -299,38 +299,35 @@ instead of starting with no features and greedily adding features, we start with *all* the features and greedily *remove* features from the set. The `direction` parameter controls whether forward or backward SFS is used. -|details-start| -**Detail on Sequential Feature Selection** -|details-split| - -In general, forward and backward selection do not yield equivalent results. -Also, one may be much faster than the other depending on the requested number -of selected features: if we have 10 features and ask for 7 selected features, -forward selection would need to perform 7 iterations while backward selection -would only need to perform 3. - -SFS differs from :class:`~sklearn.feature_selection.RFE` and -:class:`~sklearn.feature_selection.SelectFromModel` in that it does not -require the underlying model to expose a `coef_` or `feature_importances_` -attribute. It may however be slower considering that more models need to be -evaluated, compared to the other approaches. For example in backward -selection, the iteration going from `m` features to `m - 1` features using k-fold -cross-validation requires fitting `m * k` models, while -:class:`~sklearn.feature_selection.RFE` would require only a single fit, and -:class:`~sklearn.feature_selection.SelectFromModel` always just does a single -fit and requires no iterations. - -.. topic:: Reference - - .. [sfs] Ferri et al, `Comparative study of techniques for +.. dropdown:: Details on Sequential Feature Selection + + In general, forward and backward selection do not yield equivalent results. + Also, one may be much faster than the other depending on the requested number + of selected features: if we have 10 features and ask for 7 selected features, + forward selection would need to perform 7 iterations while backward selection + would only need to perform 3. 
+ + SFS differs from :class:`~sklearn.feature_selection.RFE` and + :class:`~sklearn.feature_selection.SelectFromModel` in that it does not + require the underlying model to expose a `coef_` or `feature_importances_` + attribute. It may however be slower considering that more models need to be + evaluated, compared to the other approaches. For example in backward + selection, the iteration going from `m` features to `m - 1` features using k-fold + cross-validation requires fitting `m * k` models, while + :class:`~sklearn.feature_selection.RFE` would require only a single fit, and + :class:`~sklearn.feature_selection.SelectFromModel` always just does a single + fit and requires no iterations. + + .. rubric:: References + + .. [sfs] Ferri et al, `Comparative study of techniques for large-scale feature selection `_. -|details-end| -.. topic:: Examples +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_feature_selection_plot_select_from_model_diabetes.py` +* :ref:`sphx_glr_auto_examples_feature_selection_plot_select_from_model_diabetes.py` Feature selection as part of a pipeline ======================================= diff --git a/doc/modules/gaussian_process.rst b/doc/modules/gaussian_process.rst index 58e56a557ed73..46d04ac35d832 100644 --- a/doc/modules/gaussian_process.rst +++ b/doc/modules/gaussian_process.rst @@ -88,12 +88,12 @@ the API of standard scikit-learn estimators, :class:`GaussianProcessRegressor`: externally for other ways of selecting hyperparameters, e.g., via Markov chain Monte Carlo. -.. topic:: Examples +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_gaussian_process_plot_gpr_noisy_targets.py` - * :ref:`sphx_glr_auto_examples_gaussian_process_plot_gpr_noisy.py` - * :ref:`sphx_glr_auto_examples_gaussian_process_plot_compare_gpr_krr.py` - * :ref:`sphx_glr_auto_examples_gaussian_process_plot_gpr_co2.py` +* :ref:`sphx_glr_auto_examples_gaussian_process_plot_gpr_noisy_targets.py` +* :ref:`sphx_glr_auto_examples_gaussian_process_plot_gpr_noisy.py` +* :ref:`sphx_glr_auto_examples_gaussian_process_plot_compare_gpr_krr.py` +* :ref:`sphx_glr_auto_examples_gaussian_process_plot_gpr_co2.py` .. _gpc: @@ -106,11 +106,11 @@ The :class:`GaussianProcessClassifier` implements Gaussian processes (GP) for classification purposes, more specifically for probabilistic classification, where test predictions take the form of class probabilities. GaussianProcessClassifier places a GP prior on a latent function :math:`f`, -which is then squashed through a link function to obtain the probabilistic +which is then squashed through a link function :math:`\pi` to obtain the probabilistic classification. The latent function :math:`f` is a so-called nuisance function, whose values are not observed and are not relevant by themselves. Its purpose is to allow a convenient formulation of the model, and :math:`f` -is removed (integrated out) during prediction. GaussianProcessClassifier +is removed (integrated out) during prediction. :class:`GaussianProcessClassifier` implements the logistic link function, for which the integral cannot be computed analytically but is easily approximated in the binary case. @@ -134,6 +134,11 @@ that have been chosen randomly from the range of allowed values. If the initial hyperparameters should be kept fixed, `None` can be passed as optimizer. +In some scenarios, information about the latent function :math:`f` is desired +(i.e. the mean :math:`\bar{f_*}` and the variance :math:`\text{Var}[f_*]` described +in Eqs. (3.21) and (3.24) of [RW2006]_). 
The :class:`GaussianProcessClassifier` +provides access to these quantities via the `latent_mean_and_variance` method. + :class:`GaussianProcessClassifier` supports multi-class classification by performing either one-versus-rest or one-versus-one based training and prediction. In one-versus-rest, one binary Gaussian process classifier is @@ -169,7 +174,7 @@ While the hyperparameters chosen by optimizing LML have a considerably larger LML, they perform slightly worse according to the log-loss on test data. The figure shows that this is because they exhibit a steep change of the class probabilities at the class boundaries (which is good) but have predicted -probabilities close to 0.5 far away from the class boundaries (which is bad) +probabilities close to 0.5 far away from the class boundaries (which is bad). This undesirable effect is caused by the Laplace approximation used internally by GPC. @@ -209,7 +214,7 @@ Gaussian process classification (GPC) on iris dataset ----------------------------------------------------- This example illustrates the predicted probability of GPC for an isotropic -and anisotropic RBF kernel on a two-dimensional version for the iris-dataset. +and anisotropic RBF kernel on a two-dimensional version for the iris dataset. This illustrates the applicability of GPC to non-binary classification. The anisotropic RBF kernel obtains slightly higher log-marginal-likelihood by assigning different length-scales to the two feature dimensions. @@ -236,96 +241,93 @@ translations in the input space, while non-stationary kernels depend also on the specific values of the datapoints. Stationary kernels can further be subdivided into isotropic and anisotropic kernels, where isotropic kernels are also invariant to rotations in the input space. For more details, we refer to -Chapter 4 of [RW2006]_. For guidance on how to best combine different kernels, -we refer to [Duv2014]_. - -|details-start| -**Gaussian Process Kernel API** -|details-split| - -The main usage of a :class:`Kernel` is to compute the GP's covariance between -datapoints. For this, the method ``__call__`` of the kernel can be called. This -method can either be used to compute the "auto-covariance" of all pairs of -datapoints in a 2d array X, or the "cross-covariance" of all combinations -of datapoints of a 2d array X with datapoints in a 2d array Y. The following -identity holds true for all kernels k (except for the :class:`WhiteKernel`): -``k(X) == K(X, Y=X)`` - -If only the diagonal of the auto-covariance is being used, the method ``diag()`` -of a kernel can be called, which is more computationally efficient than the -equivalent call to ``__call__``: ``np.diag(k(X, X)) == k.diag(X)`` - -Kernels are parameterized by a vector :math:`\theta` of hyperparameters. These -hyperparameters can for instance control length-scales or periodicity of a -kernel (see below). All kernels support computing analytic gradients -of the kernel's auto-covariance with respect to :math:`log(\theta)` via setting -``eval_gradient=True`` in the ``__call__`` method. -That is, a ``(len(X), len(X), len(theta))`` array is returned where the entry -``[i, j, l]`` contains :math:`\frac{\partial k_\theta(x_i, x_j)}{\partial log(\theta_l)}`. -This gradient is used by the Gaussian process (both regressor and classifier) -in computing the gradient of the log-marginal-likelihood, which in turn is used -to determine the value of :math:`\theta`, which maximizes the log-marginal-likelihood, -via gradient ascent. 
For each hyperparameter, the initial value and the -bounds need to be specified when creating an instance of the kernel. The -current value of :math:`\theta` can be get and set via the property -``theta`` of the kernel object. Moreover, the bounds of the hyperparameters can be -accessed by the property ``bounds`` of the kernel. Note that both properties -(theta and bounds) return log-transformed values of the internally used values -since those are typically more amenable to gradient-based optimization. -The specification of each hyperparameter is stored in the form of an instance of -:class:`Hyperparameter` in the respective kernel. Note that a kernel using a -hyperparameter with name "x" must have the attributes self.x and self.x_bounds. - -The abstract base class for all kernels is :class:`Kernel`. Kernel implements a -similar interface as :class:`~sklearn.base.BaseEstimator`, providing the -methods ``get_params()``, ``set_params()``, and ``clone()``. This allows -setting kernel values also via meta-estimators such as -:class:`~sklearn.pipeline.Pipeline` or -:class:`~sklearn.model_selection.GridSearchCV`. Note that due to the nested -structure of kernels (by applying kernel operators, see below), the names of -kernel parameters might become relatively complicated. In general, for a binary -kernel operator, parameters of the left operand are prefixed with ``k1__`` and -parameters of the right operand with ``k2__``. An additional convenience method -is ``clone_with_theta(theta)``, which returns a cloned version of the kernel -but with the hyperparameters set to ``theta``. An illustrative example: - - >>> from sklearn.gaussian_process.kernels import ConstantKernel, RBF - >>> kernel = ConstantKernel(constant_value=1.0, constant_value_bounds=(0.0, 10.0)) * RBF(length_scale=0.5, length_scale_bounds=(0.0, 10.0)) + RBF(length_scale=2.0, length_scale_bounds=(0.0, 10.0)) - >>> for hyperparameter in kernel.hyperparameters: print(hyperparameter) - Hyperparameter(name='k1__k1__constant_value', value_type='numeric', bounds=array([[ 0., 10.]]), n_elements=1, fixed=False) - Hyperparameter(name='k1__k2__length_scale', value_type='numeric', bounds=array([[ 0., 10.]]), n_elements=1, fixed=False) - Hyperparameter(name='k2__length_scale', value_type='numeric', bounds=array([[ 0., 10.]]), n_elements=1, fixed=False) - >>> params = kernel.get_params() - >>> for key in sorted(params): print("%s : %s" % (key, params[key])) - k1 : 1**2 * RBF(length_scale=0.5) - k1__k1 : 1**2 - k1__k1__constant_value : 1.0 - k1__k1__constant_value_bounds : (0.0, 10.0) - k1__k2 : RBF(length_scale=0.5) - k1__k2__length_scale : 0.5 - k1__k2__length_scale_bounds : (0.0, 10.0) - k2 : RBF(length_scale=2) - k2__length_scale : 2.0 - k2__length_scale_bounds : (0.0, 10.0) - >>> print(kernel.theta) # Note: log-transformed - [ 0. -0.69314718 0.69314718] - >>> print(kernel.bounds) # Note: log-transformed - [[ -inf 2.30258509] - [ -inf 2.30258509] - [ -inf 2.30258509]] - - -All Gaussian process kernels are interoperable with :mod:`sklearn.metrics.pairwise` -and vice versa: instances of subclasses of :class:`Kernel` can be passed as -``metric`` to ``pairwise_kernels`` from :mod:`sklearn.metrics.pairwise`. Moreover, -kernel functions from pairwise can be used as GP kernels by using the wrapper -class :class:`PairwiseKernel`. The only caveat is that the gradient of -the hyperparameters is not analytic but numeric and all those kernels support -only isotropic distances. 
The parameter ``gamma`` is considered to be a -hyperparameter and may be optimized. The other kernel parameters are set -directly at initialization and are kept fixed. - -|details-end| +Chapter 4 of [RW2006]_. :ref:`This example +` +shows how to define a custom kernel over discrete data. For guidance on how to best +combine different kernels, we refer to [Duv2014]_. + +.. dropdown:: Gaussian Process Kernel API + + The main usage of a :class:`Kernel` is to compute the GP's covariance between + datapoints. For this, the method ``__call__`` of the kernel can be called. This + method can either be used to compute the "auto-covariance" of all pairs of + datapoints in a 2d array X, or the "cross-covariance" of all combinations + of datapoints of a 2d array X with datapoints in a 2d array Y. The following + identity holds true for all kernels k (except for the :class:`WhiteKernel`): + ``k(X) == K(X, Y=X)`` + + If only the diagonal of the auto-covariance is being used, the method ``diag()`` + of a kernel can be called, which is more computationally efficient than the + equivalent call to ``__call__``: ``np.diag(k(X, X)) == k.diag(X)`` + + Kernels are parameterized by a vector :math:`\theta` of hyperparameters. These + hyperparameters can for instance control length-scales or periodicity of a + kernel (see below). All kernels support computing analytic gradients + of the kernel's auto-covariance with respect to :math:`log(\theta)` via setting + ``eval_gradient=True`` in the ``__call__`` method. + That is, a ``(len(X), len(X), len(theta))`` array is returned where the entry + ``[i, j, l]`` contains :math:`\frac{\partial k_\theta(x_i, x_j)}{\partial log(\theta_l)}`. + This gradient is used by the Gaussian process (both regressor and classifier) + in computing the gradient of the log-marginal-likelihood, which in turn is used + to determine the value of :math:`\theta`, which maximizes the log-marginal-likelihood, + via gradient ascent. For each hyperparameter, the initial value and the + bounds need to be specified when creating an instance of the kernel. The + current value of :math:`\theta` can be get and set via the property + ``theta`` of the kernel object. Moreover, the bounds of the hyperparameters can be + accessed by the property ``bounds`` of the kernel. Note that both properties + (theta and bounds) return log-transformed values of the internally used values + since those are typically more amenable to gradient-based optimization. + The specification of each hyperparameter is stored in the form of an instance of + :class:`Hyperparameter` in the respective kernel. Note that a kernel using a + hyperparameter with name "x" must have the attributes self.x and self.x_bounds. + + The abstract base class for all kernels is :class:`Kernel`. Kernel implements a + similar interface as :class:`~sklearn.base.BaseEstimator`, providing the + methods ``get_params()``, ``set_params()``, and ``clone()``. This allows + setting kernel values also via meta-estimators such as + :class:`~sklearn.pipeline.Pipeline` or + :class:`~sklearn.model_selection.GridSearchCV`. Note that due to the nested + structure of kernels (by applying kernel operators, see below), the names of + kernel parameters might become relatively complicated. In general, for a binary + kernel operator, parameters of the left operand are prefixed with ``k1__`` and + parameters of the right operand with ``k2__``. 
An additional convenience method + is ``clone_with_theta(theta)``, which returns a cloned version of the kernel + but with the hyperparameters set to ``theta``. An illustrative example: + + >>> from sklearn.gaussian_process.kernels import ConstantKernel, RBF + >>> kernel = ConstantKernel(constant_value=1.0, constant_value_bounds=(0.0, 10.0)) * RBF(length_scale=0.5, length_scale_bounds=(0.0, 10.0)) + RBF(length_scale=2.0, length_scale_bounds=(0.0, 10.0)) + >>> for hyperparameter in kernel.hyperparameters: print(hyperparameter) + Hyperparameter(name='k1__k1__constant_value', value_type='numeric', bounds=array([[ 0., 10.]]), n_elements=1, fixed=False) + Hyperparameter(name='k1__k2__length_scale', value_type='numeric', bounds=array([[ 0., 10.]]), n_elements=1, fixed=False) + Hyperparameter(name='k2__length_scale', value_type='numeric', bounds=array([[ 0., 10.]]), n_elements=1, fixed=False) + >>> params = kernel.get_params() + >>> for key in sorted(params): print("%s : %s" % (key, params[key])) + k1 : 1**2 * RBF(length_scale=0.5) + k1__k1 : 1**2 + k1__k1__constant_value : 1.0 + k1__k1__constant_value_bounds : (0.0, 10.0) + k1__k2 : RBF(length_scale=0.5) + k1__k2__length_scale : 0.5 + k1__k2__length_scale_bounds : (0.0, 10.0) + k2 : RBF(length_scale=2) + k2__length_scale : 2.0 + k2__length_scale_bounds : (0.0, 10.0) + >>> print(kernel.theta) # Note: log-transformed + [ 0. -0.69314718 0.69314718] + >>> print(kernel.bounds) # Note: log-transformed + [[ -inf 2.30258509] + [ -inf 2.30258509] + [ -inf 2.30258509]] + + All Gaussian process kernels are interoperable with :mod:`sklearn.metrics.pairwise` + and vice versa: instances of subclasses of :class:`Kernel` can be passed as + ``metric`` to ``pairwise_kernels`` from :mod:`sklearn.metrics.pairwise`. Moreover, + kernel functions from pairwise can be used as GP kernels by using the wrapper + class :class:`PairwiseKernel`. The only caveat is that the gradient of + the hyperparameters is not analytic but numeric and all those kernels support + only isotropic distances. The parameter ``gamma`` is considered to be a + hyperparameter and may be optimized. The other kernel parameters are set + directly at initialization and are kept fixed. Basic kernels ------------- @@ -388,42 +390,38 @@ The :class:`Matern` kernel is a stationary kernel and a generalization of the :class:`RBF` kernel. It has an additional parameter :math:`\nu` which controls the smoothness of the resulting function. It is parameterized by a length-scale parameter :math:`l>0`, which can either be a scalar (isotropic variant of the kernel) or a vector with the same number of dimensions as the inputs :math:`x` (anisotropic variant of the kernel). -|details-start| -**Mathematical implementation of Matérn kernel** -|details-split| +.. dropdown:: Mathematical implementation of Matérn kernel -The kernel is given by: + The kernel is given by: -.. math:: + .. math:: - k(x_i, x_j) = \frac{1}{\Gamma(\nu)2^{\nu-1}}\Bigg(\frac{\sqrt{2\nu}}{l} d(x_i , x_j )\Bigg)^\nu K_\nu\Bigg(\frac{\sqrt{2\nu}}{l} d(x_i , x_j )\Bigg), + k(x_i, x_j) = \frac{1}{\Gamma(\nu)2^{\nu-1}}\Bigg(\frac{\sqrt{2\nu}}{l} d(x_i , x_j )\Bigg)^\nu K_\nu\Bigg(\frac{\sqrt{2\nu}}{l} d(x_i , x_j )\Bigg), -where :math:`d(\cdot,\cdot)` is the Euclidean distance, :math:`K_\nu(\cdot)` is a modified Bessel function and :math:`\Gamma(\cdot)` is the gamma function. -As :math:`\nu\rightarrow\infty`, the Matérn kernel converges to the RBF kernel. 
-When :math:`\nu = 1/2`, the Matérn kernel becomes identical to the absolute -exponential kernel, i.e., + where :math:`d(\cdot,\cdot)` is the Euclidean distance, :math:`K_\nu(\cdot)` is a modified Bessel function and :math:`\Gamma(\cdot)` is the gamma function. + As :math:`\nu\rightarrow\infty`, the Matérn kernel converges to the RBF kernel. + When :math:`\nu = 1/2`, the Matérn kernel becomes identical to the absolute + exponential kernel, i.e., -.. math:: - k(x_i, x_j) = \exp \Bigg(- \frac{1}{l} d(x_i , x_j ) \Bigg) \quad \quad \nu= \tfrac{1}{2} + .. math:: + k(x_i, x_j) = \exp \Bigg(- \frac{1}{l} d(x_i , x_j ) \Bigg) \quad \quad \nu= \tfrac{1}{2} -In particular, :math:`\nu = 3/2`: + In particular, :math:`\nu = 3/2`: -.. math:: - k(x_i, x_j) = \Bigg(1 + \frac{\sqrt{3}}{l} d(x_i , x_j )\Bigg) \exp \Bigg(-\frac{\sqrt{3}}{l} d(x_i , x_j ) \Bigg) \quad \quad \nu= \tfrac{3}{2} - -and :math:`\nu = 5/2`: + .. math:: + k(x_i, x_j) = \Bigg(1 + \frac{\sqrt{3}}{l} d(x_i , x_j )\Bigg) \exp \Bigg(-\frac{\sqrt{3}}{l} d(x_i , x_j ) \Bigg) \quad \quad \nu= \tfrac{3}{2} -.. math:: - k(x_i, x_j) = \Bigg(1 + \frac{\sqrt{5}}{l} d(x_i , x_j ) +\frac{5}{3l} d(x_i , x_j )^2 \Bigg) \exp \Bigg(-\frac{\sqrt{5}}{l} d(x_i , x_j ) \Bigg) \quad \quad \nu= \tfrac{5}{2} + and :math:`\nu = 5/2`: -are popular choices for learning functions that are not infinitely -differentiable (as assumed by the RBF kernel) but at least once (:math:`\nu = -3/2`) or twice differentiable (:math:`\nu = 5/2`). + .. math:: + k(x_i, x_j) = \Bigg(1 + \frac{\sqrt{5}}{l} d(x_i , x_j ) +\frac{5}{3l} d(x_i , x_j )^2 \Bigg) \exp \Bigg(-\frac{\sqrt{5}}{l} d(x_i , x_j ) \Bigg) \quad \quad \nu= \tfrac{5}{2} -The flexibility of controlling the smoothness of the learned function via :math:`\nu` -allows adapting to the properties of the true underlying functional relation. + are popular choices for learning functions that are not infinitely + differentiable (as assumed by the RBF kernel) but at least once (:math:`\nu = + 3/2`) or twice differentiable (:math:`\nu = 5/2`). -|details-end| + The flexibility of controlling the smoothness of the learned function via :math:`\nu` + allows adapting to the properties of the true underlying functional relation. The prior and posterior of a GP resulting from a Matérn kernel are shown in the following figure: diff --git a/doc/modules/grid_search.rst b/doc/modules/grid_search.rst index 01c5a5c72ee52..edb915b193e37 100644 --- a/doc/modules/grid_search.rst +++ b/doc/modules/grid_search.rst @@ -72,35 +72,41 @@ evaluated and the best combination is retained. .. currentmodule:: sklearn.model_selection -.. topic:: Examples: +.. rubric:: Examples - - See :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_digits.py` for an example of - Grid Search computation on the digits dataset. +- See :ref:`sphx_glr_auto_examples_model_selection_plot_nested_cross_validation_iris.py` + for an example of Grid Search within a cross validation loop on the iris + dataset. This is the best practice for evaluating the performance of a + model with grid search. - - See :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_text_feature_extraction.py` for an example - of Grid Search coupling parameters from a text documents feature - extractor (n-gram count vectorizer and TF-IDF transformer) with a - classifier (here a linear SVM trained with SGD with either elastic - net or L2 penalty) using a :class:`~sklearn.pipeline.Pipeline` instance. 
+- See :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_text_feature_extraction.py` for an example + of Grid Search coupling parameters from a text documents feature + extractor (n-gram count vectorizer and TF-IDF transformer) with a + classifier (here a linear SVM trained with SGD with either elastic + net or L2 penalty) using a :class:`~sklearn.pipeline.Pipeline` instance. - - See :ref:`sphx_glr_auto_examples_model_selection_plot_nested_cross_validation_iris.py` - for an example of Grid Search within a cross validation loop on the iris - dataset. This is the best practice for evaluating the performance of a - model with grid search. - - See :ref:`sphx_glr_auto_examples_model_selection_plot_multi_metric_evaluation.py` - for an example of :class:`GridSearchCV` being used to evaluate multiple - metrics simultaneously. +.. dropdown:: Advanced examples - - See :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_refit_callable.py` - for an example of using ``refit=callable`` interface in - :class:`GridSearchCV`. The example shows how this interface adds certain - amount of flexibility in identifying the "best" estimator. This interface - can also be used in multiple metrics evaluation. + - See :ref:`sphx_glr_auto_examples_model_selection_plot_nested_cross_validation_iris.py` + for an example of Grid Search within a cross validation loop on the iris + dataset. This is the best practice for evaluating the performance of a + model with grid search. + + - See :ref:`sphx_glr_auto_examples_model_selection_plot_multi_metric_evaluation.py` + for an example of :class:`GridSearchCV` being used to evaluate multiple + metrics simultaneously. + + - See :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_refit_callable.py` + for an example of using ``refit=callable`` interface in + :class:`GridSearchCV`. The example shows how this interface adds a certain + amount of flexibility in identifying the "best" estimator. This interface + can also be used in multiple metrics evaluation. + + - See :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_stats.py` + for an example of how to do a statistical comparison on the outputs of + :class:`GridSearchCV`. - - See :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_stats.py` - for an example of how to do a statistical comparison on the outputs of - :class:`GridSearchCV`. .. _randomized_parameter_search: @@ -161,16 +167,16 @@ variable that is log-uniformly distributed between ``1e0`` and ``1e3``:: 'kernel': ['rbf'], 'class_weight':['balanced', None]} -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_model_selection_plot_randomized_search.py` compares the usage and efficiency - of randomized search and grid search. +* :ref:`sphx_glr_auto_examples_model_selection_plot_randomized_search.py` compares the usage and efficiency + of randomized search and grid search. -.. topic:: References: +.. rubric:: References - * Bergstra, J. and Bengio, Y., - Random search for hyper-parameter optimization, - The Journal of Machine Learning Research (2012) +* Bergstra, J. and Bengio, Y., + Random search for hyper-parameter optimization, + The Journal of Machine Learning Research (2012) .. _successive_halving_user_guide: @@ -188,6 +194,11 @@ iteration, which will be allocated more resources. For parameter tuning, the resource is typically the number of training samples, but it can also be an arbitrary numeric parameter such as `n_estimators` in a random forest. +.. 
note::
+
+   The resource increase should be chosen large enough that, once statistical
+   significance is taken into account, a clear improvement in scores is
+   obtained.
+
 As illustrated in the figure below, only a subset of candidates
 'survive' until the last iteration. These are the candidates that have
 consistently ranked among the top-scoring candidates across all iterations.
@@ -199,7 +210,7 @@ here the number of samples.
    :align: center

 We here briefly describe the main parameters, but each parameter and their
-interactions are described in more details in the sections below. The
+interactions are described in more detail in the dropdown sections below. The
 ``factor`` (> 1) parameter controls the rate at which the resources grow, and
 the rate at which the number of candidates decreases. In each iteration, the
 number of resources per candidate is multiplied by ``factor`` and the number
@@ -216,279 +227,272 @@ These estimators are still **experimental**: their predictions and their API
 might change without any deprecation cycle. To use them, you need to
 explicitly import ``enable_halving_search_cv``::

-    >>> # explicitly require this experimental feature
     >>> from sklearn.experimental import enable_halving_search_cv  # noqa
-    >>> # now you can import normally from model_selection
     >>> from sklearn.model_selection import HalvingGridSearchCV
     >>> from sklearn.model_selection import HalvingRandomSearchCV

-.. topic:: Examples:
-
-    * :ref:`sphx_glr_auto_examples_model_selection_plot_successive_halving_heatmap.py`
-    * :ref:`sphx_glr_auto_examples_model_selection_plot_successive_halving_iterations.py`
-
-Choosing ``min_resources`` and the number of candidates
--------------------------------------------------------
-
-Beside ``factor``, the two main parameters that influence the behaviour of a
-successive halving search are the ``min_resources`` parameter, and the
-number of candidates (or parameter combinations) that are evaluated.
-``min_resources`` is the amount of resources allocated at the first
-iteration for each candidate. The number of candidates is specified directly
-in :class:`HalvingRandomSearchCV`, and is determined from the ``param_grid``
-parameter of :class:`HalvingGridSearchCV`.
-
-Consider a case where the resource is the number of samples, and where we
-have 1000 samples. In theory, with ``min_resources=10`` and ``factor=2``, we
-are able to run **at most** 7 iterations with the following number of
-samples: ``[10, 20, 40, 80, 160, 320, 640]``.
-
-But depending on the number of candidates, we might run less than 7
-iterations: if we start with a **small** number of candidates, the last
-iteration might use less than 640 samples, which means not using all the
-available resources (samples). For example if we start with 5 candidates, we
-only need 2 iterations: 5 candidates for the first iteration, then
-`5 // 2 = 2` candidates at the second iteration, after which we know which
-candidate performs the best (so we don't need a third one). We would only be
-using at most 20 samples which is a waste since we have 1000 samples at our
-disposal. On the other hand, if we start with a **high** number of
-candidates, we might end up with a lot of candidates at the last iteration,
-which may not always be ideal: it means that many candidates will run with
-the full resources, basically reducing the procedure to standard search.
- -In the case of :class:`HalvingRandomSearchCV`, the number of candidates is set -by default such that the last iteration uses as much of the available -resources as possible. For :class:`HalvingGridSearchCV`, the number of -candidates is determined by the `param_grid` parameter. Changing the value of -``min_resources`` will impact the number of possible iterations, and as a -result will also have an effect on the ideal number of candidates. - -Another consideration when choosing ``min_resources`` is whether or not it -is easy to discriminate between good and bad candidates with a small amount -of resources. For example, if you need a lot of samples to distinguish -between good and bad parameters, a high ``min_resources`` is recommended. On -the other hand if the distinction is clear even with a small amount of -samples, then a small ``min_resources`` may be preferable since it would -speed up the computation. - -Notice in the example above that the last iteration does not use the maximum -amount of resources available: 1000 samples are available, yet only 640 are -used, at most. By default, both :class:`HalvingRandomSearchCV` and -:class:`HalvingGridSearchCV` try to use as many resources as possible in the -last iteration, with the constraint that this amount of resources must be a -multiple of both `min_resources` and `factor` (this constraint will be clear -in the next section). :class:`HalvingRandomSearchCV` achieves this by -sampling the right amount of candidates, while :class:`HalvingGridSearchCV` -achieves this by properly setting `min_resources`. Please see -:ref:`exhausting_the_resources` for details. - -.. _amount_of_resource_and_number_of_candidates: - -Amount of resource and number of candidates at each iteration -------------------------------------------------------------- - -At any iteration `i`, each candidate is allocated a given amount of resources -which we denote `n_resources_i`. This quantity is controlled by the -parameters ``factor`` and ``min_resources`` as follows (`factor` is strictly -greater than 1):: - - n_resources_i = factor**i * min_resources, - -or equivalently:: - - n_resources_{i+1} = n_resources_i * factor - -where ``min_resources == n_resources_0`` is the amount of resources used at -the first iteration. ``factor`` also defines the proportions of candidates -that will be selected for the next iteration:: - - n_candidates_i = n_candidates // (factor ** i) - -or equivalently:: - - n_candidates_0 = n_candidates - n_candidates_{i+1} = n_candidates_i // factor - -So in the first iteration, we use ``min_resources`` resources -``n_candidates`` times. In the second iteration, we use ``min_resources * -factor`` resources ``n_candidates // factor`` times. The third again -multiplies the resources per candidate and divides the number of candidates. -This process stops when the maximum amount of resource per candidate is -reached, or when we have identified the best candidate. The best candidate -is identified at the iteration that is evaluating `factor` or less candidates -(see just below for an explanation). 
- -Here is an example with ``min_resources=3`` and ``factor=2``, starting with -70 candidates: - -+-----------------------+-----------------------+ -| ``n_resources_i`` | ``n_candidates_i`` | -+=======================+=======================+ -| 3 (=min_resources) | 70 (=n_candidates) | -+-----------------------+-----------------------+ -| 3 * 2 = 6 | 70 // 2 = 35 | -+-----------------------+-----------------------+ -| 6 * 2 = 12 | 35 // 2 = 17 | -+-----------------------+-----------------------+ -| 12 * 2 = 24 | 17 // 2 = 8 | -+-----------------------+-----------------------+ -| 24 * 2 = 48 | 8 // 2 = 4 | -+-----------------------+-----------------------+ -| 48 * 2 = 96 | 4 // 2 = 2 | -+-----------------------+-----------------------+ - -We can note that: - -- the process stops at the first iteration which evaluates `factor=2` - candidates: the best candidate is the best out of these 2 candidates. It - is not necessary to run an additional iteration, since it would only - evaluate one candidate (namely the best one, which we have already - identified). For this reason, in general, we want the last iteration to - run at most ``factor`` candidates. If the last iteration evaluates more - than `factor` candidates, then this last iteration reduces to a regular - search (as in :class:`RandomizedSearchCV` or :class:`GridSearchCV`). -- each ``n_resources_i`` is a multiple of both ``factor`` and - ``min_resources`` (which is confirmed by its definition above). - -The amount of resources that is used at each iteration can be found in the -`n_resources_` attribute. - -Choosing a resource -------------------- - -By default, the resource is defined in terms of number of samples. That is, -each iteration will use an increasing amount of samples to train on. You can -however manually specify a parameter to use as the resource with the -``resource`` parameter. Here is an example where the resource is defined in -terms of the number of estimators of a random forest:: - - >>> from sklearn.datasets import make_classification - >>> from sklearn.ensemble import RandomForestClassifier - >>> from sklearn.experimental import enable_halving_search_cv # noqa - >>> from sklearn.model_selection import HalvingGridSearchCV - >>> import pandas as pd - >>> - >>> param_grid = {'max_depth': [3, 5, 10], - ... 'min_samples_split': [2, 5, 10]} - >>> base_estimator = RandomForestClassifier(random_state=0) - >>> X, y = make_classification(n_samples=1000, random_state=0) - >>> sh = HalvingGridSearchCV(base_estimator, param_grid, cv=5, - ... factor=2, resource='n_estimators', - ... max_resources=30).fit(X, y) - >>> sh.best_estimator_ - RandomForestClassifier(max_depth=5, n_estimators=24, random_state=0) - -Note that it is not possible to budget on a parameter that is part of the -parameter grid. - -.. _exhausting_the_resources: - -Exhausting the available resources ----------------------------------- - -As mentioned above, the number of resources that is used at each iteration -depends on the `min_resources` parameter. -If you have a lot of resources available but start with a low number of -resources, some of them might be wasted (i.e. not used):: - - >>> from sklearn.datasets import make_classification - >>> from sklearn.svm import SVC - >>> from sklearn.experimental import enable_halving_search_cv # noqa - >>> from sklearn.model_selection import HalvingGridSearchCV - >>> import pandas as pd - >>> param_grid= {'kernel': ('linear', 'rbf'), - ... 
'C': [1, 10, 100]}
-    >>> base_estimator = SVC(gamma='scale')
-    >>> X, y = make_classification(n_samples=1000)
-    >>> sh = HalvingGridSearchCV(base_estimator, param_grid, cv=5,
-    ...                          factor=2, min_resources=20).fit(X, y)
-    >>> sh.n_resources_
-    [20, 40, 80]
-
-The search process will only use 80 resources at most, while our maximum
-amount of available resources is ``n_samples=1000``. Here, we have
-``min_resources = r_0 = 20``.
-
-For :class:`HalvingGridSearchCV`, by default, the `min_resources` parameter
-is set to 'exhaust'. This means that `min_resources` is automatically set
-such that the last iteration can use as many resources as possible, within
-the `max_resources` limit::
-
-    >>> sh = HalvingGridSearchCV(base_estimator, param_grid, cv=5,
-    ...                          factor=2, min_resources='exhaust').fit(X, y)
-    >>> sh.n_resources_
-    [250, 500, 1000]
-
-`min_resources` was here automatically set to 250, which results in the last
-iteration using all the resources. The exact value that is used depends on
-the number of candidate parameter, on `max_resources` and on `factor`.
-
-For :class:`HalvingRandomSearchCV`, exhausting the resources can be done in 2
-ways:
-
-- by setting `min_resources='exhaust'`, just like for
-  :class:`HalvingGridSearchCV`;
-- by setting `n_candidates='exhaust'`.
-
-Both options are mutually exclusive: using `min_resources='exhaust'` requires
-knowing the number of candidates, and symmetrically `n_candidates='exhaust'`
-requires knowing `min_resources`.
-
-In general, exhausting the total number of resources leads to a better final
-candidate parameter, and is slightly more time-intensive.
+.. rubric:: Examples
+
+* :ref:`sphx_glr_auto_examples_model_selection_plot_successive_halving_heatmap.py`
+* :ref:`sphx_glr_auto_examples_model_selection_plot_successive_halving_iterations.py`
+
+The sections below dive into technical aspects of successive halving.
+
+.. dropdown:: Choosing ``min_resources`` and the number of candidates
+
+  Besides ``factor``, the two main parameters that influence the behaviour of a
+  successive halving search are the ``min_resources`` parameter, and the
+  number of candidates (or parameter combinations) that are evaluated.
+  ``min_resources`` is the amount of resources allocated at the first
+  iteration for each candidate. The number of candidates is specified directly
+  in :class:`HalvingRandomSearchCV`, and is determined from the ``param_grid``
+  parameter of :class:`HalvingGridSearchCV`.
+
+  Consider a case where the resource is the number of samples, and where we
+  have 1000 samples. In theory, with ``min_resources=10`` and ``factor=2``, we
+  are able to run **at most** 7 iterations with the following number of
+  samples: ``[10, 20, 40, 80, 160, 320, 640]``.
+
+  But depending on the number of candidates, we might run fewer than 7
+  iterations: if we start with a **small** number of candidates, the last
+  iteration might use fewer than 640 samples, which means not using all the
+  available resources (samples). For example if we start with 5 candidates, we
+  only need 2 iterations: 5 candidates for the first iteration, then
+  `5 // 2 = 2` candidates at the second iteration, after which we know which
+  candidate performs the best (so we don't need a third one). We would only be
+  using at most 20 samples, which is a waste since we have 1000 samples at our
+  disposal. On the other hand, if we start with a **high** number of
+  candidates, we might end up with a lot of candidates at the last iteration,
+  which may not always be ideal: it means that many candidates will run with
+  the full resources, basically reducing the procedure to standard search.
+
+  In the case of :class:`HalvingRandomSearchCV`, the number of candidates is set
+  by default such that the last iteration uses as much of the available
+  resources as possible. For :class:`HalvingGridSearchCV`, the number of
+  candidates is determined by the `param_grid` parameter. Changing the value of
+  ``min_resources`` will impact the number of possible iterations, and as a
+  result will also have an effect on the ideal number of candidates.
+
+  Another consideration when choosing ``min_resources`` is whether or not it
+  is easy to discriminate between good and bad candidates with a small amount
+  of resources. For example, if you need a lot of samples to distinguish
+  between good and bad parameters, a high ``min_resources`` is recommended. On
+  the other hand if the distinction is clear even with a small amount of
+  samples, then a small ``min_resources`` may be preferable since it would
+  speed up the computation.
+
+  Notice in the example above that the last iteration does not use the maximum
+  amount of resources available: 1000 samples are available, yet only 640 are
+  used, at most. By default, both :class:`HalvingRandomSearchCV` and
+  :class:`HalvingGridSearchCV` try to use as many resources as possible in the
+  last iteration, with the constraint that this amount of resources must be a
+  multiple of both `min_resources` and `factor` (this constraint will be clear
+  in the next section). :class:`HalvingRandomSearchCV` achieves this by
+  sampling the right number of candidates, while :class:`HalvingGridSearchCV`
+  achieves this by properly setting `min_resources`.
+
+
+.. dropdown:: Amount of resource and number of candidates at each iteration
+
+  At any iteration `i`, each candidate is allocated a given amount of resources
+  which we denote `n_resources_i`. This quantity is controlled by the
+  parameters ``factor`` and ``min_resources`` as follows (`factor` is strictly
+  greater than 1)::
+
+      n_resources_i = factor**i * min_resources,
+
+  or equivalently::
+
+      n_resources_{i+1} = n_resources_i * factor
+
+  where ``min_resources == n_resources_0`` is the amount of resources used at
+  the first iteration. ``factor`` also defines the proportion of candidates
+  that will be selected for the next iteration::
+
+      n_candidates_i = n_candidates // (factor ** i)
+
+  or equivalently::
+
+      n_candidates_0 = n_candidates
+      n_candidates_{i+1} = n_candidates_i // factor
+
+  So in the first iteration, we use ``min_resources`` resources
+  ``n_candidates`` times. In the second iteration, we use ``min_resources *
+  factor`` resources ``n_candidates // factor`` times. The third again
+  multiplies the resources per candidate and divides the number of candidates.
+  This process stops when the maximum amount of resource per candidate is
+  reached, or when we have identified the best candidate. The best candidate
+  is identified at the iteration that is evaluating `factor` or fewer
+  candidates (see just below for an explanation).
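+
+  These two recurrences are straightforward to tabulate. The following small
+  sketch (plain Python, ignoring the ``max_resources`` cap) computes the
+  schedule shown in the table below::
+
+      >>> def schedule(n_candidates, min_resources, factor=2):
+      ...     out = []
+      ...     while True:
+      ...         out.append((min_resources, n_candidates))
+      ...         if n_candidates <= factor:  # best candidate identified
+      ...             return out
+      ...         n_candidates //= factor
+      ...         min_resources *= factor
+      >>> schedule(n_candidates=70, min_resources=3)
+      [(3, 70), (6, 35), (12, 17), (24, 8), (48, 4), (96, 2)]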
+ + Here is an example with ``min_resources=3`` and ``factor=2``, starting with + 70 candidates: + + +-----------------------+-----------------------+ + | ``n_resources_i`` | ``n_candidates_i`` | + +=======================+=======================+ + | 3 (=min_resources) | 70 (=n_candidates) | + +-----------------------+-----------------------+ + | 3 * 2 = 6 | 70 // 2 = 35 | + +-----------------------+-----------------------+ + | 6 * 2 = 12 | 35 // 2 = 17 | + +-----------------------+-----------------------+ + | 12 * 2 = 24 | 17 // 2 = 8 | + +-----------------------+-----------------------+ + | 24 * 2 = 48 | 8 // 2 = 4 | + +-----------------------+-----------------------+ + | 48 * 2 = 96 | 4 // 2 = 2 | + +-----------------------+-----------------------+ + + We can note that: + + - the process stops at the first iteration which evaluates `factor=2` + candidates: the best candidate is the best out of these 2 candidates. It + is not necessary to run an additional iteration, since it would only + evaluate one candidate (namely the best one, which we have already + identified). For this reason, in general, we want the last iteration to + run at most ``factor`` candidates. If the last iteration evaluates more + than `factor` candidates, then this last iteration reduces to a regular + search (as in :class:`RandomizedSearchCV` or :class:`GridSearchCV`). + - each ``n_resources_i`` is a multiple of both ``factor`` and + ``min_resources`` (which is confirmed by its definition above). + + The amount of resources that is used at each iteration can be found in the + `n_resources_` attribute. + +.. dropdown:: Choosing a resource + + By default, the resource is defined in terms of number of samples. That is, + each iteration will use an increasing amount of samples to train on. You can + however manually specify a parameter to use as the resource with the + ``resource`` parameter. Here is an example where the resource is defined in + terms of the number of estimators of a random forest:: + + >>> from sklearn.datasets import make_classification + >>> from sklearn.ensemble import RandomForestClassifier + >>> from sklearn.experimental import enable_halving_search_cv # noqa + >>> from sklearn.model_selection import HalvingGridSearchCV + >>> import pandas as pd + >>> param_grid = {'max_depth': [3, 5, 10], + ... 'min_samples_split': [2, 5, 10]} + >>> base_estimator = RandomForestClassifier(random_state=0) + >>> X, y = make_classification(n_samples=1000, random_state=0) + >>> sh = HalvingGridSearchCV(base_estimator, param_grid, cv=5, + ... factor=2, resource='n_estimators', + ... max_resources=30).fit(X, y) + >>> sh.best_estimator_ + RandomForestClassifier(max_depth=5, n_estimators=24, random_state=0) + + Note that it is not possible to budget on a parameter that is part of the + parameter grid. + + +.. dropdown:: Exhausting the available resources + + As mentioned above, the number of resources that is used at each iteration + depends on the `min_resources` parameter. + If you have a lot of resources available but start with a low number of + resources, some of them might be wasted (i.e. not used):: + + >>> from sklearn.datasets import make_classification + >>> from sklearn.svm import SVC + >>> from sklearn.experimental import enable_halving_search_cv # noqa + >>> from sklearn.model_selection import HalvingGridSearchCV + >>> import pandas as pd + >>> param_grid= {'kernel': ('linear', 'rbf'), + ... 
'C': [1, 10, 100]} + >>> base_estimator = SVC(gamma='scale') + >>> X, y = make_classification(n_samples=1000) + >>> sh = HalvingGridSearchCV(base_estimator, param_grid, cv=5, + ... factor=2, min_resources=20).fit(X, y) + >>> sh.n_resources_ + [20, 40, 80] + + The search process will only use 80 resources at most, while our maximum + amount of available resources is ``n_samples=1000``. Here, we have + ``min_resources = r_0 = 20``. + + For :class:`HalvingGridSearchCV`, by default, the `min_resources` parameter + is set to 'exhaust'. This means that `min_resources` is automatically set + such that the last iteration can use as many resources as possible, within + the `max_resources` limit:: + + >>> sh = HalvingGridSearchCV(base_estimator, param_grid, cv=5, + ... factor=2, min_resources='exhaust').fit(X, y) + >>> sh.n_resources_ + [250, 500, 1000] + + `min_resources` was here automatically set to 250, which results in the last + iteration using all the resources. The exact value that is used depends on + the number of candidate parameters, on `max_resources` and on `factor`. + + For :class:`HalvingRandomSearchCV`, exhausting the resources can be done in 2 + ways: + + - by setting `min_resources='exhaust'`, just like for + :class:`HalvingGridSearchCV`; + - by setting `n_candidates='exhaust'`. + + Both options are mutually exclusive: using `min_resources='exhaust'` requires + knowing the number of candidates, and symmetrically `n_candidates='exhaust'` + requires knowing `min_resources`. + + In general, exhausting the total number of resources leads to a better final + candidate parameter, and is slightly more time-intensive. .. _aggressive_elimination: Aggressive elimination of candidates ------------------------------------ -Ideally, we want the last iteration to evaluate ``factor`` candidates (see -:ref:`amount_of_resource_and_number_of_candidates`). We then just have to -pick the best one. When the number of available resources is small with -respect to the number of candidates, the last iteration may have to evaluate -more than ``factor`` candidates:: - - >>> from sklearn.datasets import make_classification - >>> from sklearn.svm import SVC - >>> from sklearn.experimental import enable_halving_search_cv # noqa - >>> from sklearn.model_selection import HalvingGridSearchCV - >>> import pandas as pd - >>> - >>> - >>> param_grid = {'kernel': ('linear', 'rbf'), - ... 'C': [1, 10, 100]} - >>> base_estimator = SVC(gamma='scale') - >>> X, y = make_classification(n_samples=1000) - >>> sh = HalvingGridSearchCV(base_estimator, param_grid, cv=5, - ... factor=2, max_resources=40, - ... aggressive_elimination=False).fit(X, y) - >>> sh.n_resources_ - [20, 40] - >>> sh.n_candidates_ - [6, 3] - -Since we cannot use more than ``max_resources=40`` resources, the process -has to stop at the second iteration which evaluates more than ``factor=2`` -candidates. - Using the ``aggressive_elimination`` parameter, you can force the search process to end up with less than ``factor`` candidates at the last -iteration. To do this, the process will eliminate as many candidates as -necessary using ``min_resources`` resources:: - - >>> sh = HalvingGridSearchCV(base_estimator, param_grid, cv=5, - ... factor=2, - ... max_resources=40, - ... aggressive_elimination=True, - ... 
).fit(X, y) - >>> sh.n_resources_ - [20, 20, 40] - >>> sh.n_candidates_ - [6, 3, 2] - -Notice that we end with 2 candidates at the last iteration since we have -eliminated enough candidates during the first iterations, using ``n_resources = -min_resources = 20``. +iteration. + +.. dropdown:: Code example of aggressive elimination + + Ideally, we want the last iteration to evaluate ``factor`` candidates. We + then just have to pick the best one. When the number of available resources is + small with respect to the number of candidates, the last iteration may have to + evaluate more than ``factor`` candidates:: + + >>> from sklearn.datasets import make_classification + >>> from sklearn.svm import SVC + >>> from sklearn.experimental import enable_halving_search_cv # noqa + >>> from sklearn.model_selection import HalvingGridSearchCV + >>> import pandas as pd + >>> param_grid = {'kernel': ('linear', 'rbf'), + ... 'C': [1, 10, 100]} + >>> base_estimator = SVC(gamma='scale') + >>> X, y = make_classification(n_samples=1000) + >>> sh = HalvingGridSearchCV(base_estimator, param_grid, cv=5, + ... factor=2, max_resources=40, + ... aggressive_elimination=False).fit(X, y) + >>> sh.n_resources_ + [20, 40] + >>> sh.n_candidates_ + [6, 3] + + Since we cannot use more than ``max_resources=40`` resources, the process + has to stop at the second iteration which evaluates more than ``factor=2`` + candidates. + + When using ``aggressive_elimination``, the process will eliminate as many + candidates as necessary using ``min_resources`` resources:: + + >>> sh = HalvingGridSearchCV(base_estimator, param_grid, cv=5, + ... factor=2, + ... max_resources=40, + ... aggressive_elimination=True, + ... ).fit(X, y) + >>> sh.n_resources_ + [20, 20, 40] + >>> sh.n_candidates_ + [6, 3, 2] + + Notice that we end with 2 candidates at the last iteration since we have + eliminated enough candidates during the first iterations, using ``n_resources = + min_resources = 20``. .. _successive_halving_cv_results: @@ -502,41 +506,44 @@ pd.DataFrame(est.cv_results_)``. The ``cv_results_`` attribute of to that of :class:`GridSearchCV` and :class:`RandomizedSearchCV`, with additional information related to the successive halving process. -Here is an example with some of the columns of a (truncated) dataframe: - -==== ====== =============== ================= ======================================================================================== - .. iter n_resources mean_test_score params -==== ====== =============== ================= ======================================================================================== - 0 0 125 0.983667 {'criterion': 'log_loss', 'max_depth': None, 'max_features': 9, 'min_samples_split': 5} - 1 0 125 0.983667 {'criterion': 'gini', 'max_depth': None, 'max_features': 8, 'min_samples_split': 7} - 2 0 125 0.983667 {'criterion': 'gini', 'max_depth': None, 'max_features': 10, 'min_samples_split': 10} - 3 0 125 0.983667 {'criterion': 'log_loss', 'max_depth': None, 'max_features': 6, 'min_samples_split': 6} - ... ... ... ... ... 
- 15 2 500 0.951958 {'criterion': 'log_loss', 'max_depth': None, 'max_features': 9, 'min_samples_split': 10} - 16 2 500 0.947958 {'criterion': 'gini', 'max_depth': None, 'max_features': 10, 'min_samples_split': 10} - 17 2 500 0.951958 {'criterion': 'gini', 'max_depth': None, 'max_features': 10, 'min_samples_split': 4} - 18 3 1000 0.961009 {'criterion': 'log_loss', 'max_depth': None, 'max_features': 9, 'min_samples_split': 10} - 19 3 1000 0.955989 {'criterion': 'gini', 'max_depth': None, 'max_features': 10, 'min_samples_split': 4} -==== ====== =============== ================= ======================================================================================== - -Each row corresponds to a given parameter combination (a candidate) and a given -iteration. The iteration is given by the ``iter`` column. The ``n_resources`` -column tells you how many resources were used. - -In the example above, the best parameter combination is ``{'criterion': -'log_loss', 'max_depth': None, 'max_features': 9, 'min_samples_split': 10}`` -since it has reached the last iteration (3) with the highest score: -0.96. - -.. topic:: References: - - .. [1] K. Jamieson, A. Talwalkar, - `Non-stochastic Best Arm Identification and Hyperparameter - Optimization `_, in - proc. of Machine Learning Research, 2016. - .. [2] L. Li, K. Jamieson, G. DeSalvo, A. Rostamizadeh, A. Talwalkar, - :arxiv:`Hyperband: A Novel Bandit-Based Approach to Hyperparameter Optimization - <1603.06560>`, in Machine Learning Research 18, 2018. +.. dropdown:: Example of a (truncated) output dataframe: + + ==== ====== =============== ================= ======================================================================================== + .. iter n_resources mean_test_score params + ==== ====== =============== ================= ======================================================================================== + 0 0 125 0.983667 {'criterion': 'log_loss', 'max_depth': None, 'max_features': 9, 'min_samples_split': 5} + 1 0 125 0.983667 {'criterion': 'gini', 'max_depth': None, 'max_features': 8, 'min_samples_split': 7} + 2 0 125 0.983667 {'criterion': 'gini', 'max_depth': None, 'max_features': 10, 'min_samples_split': 10} + 3 0 125 0.983667 {'criterion': 'log_loss', 'max_depth': None, 'max_features': 6, 'min_samples_split': 6} + ... ... ... ... ... + 15 2 500 0.951958 {'criterion': 'log_loss', 'max_depth': None, 'max_features': 9, 'min_samples_split': 10} + 16 2 500 0.947958 {'criterion': 'gini', 'max_depth': None, 'max_features': 10, 'min_samples_split': 10} + 17 2 500 0.951958 {'criterion': 'gini', 'max_depth': None, 'max_features': 10, 'min_samples_split': 4} + 18 3 1000 0.961009 {'criterion': 'log_loss', 'max_depth': None, 'max_features': 9, 'min_samples_split': 10} + 19 3 1000 0.955989 {'criterion': 'gini', 'max_depth': None, 'max_features': 10, 'min_samples_split': 4} + ==== ====== =============== ================= ======================================================================================== + + Each row corresponds to a given parameter combination (a candidate) and a given + iteration. The iteration is given by the ``iter`` column. The ``n_resources`` + column tells you how many resources were used. + + In the example above, the best parameter combination is ``{'criterion': + 'log_loss', 'max_depth': None, 'max_features': 9, 'min_samples_split': 10}`` + since it has reached the last iteration (3) with the highest score: + 0.96. + + .. rubric:: References + + .. [1] K. Jamieson, A. 
Talwalkar,
+      `Non-stochastic Best Arm Identification and Hyperparameter
+      Optimization `_, in
+      proc. of Machine Learning Research, 2016.
+
+   .. [2] L. Li, K. Jamieson, G. DeSalvo, A. Rostamizadeh, A. Talwalkar,
+      :arxiv:`Hyperband: A Novel Bandit-Based Approach to Hyperparameter Optimization
+      <1603.06560>`, in Machine Learning Research 18, 2018.
+
+

 .. _grid_search_tips:

@@ -548,14 +555,15 @@ Tips for parameter search
 Specifying an objective metric
 ------------------------------

-By default, parameter search uses the ``score`` function of the estimator
-to evaluate a parameter setting. These are the
+By default, parameter search uses the ``score`` function of the estimator to
+evaluate a parameter setting. These are the
 :func:`sklearn.metrics.accuracy_score` for classification and
-:func:`sklearn.metrics.r2_score` for regression. For some applications,
-other scoring functions are better suited (for example in unbalanced
-classification, the accuracy score is often uninformative). An alternative
-scoring function can be specified via the ``scoring`` parameter of most
-parameter search tools. See :ref:`scoring_parameter` for more details.
+:func:`sklearn.metrics.r2_score` for regression. For some applications, other
+scoring functions are better suited (for example in unbalanced classification,
+the accuracy score is often uninformative); see :ref:`which_scoring_function`
+for some guidance. An alternative scoring function can be specified via the
+``scoring`` parameter of most parameter search tools; see
+:ref:`scoring_parameter` for more details.

 .. _multimetric_grid_search:

@@ -605,7 +613,7 @@ parameters of composite or nested estimators such as
     >>> search = GridSearchCV(calibrated_forest, param_grid, cv=5)
     >>> search.fit(X, y)
     GridSearchCV(cv=5,
-                 estimator=CalibratedClassifierCV(...),
+                 estimator=CalibratedClassifierCV(estimator=RandomForestClassifier(n_estimators=10)),
                  param_grid={'estimator__max_depth': [2, 4, 6, 8]})

 Here, ``<estimator>`` is the parameter name of the nested estimator,
@@ -654,12 +662,11 @@ entry for :term:`n_jobs`.

 Robustness to failure
 ---------------------

-Some parameter settings may result in a failure to ``fit`` one or more folds
-of the data. By default, this will cause the entire search to fail, even if
-some parameter settings could be fully evaluated. Setting ``error_score=0``
-(or `=np.nan`) will make the procedure robust to such failure, issuing a
-warning and setting the score for that fold to 0 (or `nan`), but completing
-the search.
+Some parameter settings may result in a failure to ``fit`` one or more folds of
+the data. By default, the score for those settings will be `np.nan`. This can
+be controlled by setting `error_score="raise"` to raise an exception if one fit
+fails, or for example `error_score=0` to set another value for the score of
+failing parameter combinations.

 .. _alternative_cv:

@@ -718,7 +725,7 @@ model selection:

 Out of Bag Estimates
 --------------------

-When using ensemble methods base upon bagging, i.e. generating new
+When using ensemble methods based upon bagging, i.e. generating new
 training sets using sampling with replacement, part of the training
 set remains unused. For each classifier in the ensemble, a different
 part of the training set is left out.
diff --git a/doc/modules/impute.rst b/doc/modules/impute.rst index f5879cbffc0a5..59367b647dd58 100644 --- a/doc/modules/impute.rst +++ b/doc/modules/impute.rst @@ -50,7 +50,7 @@ that contain the missing values:: >>> X = [[np.nan, 2], [6, np.nan], [7, 6]] >>> print(imp.transform(X)) [[4. 2. ] - [6. 3.666...] + [6. 3.666] [7. 6. ]] The :class:`SimpleImputer` class also supports sparse matrices:: @@ -110,9 +110,9 @@ imputation round are returned. This estimator is still **experimental** for now: default parameters or details of behaviour might change without any deprecation cycle. Resolving the following issues would help stabilize :class:`IterativeImputer`: - convergence criteria (:issue:`14338`), default estimators (:issue:`13286`), - and use of random state (:issue:`15611`). To use it, you need to explicitly - import ``enable_iterative_imputer``. + convergence criteria (:issue:`14338`) and default estimators + (:issue:`13286`). To use it, you need to explicitly import + ``enable_iterative_imputer``. :: @@ -175,8 +175,7 @@ Note that a call to the ``transform`` method of :class:`IterativeImputer` is not allowed to change the number of samples. Therefore multiple imputations cannot be achieved by a single call to ``transform``. -References ----------- +.. rubric:: References .. [1] `Stef van Buuren, Karin Groothuis-Oudshoorn (2011). "mice: Multivariate Imputation by Chained Equations in R". Journal of Statistical Software 45: @@ -224,13 +223,13 @@ neighbors of samples with missing values:: For another example on usage, see :ref:`sphx_glr_auto_examples_impute_plot_missing_values.py`. -.. topic:: References +.. rubric:: References - .. [OL2001] `Olga Troyanskaya, Michael Cantor, Gavin Sherlock, Pat Brown, - Trevor Hastie, Robert Tibshirani, David Botstein and Russ B. Altman, - Missing value estimation methods for DNA microarrays, BIOINFORMATICS - Vol. 17 no. 6, 2001 Pages 520-525. - `_ +.. [OL2001] `Olga Troyanskaya, Michael Cantor, Gavin Sherlock, Pat Brown, + Trevor Hastie, Robert Tibshirani, David Botstein and Russ B. Altman, + Missing value estimation methods for DNA microarrays, BIOINFORMATICS + Vol. 17 no. 6, 2001 Pages 520-525. + `_ Keeping the number of features constant ======================================= @@ -250,7 +249,7 @@ imputation. While this feature will not help in predictive setting, dropping the columns will change the shape of `X` which could be problematic when using imputers in a more complex machine-learning pipeline. The parameter `keep_empty_features` offers the option to keep the empty features by imputing -with a constant values. In most of the cases, this constant value is zero:: +with a constant value. In most of the cases, this constant value is zero:: >>> imputer.set_params(keep_empty_features=True) SimpleImputer(keep_empty_features=True) diff --git a/doc/modules/isotonic.rst b/doc/modules/isotonic.rst index 6cfdc1669de5d..50fbdb24e72c7 100644 --- a/doc/modules/isotonic.rst +++ b/doc/modules/isotonic.rst @@ -32,6 +32,6 @@ thus form a function that is piecewise linear: :target: ../auto_examples/miscellaneous/plot_isotonic_regression.html :align: center -.. topic:: Examples: +.. 
rubric:: Examples - * :ref:`sphx_glr_auto_examples_miscellaneous_plot_isotonic_regression.py` +* :ref:`sphx_glr_auto_examples_miscellaneous_plot_isotonic_regression.py` diff --git a/doc/modules/kernel_approximation.rst b/doc/modules/kernel_approximation.rst index 0c67c36178e3b..0bbd19d05de33 100644 --- a/doc/modules/kernel_approximation.rst +++ b/doc/modules/kernel_approximation.rst @@ -42,7 +42,7 @@ computational complexity of the exact method is :math:`\mathcal{O}(n^3_{\text{samples}})`, the complexity of the approximation is :math:`\mathcal{O}(n^2_{\text{components}} \cdot n_{\text{samples}})`, where one can set :math:`n_{\text{components}} \ll n_{\text{samples}}` without a -significative decrease in performance [WS2001]_. +significant decrease in performance [WS2001]_. We can construct the eigendecomposition of the kernel matrix :math:`K`, based on the features of the data, and then split it into sampled and unsampled data @@ -88,12 +88,12 @@ function or a precomputed kernel matrix. The number of samples used - which is also the dimensionality of the features computed - is given by the parameter ``n_components``. -.. topic:: Examples: +.. rubric:: Examples - * See the example entitled - :ref:`sphx_glr_auto_examples_applications_plot_cyclical_feature_engineering.py`, - that shows an efficient machine learning pipeline that uses a - :class:`Nystroem` kernel. +* See the example entitled + :ref:`sphx_glr_auto_examples_applications_plot_cyclical_feature_engineering.py`, + that shows an efficient machine learning pipeline that uses a + :class:`Nystroem` kernel. .. _rbf_kernel_approx: @@ -143,9 +143,9 @@ use of larger feature spaces more efficient. Comparing an exact RBF kernel (left) with the approximation (right) -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_miscellaneous_plot_kernel_approximation.py` +* :ref:`sphx_glr_auto_examples_miscellaneous_plot_kernel_approximation.py` .. _additive_chi_kernel_approx: @@ -241,9 +241,9 @@ In addition, this method can transform samples in time, where :math:`n_{\text{components}}` is the desired output dimension, determined by ``n_components``. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_kernel_approximation_plot_scalable_poly_kernels.py` +* :ref:`sphx_glr_auto_examples_kernel_approximation_plot_scalable_poly_kernels.py` .. _tensor_sketch_kernel_approx: @@ -283,29 +283,29 @@ The classes in this submodule allow to approximate the embedding or store training examples. -.. topic:: References: - - .. [WS2001] `"Using the Nyström method to speed up kernel machines" - `_ - Williams, C.K.I.; Seeger, M. - 2001. - .. [RR2007] `"Random features for large-scale kernel machines" - `_ - Rahimi, A. and Recht, B. - Advances in neural information processing 2007, - .. [LS2010] `"Random Fourier approximations for skewed multiplicative histogram kernels" - `_ - Li, F., Ionescu, C., and Sminchisescu, C. - - Pattern Recognition, DAGM 2010, Lecture Notes in Computer Science. - .. [VZ2010] `"Efficient additive kernels via explicit feature maps" - `_ - Vedaldi, A. and Zisserman, A. - Computer Vision and Pattern Recognition 2010 - .. [VVZ2010] `"Generalized RBF feature maps for Efficient Detection" - `_ - Vempati, S. and Vedaldi, A. and Zisserman, A. and Jawahar, CV - 2010 - .. [PP2013] :doi:`"Fast and scalable polynomial kernels via explicit feature maps" - <10.1145/2487575.2487591>` - Pham, N., & Pagh, R. - 2013 - .. 
[CCF2002] `"Finding frequent items in data streams" - `_ - Charikar, M., Chen, K., & Farach-Colton - 2002 - .. [WIKICS] `"Wikipedia: Count sketch" - `_ +.. rubric:: References + +.. [WS2001] `"Using the Nyström method to speed up kernel machines" + `_ + Williams, C.K.I.; Seeger, M. - 2001. +.. [RR2007] `"Random features for large-scale kernel machines" + `_ + Rahimi, A. and Recht, B. - Advances in neural information processing 2007, +.. [LS2010] `"Random Fourier approximations for skewed multiplicative histogram kernels" + `_ + Li, F., Ionescu, C., and Sminchisescu, C. + - Pattern Recognition, DAGM 2010, Lecture Notes in Computer Science. +.. [VZ2010] `"Efficient additive kernels via explicit feature maps" + `_ + Vedaldi, A. and Zisserman, A. - Computer Vision and Pattern Recognition 2010 +.. [VVZ2010] `"Generalized RBF feature maps for Efficient Detection" + `_ + Vempati, S. and Vedaldi, A. and Zisserman, A. and Jawahar, CV - 2010 +.. [PP2013] :doi:`"Fast and scalable polynomial kernels via explicit feature maps" + <10.1145/2487575.2487591>` + Pham, N., & Pagh, R. - 2013 +.. [CCF2002] `"Finding frequent items in data streams" + `_ + Charikar, M., Chen, K., & Farach-Colton - 2002 +.. [WIKICS] `"Wikipedia: Count sketch" + `_ diff --git a/doc/modules/kernel_ridge.rst b/doc/modules/kernel_ridge.rst index 5d25ce71f5ea1..fcc19a49628c4 100644 --- a/doc/modules/kernel_ridge.rst +++ b/doc/modules/kernel_ridge.rst @@ -55,11 +55,11 @@ dense model. :target: ../auto_examples/miscellaneous/plot_kernel_ridge_regression.html :align: center -.. topic:: Examples +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_miscellaneous_plot_kernel_ridge_regression.py` +* :ref:`sphx_glr_auto_examples_miscellaneous_plot_kernel_ridge_regression.py` -.. topic:: References: +.. rubric:: References - .. [M2012] "Machine Learning: A Probabilistic Perspective" - Murphy, K. P. - chapter 14.4.3, pp. 492-493, The MIT Press, 2012 +.. [M2012] "Machine Learning: A Probabilistic Perspective" + Murphy, K. P. - chapter 14.4.3, pp. 492-493, The MIT Press, 2012 diff --git a/doc/modules/lda_qda.rst b/doc/modules/lda_qda.rst index 850a848fe3f73..405ef8e5d3a8b 100644 --- a/doc/modules/lda_qda.rst +++ b/doc/modules/lda_qda.rst @@ -29,10 +29,10 @@ Discriminant Analysis can only learn linear boundaries, while Quadratic Discriminant Analysis can learn quadratic boundaries and is therefore more flexible. -.. topic:: Examples: +.. rubric:: Examples - :ref:`sphx_glr_auto_examples_classification_plot_lda_qda.py`: Comparison of LDA and QDA - on synthetic data. +* :ref:`sphx_glr_auto_examples_classification_plot_lda_qda.py`: Comparison of LDA and + QDA on synthetic data. Dimensionality reduction using Linear Discriminant Analysis =========================================================== @@ -49,10 +49,10 @@ This is implemented in the `transform` method. The desired dimensionality can be set using the ``n_components`` parameter. This parameter has no influence on the `fit` and `predict` methods. -.. topic:: Examples: +.. rubric:: Examples - :ref:`sphx_glr_auto_examples_decomposition_plot_pca_vs_lda.py`: Comparison of LDA and PCA - for dimensionality reduction of the Iris dataset +* :ref:`sphx_glr_auto_examples_decomposition_plot_pca_vs_lda.py`: Comparison of LDA and + PCA for dimensionality reduction of the Iris dataset .. _lda_qda_math: @@ -93,10 +93,10 @@ predicted class is the one that maximises this log-posterior. .. 
note:: **Relation with Gaussian Naive Bayes** - If in the QDA model one assumes that the covariance matrices are diagonal, - then the inputs are assumed to be conditionally independent in each class, - and the resulting classifier is equivalent to the Gaussian Naive Bayes - classifier :class:`naive_bayes.GaussianNB`. + If in the QDA model one assumes that the covariance matrices are diagonal, + then the inputs are assumed to be conditionally independent in each class, + and the resulting classifier is equivalent to the Gaussian Naive Bayes + classifier :class:`naive_bayes.GaussianNB`. LDA --- @@ -194,10 +194,10 @@ Oracle Approximating Shrinkage estimator :class:`sklearn.covariance.OAS` yields a smaller Mean Squared Error than the one given by Ledoit and Wolf's formula used with shrinkage="auto". In LDA, the data are assumed to be gaussian conditionally to the class. If these assumptions hold, using LDA with -the OAS estimator of covariance will yield a better classification +the OAS estimator of covariance will yield a better classification accuracy than if Ledoit and Wolf or the empirical covariance estimator is used. -The covariance estimator can be chosen using with the ``covariance_estimator`` +The covariance estimator can be chosen using the ``covariance_estimator`` parameter of the :class:`discriminant_analysis.LinearDiscriminantAnalysis` class. A covariance estimator should have a :term:`fit` method and a ``covariance_`` attribute like all covariance estimators in the @@ -210,10 +210,10 @@ class. A covariance estimator should have a :term:`fit` method and a .. centered:: |shrinkage| -.. topic:: Examples: +.. rubric:: Examples - :ref:`sphx_glr_auto_examples_classification_plot_lda.py`: Comparison of LDA classifiers - with Empirical, Ledoit Wolf and OAS covariance estimator. +* :ref:`sphx_glr_auto_examples_classification_plot_lda.py`: Comparison of LDA classifiers + with Empirical, Ledoit Wolf and OAS covariance estimator. Estimation algorithms ===================== @@ -253,13 +253,13 @@ transform, and it supports shrinkage. However, the 'eigen' solver needs to compute the covariance matrix, so it might not be suitable for situations with a high number of features. -.. topic:: References: +.. rubric:: References - .. [1] "The Elements of Statistical Learning", Hastie T., Tibshirani R., - Friedman J., Section 4.3, p.106-119, 2008. +.. [1] "The Elements of Statistical Learning", Hastie T., Tibshirani R., + Friedman J., Section 4.3, p.106-119, 2008. - .. [2] Ledoit O, Wolf M. Honey, I Shrunk the Sample Covariance Matrix. - The Journal of Portfolio Management 30(4), 110-119, 2004. +.. [2] Ledoit O, Wolf M. Honey, I Shrunk the Sample Covariance Matrix. + The Journal of Portfolio Management 30(4), 110-119, 2004. - .. [3] R. O. Duda, P. E. Hart, D. G. Stork. Pattern Classification - (Second Edition), section 2.6.2. +.. [3] R. O. Duda, P. E. Hart, D. G. Stork. Pattern Classification + (Second Edition), section 2.6.2. diff --git a/doc/modules/learning_curve.rst b/doc/modules/learning_curve.rst index 3d458a1a67416..6dca0a29af7cb 100644 --- a/doc/modules/learning_curve.rst +++ b/doc/modules/learning_curve.rst @@ -39,11 +39,11 @@ easy to see whether the estimator suffers from bias or variance. However, in high-dimensional spaces, models can become very difficult to visualize. For this reason, it is often helpful to use the tools described below. -.. topic:: Examples: +.. 
rubric:: Examples - * :ref:`sphx_glr_auto_examples_model_selection_plot_underfitting_overfitting.py` - * :ref:`sphx_glr_auto_examples_model_selection_plot_validation_curve.py` - * :ref:`sphx_glr_auto_examples_model_selection_plot_learning_curve.py` +* :ref:`sphx_glr_auto_examples_model_selection_plot_underfitting_overfitting.py` +* :ref:`sphx_glr_auto_examples_model_selection_plot_train_error_vs_test_error.py` +* :ref:`sphx_glr_auto_examples_model_selection_plot_learning_curve.py` .. _validation_curve: @@ -83,13 +83,13 @@ The function :func:`validation_curve` can help in this case:: ... SVC(kernel="linear"), X, y, param_name="C", param_range=np.logspace(-7, 3, 3), ... ) >>> train_scores - array([[0.90..., 0.94..., 0.91..., 0.89..., 0.92...], - [0.9... , 0.92..., 0.93..., 0.92..., 0.93...], - [0.97..., 1... , 0.98..., 0.97..., 0.99...]]) + array([[0.90, 0.94, 0.91, 0.89, 0.92], + [0.9 , 0.92, 0.93, 0.92, 0.93], + [0.97, 1 , 0.98, 0.97, 0.99]]) >>> valid_scores - array([[0.9..., 0.9... , 0.9... , 0.96..., 0.9... ], - [0.9..., 0.83..., 0.96..., 0.96..., 0.93...], - [1.... , 0.93..., 1.... , 1.... , 0.9... ]]) + array([[0.9, 0.9 , 0.9 , 0.96, 0.9 ], + [0.9, 0.83, 0.96, 0.96, 0.93], + [1. , 0.93, 1 , 1 , 0.9 ]]) If you intend to plot the validation curves only, the class :class:`~sklearn.model_selection.ValidationCurveDisplay` is more direct than @@ -115,14 +115,7 @@ to :func:`validation_curve` to generate and plot the validation curve: If the training score and the validation score are both low, the estimator will be underfitting. If the training score is high and the validation score is low, the estimator is overfitting and otherwise it is working very well. A low -training score and a high validation score is usually not possible. Underfitting, -overfitting, and a working model are shown in the in the plot below where we vary -the parameter `gamma` of an SVM with an RBF kernel on the digits dataset. - -.. figure:: ../auto_examples/model_selection/images/sphx_glr_plot_validation_curve_001.png - :target: ../auto_examples/model_selection/plot_validation_curve.html - :align: center - :scale: 50% +training score and a high validation score is usually not possible. .. _learning_curve: @@ -161,13 +154,13 @@ average scores on the validation sets):: >>> train_sizes array([ 50, 80, 110]) >>> train_scores - array([[0.98..., 0.98 , 0.98..., 0.98..., 0.98...], - [0.98..., 1. , 0.98..., 0.98..., 0.98...], - [0.98..., 1. , 0.98..., 0.98..., 0.99...]]) + array([[0.98, 0.98 , 0.98, 0.98, 0.98], + [0.98, 1. , 0.98, 0.98, 0.98], + [0.98, 1. , 0.98, 0.98, 0.99]]) >>> valid_scores - array([[1. , 0.93..., 1. , 1. , 0.96...], - [1. , 0.96..., 1. , 1. , 0.96...], - [1. , 0.96..., 1. , 1. , 0.96...]]) + array([[1. , 0.93, 1. , 1. , 0.96], + [1. , 0.96, 1. , 1. , 0.96], + [1. , 0.96, 1. , 1. , 0.96]]) If you intend to plot the learning curves only, the class :class:`~sklearn.model_selection.LearningCurveDisplay` will be easier to use. @@ -187,3 +180,8 @@ to :func:`learning_curve` to generate and plot the learning curve: X, y = shuffle(X, y, random_state=0) LearningCurveDisplay.from_estimator( SVC(kernel="linear"), X, y, train_sizes=[50, 80, 110], cv=5) + +.. rubric:: Examples + +* See :ref:`sphx_glr_auto_examples_model_selection_plot_learning_curve.py` for an + example of using learning curves to check the scalability of a predictive model. 
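Since the hunks above repeatedly point readers to the ``Display`` shortcuts, a
minimal sketch of that workflow may help (not part of this patch; it assumes a
recent scikit-learn providing ``ValidationCurveDisplay``, and the dataset and
parameter range are arbitrary)::

    import numpy as np
    from sklearn.datasets import load_digits
    from sklearn.model_selection import ValidationCurveDisplay
    from sklearn.svm import SVC

    X, y = load_digits(return_X_y=True)

    # Computes the train/validation scores over the parameter range and
    # plots both curves in one call, instead of calling validation_curve
    # and assembling the figure by hand.
    ValidationCurveDisplay.from_estimator(
        SVC(), X, y, param_name="gamma",
        param_range=np.logspace(-6, -1, 5), cv=5,
    )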
diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index dd975c4d6e417..69a2bf9b7f477 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -32,14 +32,14 @@ solves a problem of the form: .. math:: \min_{w} || X w - y||_2^2 -.. figure:: ../auto_examples/linear_model/images/sphx_glr_plot_ols_001.png - :target: ../auto_examples/linear_model/plot_ols.html +.. figure:: ../auto_examples/linear_model/images/sphx_glr_plot_ols_ridge_001.png + :target: ../auto_examples/linear_model/plot_ols_ridge.html :align: center :scale: 50% -:class:`LinearRegression` will take in its ``fit`` method arrays ``X``, ``y`` -and will store the coefficients :math:`w` of the linear model in its -``coef_`` member:: +:class:`LinearRegression` takes in its ``fit`` method arguments ``X``, ``y``, +``sample_weight`` and stores the coefficients :math:`w` of the linear model in its +``coef_`` and ``intercept_`` attributes:: >>> from sklearn import linear_model >>> reg = linear_model.LinearRegression() @@ -47,9 +47,11 @@ and will store the coefficients :math:`w` of the linear model in its LinearRegression() >>> reg.coef_ array([0.5, 0.5]) + >>> reg.intercept_ + 0.0 The coefficient estimates for Ordinary Least Squares rely on the -independence of the features. When features are correlated and the +independence of the features. When features are correlated and some columns of the design matrix :math:`X` have an approximately linear dependence, the design matrix becomes close to singular and as a result, the least-squares estimate becomes highly sensitive @@ -57,9 +59,9 @@ to random errors in the observed target, producing a large variance. This situation of *multicollinearity* can arise, for example, when data are collected without an experimental design. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_linear_model_plot_ols.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_ols_ridge.py` Non-Negative Least Squares -------------------------- @@ -71,15 +73,15 @@ quantities (e.g., frequency counts or prices of goods). parameter: when set to `True` `Non-Negative Least Squares `_ are then applied. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_linear_model_plot_nnls.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_nnls.py` Ordinary Least Squares Complexity --------------------------------- The least squares solution is computed using the singular value -decomposition of X. If X is a matrix of shape `(n_samples, n_features)` +decomposition of :math:`X`. If :math:`X` is a matrix of shape `(n_samples, n_features)` this method has a cost of :math:`O(n_{\text{samples}} n_{\text{features}}^2)`, assuming that :math:`n_{\text{samples}} \geq n_{\text{features}}`. @@ -124,7 +126,7 @@ its ``coef_`` member:: >>> reg.coef_ array([0.34545455, 0.34545455]) >>> reg.intercept_ - 0.13636... + np.float64(0.13636) Note that the class :class:`Ridge` allows for the user to specify that the solver be automatically chosen by setting `solver="auto"`. When this option @@ -143,6 +145,11 @@ the corresponding solver is chosen. | 'sparse_cg' | None of the above conditions are fulfilled. | +-------------+----------------------------------------------------+ +.. 
rubric:: Examples + +* :ref:`sphx_glr_auto_examples_linear_model_plot_ols_ridge.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_ridge_path.py` +* :ref:`sphx_glr_auto_examples_inspection_plot_linear_model_coefficient_interpretation.py` Classification -------------- @@ -168,15 +175,14 @@ The :class:`RidgeClassifier` can be significantly faster than e.g. compute the projection matrix :math:`(X^T X)^{-1} X^T` only once. This classifier is sometimes referred to as a `Least Squares Support Vector -Machines +Machine `_ with a linear kernel. -.. topic:: Examples: +.. rubric:: Examples + +* :ref:`sphx_glr_auto_examples_text_plot_document_classification_20newsgroups.py` - * :ref:`sphx_glr_auto_examples_linear_model_plot_ridge_path.py` - * :ref:`sphx_glr_auto_examples_text_plot_document_classification_20newsgroups.py` - * :ref:`sphx_glr_auto_examples_inspection_plot_linear_model_coefficient_interpretation.py` Ridge Complexity ---------------- @@ -209,20 +215,18 @@ Usage example:: RidgeCV(alphas=array([1.e-06, 1.e-05, 1.e-04, 1.e-03, 1.e-02, 1.e-01, 1.e+00, 1.e+01, 1.e+02, 1.e+03, 1.e+04, 1.e+05, 1.e+06])) >>> reg.alpha_ - 0.01 + np.float64(0.01) Specifying the value of the :term:`cv` attribute will trigger the use of cross-validation with :class:`~sklearn.model_selection.GridSearchCV`, for example `cv=10` for 10-fold cross-validation, rather than Leave-One-Out Cross-Validation. -.. topic:: References: - +.. dropdown:: References .. [RL2007] "Notes on Regularized Least Squares", Rifkin & Lippert (`technical report `_, - `course slides - `_). + `course slides `_). .. _lasso: @@ -262,11 +266,11 @@ for another implementation:: The function :func:`lasso_path` is useful for lower-level tasks, as it computes the coefficients along the full path of possible values. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_and_elasticnet.py` - * :ref:`sphx_glr_auto_examples_applications_plot_tomography_l1_reconstruction.py` - * :ref:`sphx_glr_auto_examples_inspection_plot_linear_model_coefficient_interpretation.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_and_elasticnet.py` +* :ref:`sphx_glr_auto_examples_applications_plot_tomography_l1_reconstruction.py` +* :ref:`sphx_glr_auto_examples_inspection_plot_linear_model_coefficient_interpretation.py` .. note:: **Feature selection with Lasso** @@ -275,23 +279,19 @@ computes the coefficients along the full path of possible values. thus be used to perform feature selection, as detailed in :ref:`l1_feature_selection`. -|details-start| -**References** -|details-split| - -The following two references explain the iterations -used in the coordinate descent solver of scikit-learn, as well as -the duality gap computation used for convergence control. +.. dropdown:: References -* "Regularization Path For Generalized linear Models by Coordinate Descent", - Friedman, Hastie & Tibshirani, J Stat Softw, 2010 (`Paper - `__). -* "An Interior-Point Method for Large-Scale L1-Regularized Least Squares," - S. J. Kim, K. Koh, M. Lustig, S. Boyd and D. Gorinevsky, - in IEEE Journal of Selected Topics in Signal Processing, 2007 - (`Paper `__) + The following two references explain the iterations + used in the coordinate descent solver of scikit-learn, as well as + the duality gap computation used for convergence control. -|details-end| + * "Regularization Path For Generalized linear Models by Coordinate Descent", + Friedman, Hastie & Tibshirani, J Stat Softw, 2010 (`Paper + `__). 
+ * "An Interior-Point Method for Large-Scale L1-Regularized Least Squares," + S. J. Kim, K. Koh, M. Lustig, S. Boyd and D. Gorinevsky, + in IEEE Journal of Selected Topics in Signal Processing, 2007 + (`Paper `__) Setting regularization parameter -------------------------------- @@ -348,10 +348,10 @@ the problem is badly conditioned (e.g. more features than samples). :align: center :scale: 50% -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_model_selection.py` - * :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_lars_ic.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_model_selection.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_lars_ic.py` .. _aic_bic: @@ -362,59 +362,57 @@ The definition of AIC (and thus BIC) might differ in the literature. In this section, we give more information regarding the criterion computed in scikit-learn. -|details-start| -**Mathematical details** -|details-split| +.. dropdown:: Mathematical details -The AIC criterion is defined as: + The AIC criterion is defined as: -.. math:: - AIC = -2 \log(\hat{L}) + 2 d + .. math:: + AIC = -2 \log(\hat{L}) + 2 d -where :math:`\hat{L}` is the maximum likelihood of the model and -:math:`d` is the number of parameters (as well referred to as degrees of -freedom in the previous section). + where :math:`\hat{L}` is the maximum likelihood of the model and + :math:`d` is the number of parameters (as well referred to as degrees of + freedom in the previous section). -The definition of BIC replace the constant :math:`2` by :math:`\log(N)`: + The definition of BIC replaces the constant :math:`2` by :math:`\log(N)`: -.. math:: - BIC = -2 \log(\hat{L}) + \log(N) d + .. math:: + BIC = -2 \log(\hat{L}) + \log(N) d -where :math:`N` is the number of samples. + where :math:`N` is the number of samples. -For a linear Gaussian model, the maximum log-likelihood is defined as: + For a linear Gaussian model, the maximum log-likelihood is defined as: -.. math:: - \log(\hat{L}) = - \frac{n}{2} \log(2 \pi) - \frac{n}{2} \ln(\sigma^2) - \frac{\sum_{i=1}^{n} (y_i - \hat{y}_i)^2}{2\sigma^2} + .. math:: + \log(\hat{L}) = - \frac{n}{2} \log(2 \pi) - \frac{n}{2} \ln(\sigma^2) - \frac{\sum_{i=1}^{n} (y_i - \hat{y}_i)^2}{2\sigma^2} -where :math:`\sigma^2` is an estimate of the noise variance, -:math:`y_i` and :math:`\hat{y}_i` are respectively the true and predicted -targets, and :math:`n` is the number of samples. + where :math:`\sigma^2` is an estimate of the noise variance, + :math:`y_i` and :math:`\hat{y}_i` are respectively the true and predicted + targets, and :math:`n` is the number of samples. -Plugging the maximum log-likelihood in the AIC formula yields: + Plugging the maximum log-likelihood in the AIC formula yields: -.. math:: - AIC = n \log(2 \pi \sigma^2) + \frac{\sum_{i=1}^{n} (y_i - \hat{y}_i)^2}{\sigma^2} + 2 d + .. math:: + AIC = n \log(2 \pi \sigma^2) + \frac{\sum_{i=1}^{n} (y_i - \hat{y}_i)^2}{\sigma^2} + 2 d -The first term of the above expression is sometimes discarded since it is a -constant when :math:`\sigma^2` is provided. In addition, -it is sometimes stated that the AIC is equivalent to the :math:`C_p` statistic -[12]_. In a strict sense, however, it is equivalent only up to some constant -and a multiplicative factor. + The first term of the above expression is sometimes discarded since it is a + constant when :math:`\sigma^2` is provided. In addition, + it is sometimes stated that the AIC is equivalent to the :math:`C_p` statistic + [12]_. 
In a strict sense, however, it is equivalent only up to some constant + and a multiplicative factor. -At last, we mentioned above that :math:`\sigma^2` is an estimate of the -noise variance. In :class:`LassoLarsIC` when the parameter `noise_variance` is -not provided (default), the noise variance is estimated via the unbiased -estimator [13]_ defined as: + At last, we mentioned above that :math:`\sigma^2` is an estimate of the + noise variance. In :class:`LassoLarsIC` when the parameter `noise_variance` is + not provided (default), the noise variance is estimated via the unbiased + estimator [13]_ defined as: -.. math:: - \sigma^2 = \frac{\sum_{i=1}^{n} (y_i - \hat{y}_i)^2}{n - p} + .. math:: + \sigma^2 = \frac{\sum_{i=1}^{n} (y_i - \hat{y}_i)^2}{n - p} -where :math:`p` is the number of features and :math:`\hat{y}_i` is the -predicted target using an ordinary least squares regression. Note, that this -formula is valid only when `n_samples > n_features`. + where :math:`p` is the number of features and :math:`\hat{y}_i` is the + predicted target using an ordinary least squares regression. Note, that this + formula is valid only when `n_samples > n_features`. -.. topic:: References: + .. rubric:: References .. [12] :arxiv:`Zou, Hui, Trevor Hastie, and Robert Tibshirani. "On the degrees of freedom of the lasso." @@ -426,8 +424,6 @@ formula is valid only when `n_samples > n_features`. Neural computation 15.7 (2003): 1691-1714. <10.1162/089976603321891864>` -|details-end| - Comparison with the regularization parameter of SVM ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -463,33 +459,29 @@ the MultiTaskLasso are full columns. .. centered:: Fitting a time-series model, imposing that any active feature be active at all times. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_linear_model_plot_multi_task_lasso_support.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_multi_task_lasso_support.py` -|details-start| -**Mathematical details** -|details-split| +.. dropdown:: Mathematical details -Mathematically, it consists of a linear model trained with a mixed -:math:`\ell_1` :math:`\ell_2`-norm for regularization. -The objective function to minimize is: - -.. math:: \min_{W} { \frac{1}{2n_{\text{samples}}} ||X W - Y||_{\text{Fro}} ^ 2 + \alpha ||W||_{21}} + Mathematically, it consists of a linear model trained with a mixed + :math:`\ell_1` :math:`\ell_2`-norm for regularization. + The objective function to minimize is: -where :math:`\text{Fro}` indicates the Frobenius norm + .. math:: \min_{W} { \frac{1}{2n_{\text{samples}}} ||X W - Y||_{\text{Fro}} ^ 2 + \alpha ||W||_{21}} -.. math:: ||A||_{\text{Fro}} = \sqrt{\sum_{ij} a_{ij}^2} + where :math:`\text{Fro}` indicates the Frobenius norm -and :math:`\ell_1` :math:`\ell_2` reads + .. math:: ||A||_{\text{Fro}} = \sqrt{\sum_{ij} a_{ij}^2} -.. math:: ||A||_{2 1} = \sum_i \sqrt{\sum_j a_{ij}^2}. + and :math:`\ell_1` :math:`\ell_2` reads -The implementation in the class :class:`MultiTaskLasso` uses -coordinate descent as the algorithm to fit the coefficients. + .. math:: ||A||_{2 1} = \sum_i \sqrt{\sum_j a_{ij}^2}. -|details-end| + The implementation in the class :class:`MultiTaskLasso` uses + coordinate descent as the algorithm to fit the coefficients. .. _elastic_net: @@ -518,36 +510,33 @@ The objective function to minimize is in this case \frac{\alpha(1-\rho)}{2} ||w||_2 ^ 2} -.. 
figure:: ../auto_examples/linear_model/images/sphx_glr_plot_lasso_coordinate_descent_path_001.png - :target: ../auto_examples/linear_model/plot_lasso_coordinate_descent_path.html +.. figure:: ../auto_examples/linear_model/images/sphx_glr_plot_lasso_lasso_lars_elasticnet_path_002.png + :target: ../auto_examples/linear_model/plot_lasso_lasso_lars_elasticnet_path.html :align: center :scale: 50% The class :class:`ElasticNetCV` can be used to set the parameters ``alpha`` (:math:`\alpha`) and ``l1_ratio`` (:math:`\rho`) by cross-validation. -.. topic:: Examples: - - * :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_and_elasticnet.py` - * :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_coordinate_descent_path.py` +.. rubric:: Examples -|details-start| -**References** -|details-split| +* :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_and_elasticnet.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_lasso_lars_elasticnet_path.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_elastic_net_precomputed_gram_matrix_with_weighted_samples.py` -The following two references explain the iterations -used in the coordinate descent solver of scikit-learn, as well as -the duality gap computation used for convergence control. +.. dropdown:: References -* "Regularization Path For Generalized linear Models by Coordinate Descent", - Friedman, Hastie & Tibshirani, J Stat Softw, 2010 (`Paper - `__). -* "An Interior-Point Method for Large-Scale L1-Regularized Least Squares," - S. J. Kim, K. Koh, M. Lustig, S. Boyd and D. Gorinevsky, - in IEEE Journal of Selected Topics in Signal Processing, 2007 - (`Paper `__) + The following two references explain the iterations + used in the coordinate descent solver of scikit-learn, as well as + the duality gap computation used for convergence control. -|details-end| + * "Regularization Path For Generalized linear Models by Coordinate Descent", + Friedman, Hastie & Tibshirani, J Stat Softw, 2010 (`Paper + `__). + * "An Interior-Point Method for Large-Scale L1-Regularized Least Squares," + S. J. Kim, K. Koh, M. Lustig, S. Boyd and D. Gorinevsky, + in IEEE Journal of Selected Topics in Signal Processing, 2007 + (`Paper `__) .. _multi_task_elastic_net: @@ -626,8 +615,8 @@ algorithm, and unlike the implementation based on coordinate descent, this yields the exact solution, which is piecewise linear as a function of the norm of its coefficients. -.. figure:: ../auto_examples/linear_model/images/sphx_glr_plot_lasso_lars_001.png - :target: ../auto_examples/linear_model/plot_lasso_lars.html +.. figure:: ../auto_examples/linear_model/images/sphx_glr_plot_lasso_lasso_lars_elasticnet_path_001.png + :target: ../auto_examples/linear_model/plot_lasso_lasso_lars_elasticnet_path.html :align: center :scale: 50% @@ -638,39 +627,35 @@ function of the norm of its coefficients. >>> reg.fit([[0, 0], [1, 1]], [0, 1]) LassoLars(alpha=0.1) >>> reg.coef_ - array([0.6..., 0. ]) + array([0.6, 0. ]) -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_lars.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_lasso_lars_elasticnet_path.py` -The Lars algorithm provides the full path of the coefficients along +The LARS algorithm provides the full path of the coefficients along the regularization parameter almost for free, thus a common operation is to retrieve the path with one of the functions :func:`lars_path` or :func:`lars_path_gram`. -|details-start| -**Mathematical formulation** -|details-split| +.. 
dropdown:: Mathematical formulation -The algorithm is similar to forward stepwise regression, but instead -of including features at each step, the estimated coefficients are -increased in a direction equiangular to each one's correlations with -the residual. + The algorithm is similar to forward stepwise regression, but instead + of including features at each step, the estimated coefficients are + increased in a direction equiangular to each one's correlations with + the residual. -Instead of giving a vector result, the LARS solution consists of a -curve denoting the solution for each value of the :math:`\ell_1` norm of the -parameter vector. The full coefficients path is stored in the array -``coef_path_`` of shape `(n_features, max_features + 1)`. The first -column is always zero. + Instead of giving a vector result, the LARS solution consists of a + curve denoting the solution for each value of the :math:`\ell_1` norm of the + parameter vector. The full coefficients path is stored in the array + ``coef_path_`` of shape `(n_features, max_features + 1)`. The first + column is always zero. -.. topic:: References: + .. rubric:: References - * Original Algorithm is detailed in the paper `Least Angle Regression - `_ - by Hastie et al. - -|details-end| + * Original Algorithm is detailed in the paper `Least Angle Regression + `_ + by Hastie et al. .. _omp: @@ -678,7 +663,7 @@ Orthogonal Matching Pursuit (OMP) ================================= :class:`OrthogonalMatchingPursuit` and :func:`orthogonal_mp` implement the OMP algorithm for approximating the fit of a linear model with constraints imposed -on the number of non-zero coefficients (ie. the :math:`\ell_0` pseudo-norm). +on the number of non-zero coefficients (i.e. the :math:`\ell_0` pseudo-norm). Being a forward feature selection method like :ref:`least_angle_regression`, orthogonal matching pursuit can approximate the optimum solution vector with a @@ -701,21 +686,17 @@ residual is recomputed using an orthogonal projection on the space of the previously chosen dictionary elements. -.. topic:: Examples: - - * :ref:`sphx_glr_auto_examples_linear_model_plot_omp.py` +.. rubric:: Examples -|details-start| -**References** -|details-split| +* :ref:`sphx_glr_auto_examples_linear_model_plot_omp.py` -* https://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf +.. dropdown:: References -* `Matching pursuits with time-frequency dictionaries - `_, - S. G. Mallat, Z. Zhang, + * https://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf -|details-end| + * `Matching pursuits with time-frequency dictionaries + `_, + S. G. Mallat, Z. Zhang, .. _bayesian_regression: @@ -754,17 +735,13 @@ The disadvantages of Bayesian regression include: - Inference of the model can be time consuming. -|details-start| -**References** -|details-split| - -* A good introduction to Bayesian methods is given in C. Bishop: Pattern - Recognition and Machine learning +.. dropdown:: References -* Original Algorithm is detailed in the book `Bayesian learning for neural - networks` by Radford M. Neal + * A good introduction to Bayesian methods is given in C. Bishop: Pattern + Recognition and Machine learning -|details-end| + * Original Algorithm is detailed in the book `Bayesian learning for neural + networks` by Radford M. Neal .. 
_bayesian_ridge_regression: @@ -817,25 +794,21 @@ The coefficients :math:`w` of the model can be accessed:: >>> reg.coef_ array([0.49999993, 0.49999993]) -Due to the Bayesian framework, the weights found are slightly different to the +Due to the Bayesian framework, the weights found are slightly different from the ones found by :ref:`ordinary_least_squares`. However, Bayesian Ridge Regression is more robust to ill-posed problems. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_linear_model_plot_bayesian_ridge_curvefit.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_bayesian_ridge_curvefit.py` -|details-start| -**References** -|details-split| +.. dropdown:: References -* Section 3.3 in Christopher M. Bishop: Pattern Recognition and Machine Learning, 2006 + * Section 3.3 in Christopher M. Bishop: Pattern Recognition and Machine Learning, 2006 -* David J. C. MacKay, `Bayesian Interpolation `_, 1992. + * David J. C. MacKay, `Bayesian Interpolation `_, 1992. -* Michael E. Tipping, `Sparse Bayesian Learning and the Relevance Vector Machine `_, 2001. - -|details-end| + * Michael E. Tipping, `Sparse Bayesian Learning and the Relevance Vector Machine `_, 2001. .. _automatic_relevance_determination: @@ -867,20 +840,20 @@ ARD is also known in the literature as *Sparse Bayesian Learning* and *Relevance Vector Machine* [3]_ [4]_. For a worked-out comparison between ARD and `Bayesian Ridge Regression`_, see the example below. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_linear_model_plot_ard.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_ard.py` -.. topic:: References: +.. rubric:: References - .. [1] Christopher M. Bishop: Pattern Recognition and Machine Learning, Chapter 7.2.1 +.. [1] Christopher M. Bishop: Pattern Recognition and Machine Learning, Chapter 7.2.1 - .. [2] David Wipf and Srikantan Nagarajan: `A New View of Automatic Relevance Determination `_ +.. [2] David Wipf and Srikantan Nagarajan: `A New View of Automatic Relevance Determination `_ - .. [3] Michael E. Tipping: `Sparse Bayesian Learning and the Relevance Vector Machine `_ +.. [3] Michael E. Tipping: `Sparse Bayesian Learning and the Relevance Vector Machine `_ - .. [4] Tristan Fletcher: `Relevance Vector Machines Explained `_ +.. [4] Tristan Fletcher: `Relevance Vector Machines Explained `_ .. _Logistic_regression: @@ -917,17 +890,14 @@ regularization. implemented in scikit-learn, so it expects a categorical target, making the Logistic Regression a classifier. -.. topic:: Examples - - * :ref:`sphx_glr_auto_examples_linear_model_plot_logistic_l1_l2_sparsity.py` - - * :ref:`sphx_glr_auto_examples_linear_model_plot_logistic_path.py` - - * :ref:`sphx_glr_auto_examples_linear_model_plot_logistic_multinomial.py` - - * :ref:`sphx_glr_auto_examples_linear_model_plot_sparse_logistic_regression_20newsgroups.py` +.. 
rubric:: Examples - * :ref:`sphx_glr_auto_examples_linear_model_plot_sparse_logistic_regression_mnist.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_logistic_l1_l2_sparsity.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_logistic_path.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_logistic_multinomial.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_sparse_logistic_regression_20newsgroups.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_sparse_logistic_regression_mnist.py` +* :ref:`sphx_glr_auto_examples_classification_plot_classification_probability.py` Binary Case ----------- @@ -999,47 +969,45 @@ logistic regression, see also `log-linear model especially important when using regularization. The choice of overparameterization can be detrimental for unpenalized models since then the solution may not be unique, as shown in [16]_. -|details-start| -**Mathematical details** -|details-split| +.. dropdown:: Mathematical details -Let :math:`y_i \in {1, \ldots, K}` be the label (ordinal) encoded target variable for observation :math:`i`. -Instead of a single coefficient vector, we now have -a matrix of coefficients :math:`W` where each row vector :math:`W_k` corresponds to class -:math:`k`. We aim at predicting the class probabilities :math:`P(y_i=k|X_i)` via -:meth:`~sklearn.linear_model.LogisticRegression.predict_proba` as: + Let :math:`y_i \in {1, \ldots, K}` be the label (ordinal) encoded target variable for observation :math:`i`. + Instead of a single coefficient vector, we now have + a matrix of coefficients :math:`W` where each row vector :math:`W_k` corresponds to class + :math:`k`. We aim at predicting the class probabilities :math:`P(y_i=k|X_i)` via + :meth:`~sklearn.linear_model.LogisticRegression.predict_proba` as: -.. math:: \hat{p}_k(X_i) = \frac{\exp(X_i W_k + W_{0, k})}{\sum_{l=0}^{K-1} \exp(X_i W_l + W_{0, l})}. + .. math:: \hat{p}_k(X_i) = \frac{\exp(X_i W_k + W_{0, k})}{\sum_{l=0}^{K-1} \exp(X_i W_l + W_{0, l})}. -The objective for the optimization becomes + The objective for the optimization becomes -.. math:: - \min_W -\frac{1}{S}\sum_{i=1}^n \sum_{k=0}^{K-1} s_{ik} [y_i = k] \log(\hat{p}_k(X_i)) - + \frac{r(W)}{S C}\,. + .. math:: + \min_W -\frac{1}{S}\sum_{i=1}^n \sum_{k=0}^{K-1} s_{ik} [y_i = k] \log(\hat{p}_k(X_i)) + + \frac{r(W)}{S C}\,, -Where :math:`[P]` represents the Iverson bracket which evaluates to :math:`0` -if :math:`P` is false, otherwise it evaluates to :math:`1`. + where :math:`[P]` represents the Iverson bracket which evaluates to :math:`0` + if :math:`P` is false, otherwise it evaluates to :math:`1`. -Again, :math:`s_{ik}` are the weights assigned by the user (multiplication of sample -weights and class weights) with their sum :math:`S = \sum_{i=1}^n \sum_{k=0}^{K-1} s_{ik}`. + Again, :math:`s_{ik}` are the weights assigned by the user (multiplication of sample + weights and class weights) with their sum :math:`S = \sum_{i=1}^n \sum_{k=0}^{K-1} s_{ik}`. 
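As a quick aside, the softmax relationship described above can be checked
directly; this sketch (not part of this patch, on an arbitrary dataset) shows
that each row of ``predict_proba`` is a distribution over the classes::

    from sklearn.datasets import load_iris
    from sklearn.linear_model import LogisticRegression

    X, y = load_iris(return_X_y=True)
    clf = LogisticRegression(max_iter=1000).fit(X, y)

    # One probability per class and sample; each row is a softmax over the
    # per-class linear scores X_i W_k + W_{0,k}, so it sums to 1.
    proba = clf.predict_proba(X[:3])
    print(proba.shape)        # (3, 3): three samples, three classes
    print(proba.sum(axis=1))  # ~1.0 for every row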
-We currently provide four choices -for the regularization term :math:`r(W)` via the `penalty` argument, where :math:`m` -is the number of features: + We currently provide four choices + for the regularization term :math:`r(W)` via the `penalty` argument, where :math:`m` + is the number of features: -+----------------+----------------------------------------------------------------------------------+ -| penalty | :math:`r(W)` | -+================+==================================================================================+ -| `None` | :math:`0` | -+----------------+----------------------------------------------------------------------------------+ -| :math:`\ell_1` | :math:`\|W\|_{1,1} = \sum_{i=1}^m\sum_{j=1}^{K}|W_{i,j}|` | -+----------------+----------------------------------------------------------------------------------+ -| :math:`\ell_2` | :math:`\frac{1}{2}\|W\|_F^2 = \frac{1}{2}\sum_{i=1}^m\sum_{j=1}^{K} W_{i,j}^2` | -+----------------+----------------------------------------------------------------------------------+ -| `ElasticNet` | :math:`\frac{1 - \rho}{2}\|W\|_F^2 + \rho \|W\|_{1,1}` | -+----------------+----------------------------------------------------------------------------------+ + +----------------+----------------------------------------------------------------------------------+ + | penalty | :math:`r(W)` | + +================+==================================================================================+ + | `None` | :math:`0` | + +----------------+----------------------------------------------------------------------------------+ + | :math:`\ell_1` | :math:`\|W\|_{1,1} = \sum_{i=1}^m\sum_{j=1}^{K}|W_{i,j}|` | + +----------------+----------------------------------------------------------------------------------+ + | :math:`\ell_2` | :math:`\frac{1}{2}\|W\|_F^2 = \frac{1}{2}\sum_{i=1}^m\sum_{j=1}^{K} W_{i,j}^2` | + +----------------+----------------------------------------------------------------------------------+ + | `ElasticNet` | :math:`\frac{1 - \rho}{2}\|W\|_F^2 + \rho \|W\|_{1,1}` | + +----------------+----------------------------------------------------------------------------------+ -|details-end| +.. _logistic_regression_solvers: Solvers ------- @@ -1093,59 +1061,57 @@ are zeroes. This is because for the sample(s) with ``decision_function`` zero, :class:`LogisticRegression` and :class:`~sklearn.svm.LinearSVC` predict the negative class, while liblinear predicts the positive class. Note that a model with ``fit_intercept=False`` and having many samples with ``decision_function`` -zero, is likely to be a underfit, bad model and you are advised to set +zero, is likely to be an underfit, bad model and you are advised to set ``fit_intercept=True`` and increase the ``intercept_scaling``. -|details-start| -**Solvers' details** -|details-split| - -* The solver "liblinear" uses a coordinate descent (CD) algorithm, and relies - on the excellent C++ `LIBLINEAR library - `_, which is shipped with - scikit-learn. However, the CD algorithm implemented in liblinear cannot learn - a true multinomial (multiclass) model; instead, the optimization problem is - decomposed in a "one-vs-rest" fashion so separate binary classifiers are - trained for all classes. This happens under the hood, so - :class:`LogisticRegression` instances using this solver behave as multiclass - classifiers. For :math:`\ell_1` regularization :func:`sklearn.svm.l1_min_c` allows to - calculate the lower bound for C in order to get a non "null" (all feature - weights to zero) model. 
- -* The "lbfgs", "newton-cg" and "sag" solvers only support :math:`\ell_2` - regularization or no regularization, and are found to converge faster for some - high-dimensional data. Setting `multi_class` to "multinomial" with these solvers - learns a true multinomial logistic regression model [5]_, which means that its - probability estimates should be better calibrated than the default "one-vs-rest" - setting. - -* The "sag" solver uses Stochastic Average Gradient descent [6]_. It is faster - than other solvers for large datasets, when both the number of samples and the - number of features are large. - -* The "saga" solver [7]_ is a variant of "sag" that also supports the - non-smooth `penalty="l1"`. This is therefore the solver of choice for sparse - multinomial logistic regression. It is also the only solver that supports - `penalty="elasticnet"`. - -* The "lbfgs" is an optimization algorithm that approximates the - Broyden–Fletcher–Goldfarb–Shanno algorithm [8]_, which belongs to - quasi-Newton methods. As such, it can deal with a wide range of different training - data and is therefore the default solver. Its performance, however, suffers on poorly - scaled datasets and on datasets with one-hot encoded categorical features with rare - categories. - -* The "newton-cholesky" solver is an exact Newton solver that calculates the hessian - matrix and solves the resulting linear system. It is a very good choice for - `n_samples` >> `n_features`, but has a few shortcomings: Only :math:`\ell_2` - regularization is supported. Furthermore, because the hessian matrix is explicitly - computed, the memory usage has a quadratic dependency on `n_features` as well as on - `n_classes`. As a consequence, only the one-vs-rest scheme is implemented for the - multiclass case. - -For a comparison of some of these solvers, see [9]_. - -.. topic:: References: +.. dropdown:: Solvers' details + + * The solver "liblinear" uses a coordinate descent (CD) algorithm, and relies + on the excellent C++ `LIBLINEAR library + `_, which is shipped with + scikit-learn. However, the CD algorithm implemented in liblinear cannot learn + a true multinomial (multiclass) model; instead, the optimization problem is + decomposed in a "one-vs-rest" fashion so separate binary classifiers are + trained for all classes. This happens under the hood, so + :class:`LogisticRegression` instances using this solver behave as multiclass + classifiers. For :math:`\ell_1` regularization :func:`sklearn.svm.l1_min_c` allows to + calculate the lower bound for C in order to get a non "null" (all feature + weights to zero) model. + + * The "lbfgs", "newton-cg" and "sag" solvers only support :math:`\ell_2` + regularization or no regularization, and are found to converge faster for some + high-dimensional data. Setting `multi_class` to "multinomial" with these solvers + learns a true multinomial logistic regression model [5]_, which means that its + probability estimates should be better calibrated than the default "one-vs-rest" + setting. + + * The "sag" solver uses Stochastic Average Gradient descent [6]_. It is faster + than other solvers for large datasets, when both the number of samples and the + number of features are large. + + * The "saga" solver [7]_ is a variant of "sag" that also supports the + non-smooth `penalty="l1"`. This is therefore the solver of choice for sparse + multinomial logistic regression. It is also the only solver that supports + `penalty="elasticnet"`. 
+ + * The "lbfgs" is an optimization algorithm that approximates the + Broyden–Fletcher–Goldfarb–Shanno algorithm [8]_, which belongs to + quasi-Newton methods. As such, it can deal with a wide range of different training + data and is therefore the default solver. Its performance, however, suffers on poorly + scaled datasets and on datasets with one-hot encoded categorical features with rare + categories. + + * The "newton-cholesky" solver is an exact Newton solver that calculates the hessian + matrix and solves the resulting linear system. It is a very good choice for + `n_samples` >> `n_features`, but has a few shortcomings: Only :math:`\ell_2` + regularization is supported. Furthermore, because the hessian matrix is explicitly + computed, the memory usage has a quadratic dependency on `n_features` as well as on + `n_classes`. As a consequence, only the one-vs-rest scheme is implemented for the + multiclass case. + + For a comparison of some of these solvers, see [9]_. + + .. rubric:: References .. [5] Christopher M. Bishop: Pattern Recognition and Machine Learning, Chapter 4.3.4 @@ -1164,8 +1130,6 @@ For a comparison of some of these solvers, see [9]_. "A Blockwise Descent Algorithm for Group-penalized Multiresponse and Multinomial Regression." <1311.6529>` -|details-end| - .. note:: **Feature selection with sparse logistic regression** @@ -1262,38 +1226,34 @@ The choice of the distribution depends on the problem at hand: used for multiclass classification. -|details-start| -**Examples of use cases** -|details-split| - -* Agriculture / weather modeling: number of rain events per year (Poisson), - amount of rainfall per event (Gamma), total rainfall per year (Tweedie / - Compound Poisson Gamma). -* Risk modeling / insurance policy pricing: number of claim events / - policyholder per year (Poisson), cost per event (Gamma), total cost per - policyholder per year (Tweedie / Compound Poisson Gamma). -* Credit Default: probability that a loan can't be paid back (Bernoulli). -* Fraud Detection: probability that a financial transaction like a cash transfer - is a fraudulent transaction (Bernoulli). -* Predictive maintenance: number of production interruption events per year - (Poisson), duration of interruption (Gamma), total interruption time per year - (Tweedie / Compound Poisson Gamma). -* Medical Drug Testing: probability of curing a patient in a set of trials or - probability that a patient will experience side effects (Bernoulli). -* News Classification: classification of news articles into three categories - namely Business News, Politics and Entertainment news (Categorical). +.. dropdown:: Examples of use cases -|details-end| + * Agriculture / weather modeling: number of rain events per year (Poisson), + amount of rainfall per event (Gamma), total rainfall per year (Tweedie / + Compound Poisson Gamma). + * Risk modeling / insurance policy pricing: number of claim events / + policyholder per year (Poisson), cost per event (Gamma), total cost per + policyholder per year (Tweedie / Compound Poisson Gamma). + * Credit Default: probability that a loan can't be paid back (Bernoulli). + * Fraud Detection: probability that a financial transaction like a cash transfer + is a fraudulent transaction (Bernoulli). + * Predictive maintenance: number of production interruption events per year + (Poisson), duration of interruption (Gamma), total interruption time per year + (Tweedie / Compound Poisson Gamma). 
+ * Medical Drug Testing: probability of curing a patient in a set of trials or + probability that a patient will experience side effects (Bernoulli). + * News Classification: classification of news articles into three categories + namely Business News, Politics and Entertainment news (Categorical). -.. topic:: References: +.. rubric:: References - .. [10] McCullagh, Peter; Nelder, John (1989). Generalized Linear Models, - Second Edition. Boca Raton: Chapman and Hall/CRC. ISBN 0-412-31760-5. +.. [10] McCullagh, Peter; Nelder, John (1989). Generalized Linear Models, + Second Edition. Boca Raton: Chapman and Hall/CRC. ISBN 0-412-31760-5. - .. [11] Jørgensen, B. (1992). The theory of exponential dispersion models - and analysis of deviance. Monografias de matemática, no. 51. See also - `Exponential dispersion model. - `_ +.. [11] Jørgensen, B. (1992). The theory of exponential dispersion models + and analysis of deviance. Monografias de matemática, no. 51. See also + `Exponential dispersion model. + `_ Usage ----- @@ -1322,42 +1282,38 @@ Usage example:: >>> reg.fit([[0, 0], [0, 1], [2, 2]], [0, 1, 2]) TweedieRegressor(alpha=0.5, link='log', power=1) >>> reg.coef_ - array([0.2463..., 0.4337...]) + array([0.2463, 0.4337]) >>> reg.intercept_ - -0.7638... + np.float64(-0.7638) -.. topic:: Examples +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_linear_model_plot_poisson_regression_non_normal_loss.py` - * :ref:`sphx_glr_auto_examples_linear_model_plot_tweedie_regression_insurance_claims.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_poisson_regression_non_normal_loss.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_tweedie_regression_insurance_claims.py` -|details-start| -**Practical considerations** -|details-split| +.. dropdown:: Practical considerations -The feature matrix `X` should be standardized before fitting. This ensures -that the penalty treats features equally. + The feature matrix `X` should be standardized before fitting. This ensures + that the penalty treats features equally. -Since the linear predictor :math:`Xw` can be negative and Poisson, -Gamma and Inverse Gaussian distributions don't support negative values, it -is necessary to apply an inverse link function that guarantees the -non-negativeness. For example with `link='log'`, the inverse link function -becomes :math:`h(Xw)=\exp(Xw)`. + Since the linear predictor :math:`Xw` can be negative and Poisson, + Gamma and Inverse Gaussian distributions don't support negative values, it + is necessary to apply an inverse link function that guarantees the + non-negativeness. For example with `link='log'`, the inverse link function + becomes :math:`h(Xw)=\exp(Xw)`. -If you want to model a relative frequency, i.e. counts per exposure (time, -volume, ...) you can do so by using a Poisson distribution and passing -:math:`y=\frac{\mathrm{counts}}{\mathrm{exposure}}` as target values -together with :math:`\mathrm{exposure}` as sample weights. For a concrete -example see e.g. -:ref:`sphx_glr_auto_examples_linear_model_plot_tweedie_regression_insurance_claims.py`. + If you want to model a relative frequency, i.e. counts per exposure (time, + volume, ...) you can do so by using a Poisson distribution and passing + :math:`y=\frac{\mathrm{counts}}{\mathrm{exposure}}` as target values + together with :math:`\mathrm{exposure}` as sample weights. For a concrete + example see e.g. + :ref:`sphx_glr_auto_examples_linear_model_plot_tweedie_regression_insurance_claims.py`. 
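The counts-per-exposure recipe in the paragraph above is short enough to
sketch. The data below are made up for illustration (hypothetical claim counts
and exposures, a single made-up feature); only the fitting pattern is the
point::

    import numpy as np
    from sklearn.linear_model import PoissonRegressor

    # Hypothetical claim counts and exposures (e.g. policy-years observed).
    counts = np.array([0, 1, 3, 0, 2])
    exposure = np.array([0.5, 1.0, 2.0, 0.8, 1.5])
    X = np.array([[20.0], [35.0], [52.0], [28.0], [44.0]])  # one feature

    # Model the frequency y = counts / exposure, weighting each observation
    # by its exposure, as described above.
    glm = PoissonRegressor()
    glm.fit(X, counts / exposure, sample_weight=exposure)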
-When performing cross-validation for the `power` parameter of -`TweedieRegressor`, it is advisable to specify an explicit `scoring` function, -because the default scorer :meth:`TweedieRegressor.score` is a function of -`power` itself. - -|details-end| + When performing cross-validation for the `power` parameter of + `TweedieRegressor`, it is advisable to specify an explicit `scoring` function, + because the default scorer :meth:`TweedieRegressor.score` is a function of + `power` itself. Stochastic Gradient Descent - SGD ================================= @@ -1415,15 +1371,11 @@ For classification, :class:`PassiveAggressiveClassifier` can be used with ``loss='epsilon_insensitive'`` (PA-I) or ``loss='squared_epsilon_insensitive'`` (PA-II). -|details-start| -**References** -|details-split| - -* `"Online Passive-Aggressive Algorithms" - `_ - K. Crammer, O. Dekel, J. Keshat, S. Shalev-Shwartz, Y. Singer - JMLR 7 (2006) +.. dropdown:: References -|details-end| + * `"Online Passive-Aggressive Algorithms" + `_ + K. Crammer, O. Dekel, J. Keshat, S. Shalev-Shwartz, Y. Singer - JMLR 7 (2006) Robustness regression: outliers and modeling errors ===================================================== @@ -1491,7 +1443,7 @@ in these settings. * :ref:`HuberRegressor ` should be faster than :ref:`RANSAC ` and :ref:`Theil Sen ` - unless the number of samples are very large, i.e. ``n_samples`` >> ``n_features``. + unless the number of samples is very large, i.e. ``n_samples`` >> ``n_features``. This is because :ref:`RANSAC ` and :ref:`Theil Sen ` fit on smaller subsets of the data. However, both :ref:`Theil Sen ` and :ref:`RANSAC ` are unlikely to be as robust as @@ -1533,56 +1485,48 @@ estimated only from the determined inliers. :align: center :scale: 50% -.. topic:: Examples - - * :ref:`sphx_glr_auto_examples_linear_model_plot_ransac.py` - * :ref:`sphx_glr_auto_examples_linear_model_plot_robust_fit.py` - -|details-start| -**Details of the algorithm** -|details-split| - -Each iteration performs the following steps: - -1. Select ``min_samples`` random samples from the original data and check - whether the set of data is valid (see ``is_data_valid``). -2. Fit a model to the random subset (``estimator.fit``) and check - whether the estimated model is valid (see ``is_model_valid``). -3. Classify all data as inliers or outliers by calculating the residuals - to the estimated model (``estimator.predict(X) - y``) - all data - samples with absolute residuals smaller than or equal to the - ``residual_threshold`` are considered as inliers. -4. Save fitted model as best model if number of inlier samples is - maximal. In case the current estimated model has the same number of - inliers, it is only considered as the best model if it has better score. - -These steps are performed either a maximum number of times (``max_trials``) or -until one of the special stop criteria are met (see ``stop_n_inliers`` and -``stop_score``). The final model is estimated using all inlier samples (consensus -set) of the previously determined best model. - -The ``is_data_valid`` and ``is_model_valid`` functions allow to identify and reject -degenerate combinations of random sub-samples. If the estimated model is not -needed for identifying degenerate cases, ``is_data_valid`` should be used as it -is called prior to fitting the model and thus leading to better computational -performance. 
-
-|details-end|
-
-|details-start|
-**References**
-|details-split|
-
-* https://en.wikipedia.org/wiki/RANSAC
-* `"Random Sample Consensus: A Paradigm for Model Fitting with Applications to
-  Image Analysis and Automated Cartography"
-  `_
-  Martin A. Fischler and Robert C. Bolles - SRI International (1981)
-* `"Performance Evaluation of RANSAC Family"
-  `_
-  Sunglok Choi, Taemin Kim and Wonpil Yu - BMVC (2009)
-
-|details-end|
+.. rubric:: Examples
+
+* :ref:`sphx_glr_auto_examples_linear_model_plot_ransac.py`
+* :ref:`sphx_glr_auto_examples_linear_model_plot_robust_fit.py`
+
+.. dropdown:: Details of the algorithm
+
+  Each iteration performs the following steps:
+
+  1. Select ``min_samples`` random samples from the original data and check
+     whether the set of data is valid (see ``is_data_valid``).
+  2. Fit a model to the random subset (``estimator.fit``) and check
+     whether the estimated model is valid (see ``is_model_valid``).
+  3. Classify all data as inliers or outliers by calculating the residuals
+     to the estimated model (``estimator.predict(X) - y``) - all data
+     samples with absolute residuals smaller than or equal to the
+     ``residual_threshold`` are considered as inliers.
+  4. Save the fitted model as the best model if the number of inlier samples
+     is maximal. In case the current estimated model has the same number of
+     inliers, it is only considered as the best model if it has a better score.
+
+  These steps are performed either a maximum number of times (``max_trials``) or
+  until one of the special stop criteria is met (see ``stop_n_inliers`` and
+  ``stop_score``). The final model is estimated using all inlier samples (consensus
+  set) of the previously determined best model.
+
+  The ``is_data_valid`` and ``is_model_valid`` functions allow one to identify
+  and reject degenerate combinations of random sub-samples. If the estimated
+  model is not needed for identifying degenerate cases, ``is_data_valid``
+  should be used as it is called prior to fitting the model, leading to better
+  computational performance.
+
+.. dropdown:: References
+
+  * https://en.wikipedia.org/wiki/RANSAC
+  * `"Random Sample Consensus: A Paradigm for Model Fitting with Applications to
+    Image Analysis and Automated Cartography"
+    `_
+    Martin A. Fischler and Robert C. Bolles - SRI International (1981)
+  * `"Performance Evaluation of RANSAC Family"
+    `_
+    Sunglok Choi, Taemin Kim and Wonpil Yu - BMVC (2009)

.. _theil_sen_regression:

Theil-Sen estimator: generalized-median-based estimator
--------------------------------------------------------

@@ -1595,47 +1539,45 @@ that the robustness of the estimator decreases quickly with the dimensionality
of the problem. It loses its robustness properties and becomes no better than
an ordinary least squares in high dimension.

-.. topic:: Examples:
+.. rubric:: Examples

-  * :ref:`sphx_glr_auto_examples_linear_model_plot_theilsen.py`
-  * :ref:`sphx_glr_auto_examples_linear_model_plot_robust_fit.py`
+* :ref:`sphx_glr_auto_examples_linear_model_plot_theilsen.py`
+* :ref:`sphx_glr_auto_examples_linear_model_plot_robust_fit.py`

-|details-start|
-**Theoretical considerations**
-|details-split|
+.. dropdown:: Theoretical considerations

-:class:`TheilSenRegressor` is comparable to the :ref:`Ordinary Least Squares
-(OLS) ` in terms of asymptotic efficiency and as an
-unbiased estimator. In contrast to OLS, Theil-Sen is a non-parametric
-method which means it makes no assumption about the underlying
-distribution of the data. Since Theil-Sen is a median-based estimator, it
-is more robust against corrupted data aka outliers. In univariate
-setting, Theil-Sen has a breakdown point of about 29.3% in case of a
-simple linear regression which means that it can tolerate arbitrary
-corrupted data of up to 29.3%.
+  :class:`TheilSenRegressor` is comparable to the :ref:`Ordinary Least Squares
+  (OLS) ` in terms of asymptotic efficiency and as an
+  unbiased estimator. In contrast to OLS, Theil-Sen is a non-parametric
+  method, which means it makes no assumption about the underlying
+  distribution of the data. Since Theil-Sen is a median-based estimator, it
+  is more robust against corrupted data, also known as outliers. In a
+  univariate setting, Theil-Sen has a breakdown point of about 29.3% in the
+  case of simple linear regression, which means that it can tolerate up to
+  29.3% of arbitrarily corrupted data.

-.. figure:: ../auto_examples/linear_model/images/sphx_glr_plot_theilsen_001.png
-  :target: ../auto_examples/linear_model/plot_theilsen.html
-  :align: center
-  :scale: 50%
+  .. figure:: ../auto_examples/linear_model/images/sphx_glr_plot_theilsen_001.png
+    :target: ../auto_examples/linear_model/plot_theilsen.html
+    :align: center
+    :scale: 50%

-The implementation of :class:`TheilSenRegressor` in scikit-learn follows a
-generalization to a multivariate linear regression model [#f1]_ using the
-spatial median which is a generalization of the median to multiple
-dimensions [#f2]_.
+  The implementation of :class:`TheilSenRegressor` in scikit-learn follows a
+  generalization to a multivariate linear regression model [#f1]_ using the
+  spatial median, which is a generalization of the median to multiple
+  dimensions [#f2]_.

-In terms of time and space complexity, Theil-Sen scales according to
+  In terms of time and space complexity, Theil-Sen scales according to

-.. math::
-    \binom{n_{\text{samples}}}{n_{\text{subsamples}}}
+  .. math::
+      \binom{n_{\text{samples}}}{n_{\text{subsamples}}}

-which makes it infeasible to be applied exhaustively to problems with a
-large number of samples and features. Therefore, the magnitude of a
-subpopulation can be chosen to limit the time and space complexity by
-considering only a random subset of all possible combinations.
+  which makes it infeasible to be applied exhaustively to problems with a
+  large number of samples and features. Therefore, the magnitude of a
+  subpopulation can be chosen to limit the time and space complexity by
+  considering only a random subset of all possible combinations.

-.. topic:: References:
+  .. rubric:: References

 .. [#f1] Xin Dang, Hanxiang Peng, Xueqin Wang and Heping Zhang:
    `Theil-Sen Estimators in a Multiple Linear Regression Model.
    `_

@@ -1643,18 +1585,16 @@ considering only a random subset of all possible combinations.
    Also see the `Wikipedia page
    `_

-|details-end|
-
.. _huber_regression:

Huber Regression
----------------

-The :class:`HuberRegressor` is different to :class:`Ridge` because it applies a
-linear loss to samples that are classified as outliers.
+The :class:`HuberRegressor` is different from :class:`Ridge` because it applies a
+linear loss to samples that are defined as outliers by the `epsilon` parameter.
A sample is classified as an inlier if the absolute error of that sample is
-lesser than a certain threshold. It differs from :class:`TheilSenRegressor`
+less than the threshold `epsilon`. It differs from :class:`TheilSenRegressor`
and :class:`RANSACRegressor` because it does not ignore the effect of the outliers
but gives a lesser weight to them.

@@ -1663,38 +1603,34 @@ but gives a lesser weight to them.
   :align: center
   :scale: 50%

-..
topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_linear_model_plot_huber_vs_ridge.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_huber_vs_ridge.py` -|details-start| -**Mathematical details** -|details-split| +.. dropdown:: Mathematical details -The loss function that :class:`HuberRegressor` minimizes is given by + :class:`HuberRegressor` minimizes -.. math:: + .. math:: - \min_{w, \sigma} {\sum_{i=1}^n\left(\sigma + H_{\epsilon}\left(\frac{X_{i}w - y_{i}}{\sigma}\right)\sigma\right) + \alpha {||w||_2}^2} + \min_{w, \sigma} {\sum_{i=1}^n\left(\sigma + H_{\epsilon}\left(\frac{X_{i}w - y_{i}}{\sigma}\right)\sigma\right) + \alpha {||w||_2}^2} -where + where the loss function is given by -.. math:: + .. math:: - H_{\epsilon}(z) = \begin{cases} - z^2, & \text {if } |z| < \epsilon, \\ - 2\epsilon|z| - \epsilon^2, & \text{otherwise} - \end{cases} + H_{\epsilon}(z) = \begin{cases} + z^2, & \text {if } |z| < \epsilon, \\ + 2\epsilon|z| - \epsilon^2, & \text{otherwise} + \end{cases} -It is advised to set the parameter ``epsilon`` to 1.35 to achieve 95% -statistical efficiency. + It is advised to set the parameter ``epsilon`` to 1.35 to achieve 95% + statistical efficiency. -.. topic:: References: + .. rubric:: References * Peter J. Huber, Elvezio M. Ronchetti: Robust Statistics, Concomitant scale - estimates, pg 172 - -|details-end| + estimates, p. 172. The :class:`HuberRegressor` differs from using :class:`SGDRegressor` with loss set to `huber` in the following ways. @@ -1708,10 +1644,10 @@ in the following ways. samples while :class:`SGDRegressor` needs a number of passes on the training data to produce the same robustness. -Note that this estimator is different from the R implementation of Robust Regression -(https://stats.oarc.ucla.edu/r/dae/robust-regression/) because the R implementation does a weighted least -squares implementation with weights given to each sample on the basis of how much the residual is -greater than a certain threshold. +Note that this estimator is different from the `R implementation of Robust +Regression `_ because the R +implementation does a weighted least squares implementation with weights given to each +sample on the basis of how much the residual is greater than a certain threshold. .. _quantile_regression: @@ -1745,59 +1681,51 @@ Most implementations of quantile regression are based on linear programming problem. The current implementation is based on :func:`scipy.optimize.linprog`. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_linear_model_plot_quantile_regression.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_quantile_regression.py` -|details-start| -**Mathematical details** -|details-split| +.. dropdown:: Mathematical details -As a linear model, the :class:`QuantileRegressor` gives linear predictions -:math:`\hat{y}(w, X) = Xw` for the :math:`q`-th quantile, :math:`q \in (0, 1)`. -The weights or coefficients :math:`w` are then found by the following -minimization problem: + As a linear model, the :class:`QuantileRegressor` gives linear predictions + :math:`\hat{y}(w, X) = Xw` for the :math:`q`-th quantile, :math:`q \in (0, 1)`. + The weights or coefficients :math:`w` are then found by the following + minimization problem: -.. math:: - \min_{w} {\frac{1}{n_{\text{samples}}} - \sum_i PB_q(y_i - X_i w) + \alpha ||w||_1}. + .. math:: + \min_{w} {\frac{1}{n_{\text{samples}}} + \sum_i PB_q(y_i - X_i w) + \alpha ||w||_1}. 
-This consists of the pinball loss (also known as linear loss), -see also :class:`~sklearn.metrics.mean_pinball_loss`, + This consists of the pinball loss (also known as linear loss), + see also :class:`~sklearn.metrics.mean_pinball_loss`, -.. math:: - PB_q(t) = q \max(t, 0) + (1 - q) \max(-t, 0) = - \begin{cases} - q t, & t > 0, \\ - 0, & t = 0, \\ - (q-1) t, & t < 0 - \end{cases} - -and the L1 penalty controlled by parameter ``alpha``, similar to -:class:`Lasso`. + .. math:: + PB_q(t) = q \max(t, 0) + (1 - q) \max(-t, 0) = + \begin{cases} + q t, & t > 0, \\ + 0, & t = 0, \\ + (q-1) t, & t < 0 + \end{cases} -As the pinball loss is only linear in the residuals, quantile regression is -much more robust to outliers than squared error based estimation of the mean. -Somewhat in between is the :class:`HuberRegressor`. + and the L1 penalty controlled by parameter ``alpha``, similar to + :class:`Lasso`. -|details-end| + As the pinball loss is only linear in the residuals, quantile regression is + much more robust to outliers than squared error based estimation of the mean. + Somewhat in between is the :class:`HuberRegressor`. -|details-start| -**References** -|details-split| +.. dropdown:: References -* Koenker, R., & Bassett Jr, G. (1978). `Regression quantiles. - `_ - Econometrica: journal of the Econometric Society, 33-50. + * Koenker, R., & Bassett Jr, G. (1978). `Regression quantiles. + `_ + Econometrica: journal of the Econometric Society, 33-50. -* Portnoy, S., & Koenker, R. (1997). :doi:`The Gaussian hare and the Laplacian - tortoise: computability of squared-error versus absolute-error estimators. - Statistical Science, 12, 279-300 <10.1214/ss/1030037960>`. + * Portnoy, S., & Koenker, R. (1997). :doi:`The Gaussian hare and the Laplacian + tortoise: computability of squared-error versus absolute-error estimators. + Statistical Science, 12, 279-300 <10.1214/ss/1030037960>`. -* Koenker, R. (2005). :doi:`Quantile Regression <10.1017/CBO9780511754098>`. - Cambridge University Press. - -|details-end| + * Koenker, R. (2005). :doi:`Quantile Regression <10.1017/CBO9780511754098>`. + Cambridge University Press. .. _polynomial_regression: @@ -1812,38 +1740,34 @@ on nonlinear functions of the data. This approach maintains the generally fast performance of linear methods, while allowing them to fit a much wider range of data. -|details-start| -**Mathematical details** -|details-split| - -For example, a simple linear regression can be extended by constructing -**polynomial features** from the coefficients. In the standard linear -regression case, you might have a model that looks like this for -two-dimensional data: +.. dropdown:: Mathematical details -.. math:: \hat{y}(w, x) = w_0 + w_1 x_1 + w_2 x_2 + For example, a simple linear regression can be extended by constructing + **polynomial features** from the coefficients. In the standard linear + regression case, you might have a model that looks like this for + two-dimensional data: -If we want to fit a paraboloid to the data instead of a plane, we can combine -the features in second-order polynomials, so that the model looks like this: + .. math:: \hat{y}(w, x) = w_0 + w_1 x_1 + w_2 x_2 -.. 
math:: \hat{y}(w, x) = w_0 + w_1 x_1 + w_2 x_2 + w_3 x_1 x_2 + w_4 x_1^2 + w_5 x_2^2 + If we want to fit a paraboloid to the data instead of a plane, we can combine + the features in second-order polynomials, so that the model looks like this: -The (sometimes surprising) observation is that this is *still a linear model*: -to see this, imagine creating a new set of features + .. math:: \hat{y}(w, x) = w_0 + w_1 x_1 + w_2 x_2 + w_3 x_1 x_2 + w_4 x_1^2 + w_5 x_2^2 -.. math:: z = [x_1, x_2, x_1 x_2, x_1^2, x_2^2] + The (sometimes surprising) observation is that this is *still a linear model*: + to see this, imagine creating a new set of features -With this re-labeling of the data, our problem can be written + .. math:: z = [x_1, x_2, x_1 x_2, x_1^2, x_2^2] -.. math:: \hat{y}(w, z) = w_0 + w_1 z_1 + w_2 z_2 + w_3 z_3 + w_4 z_4 + w_5 z_5 + With this re-labeling of the data, our problem can be written -We see that the resulting *polynomial regression* is in the same class of -linear models we considered above (i.e. the model is linear in :math:`w`) -and can be solved by the same techniques. By considering linear fits within -a higher-dimensional space built with these basis functions, the model has the -flexibility to fit a much broader range of data. + .. math:: \hat{y}(w, z) = w_0 + w_1 z_1 + w_2 z_2 + w_3 z_3 + w_4 z_4 + w_5 z_5 -|details-end| + We see that the resulting *polynomial regression* is in the same class of + linear models we considered above (i.e. the model is linear in :math:`w`) + and can be solved by the same techniques. By considering linear fits within + a higher-dimensional space built with these basis functions, the model has the + flexibility to fit a much broader range of data. Here is an example of applying this idea to one-dimensional data, using polynomial features of varying degrees: diff --git a/doc/modules/manifold.rst b/doc/modules/manifold.rst index 7cc6776e37daa..fec6e96153323 100644 --- a/doc/modules/manifold.rst +++ b/doc/modules/manifold.rst @@ -7,16 +7,14 @@ Manifold learning ================= -.. rst-class:: quote - - | Look for the bare necessities - | The simple bare necessities - | Forget about your worries and your strife - | I mean the bare necessities - | Old Mother Nature's recipes - | That bring the bare necessities of life - | - | -- Baloo's song [The Jungle Book] +| Look for the bare necessities +| The simple bare necessities +| Forget about your worries and your strife +| I mean the bare necessities +| Old Mother Nature's recipes +| That bring the bare necessities of life +| +| -- Baloo's song [The Jungle Book] @@ -102,13 +100,20 @@ unsupervised: it learns the high-dimensional structure of the data from the data itself, without the use of predetermined classifications. -.. topic:: Examples: +.. rubric:: Examples + +* See :ref:`sphx_glr_auto_examples_manifold_plot_lle_digits.py` for an example of + dimensionality reduction on handwritten digits. + +* See :ref:`sphx_glr_auto_examples_manifold_plot_compare_methods.py` for an example of + dimensionality reduction on a toy "S-curve" dataset. - * See :ref:`sphx_glr_auto_examples_manifold_plot_lle_digits.py` for an example of - dimensionality reduction on handwritten digits. +* See :ref:`sphx_glr_auto_examples_applications_plot_stock_market.py` for an example of + using manifold learning to map the stock market structure based on historical stock + prices. - * See :ref:`sphx_glr_auto_examples_manifold_plot_compare_methods.py` for an example of - dimensionality reduction on a toy "S-curve" dataset. 
+* See :ref:`sphx_glr_auto_examples_manifold_plot_manifold_sphere.py` for an example of + manifold learning techniques applied to a spherical data-set. The manifold learning implementations available in scikit-learn are summarized below @@ -130,47 +135,43 @@ distances between all points. Isomap can be performed with the object :align: center :scale: 50 -|details-start| -**Complexity** -|details-split| +.. dropdown:: Complexity -The Isomap algorithm comprises three stages: + The Isomap algorithm comprises three stages: -1. **Nearest neighbor search.** Isomap uses - :class:`~sklearn.neighbors.BallTree` for efficient neighbor search. - The cost is approximately :math:`O[D \log(k) N \log(N)]`, for :math:`k` - nearest neighbors of :math:`N` points in :math:`D` dimensions. + 1. **Nearest neighbor search.** Isomap uses + :class:`~sklearn.neighbors.BallTree` for efficient neighbor search. + The cost is approximately :math:`O[D \log(k) N \log(N)]`, for :math:`k` + nearest neighbors of :math:`N` points in :math:`D` dimensions. -2. **Shortest-path graph search.** The most efficient known algorithms - for this are *Dijkstra's Algorithm*, which is approximately - :math:`O[N^2(k + \log(N))]`, or the *Floyd-Warshall algorithm*, which - is :math:`O[N^3]`. The algorithm can be selected by the user with - the ``path_method`` keyword of ``Isomap``. If unspecified, the code - attempts to choose the best algorithm for the input data. + 2. **Shortest-path graph search.** The most efficient known algorithms + for this are *Dijkstra's Algorithm*, which is approximately + :math:`O[N^2(k + \log(N))]`, or the *Floyd-Warshall algorithm*, which + is :math:`O[N^3]`. The algorithm can be selected by the user with + the ``path_method`` keyword of ``Isomap``. If unspecified, the code + attempts to choose the best algorithm for the input data. -3. **Partial eigenvalue decomposition.** The embedding is encoded in the - eigenvectors corresponding to the :math:`d` largest eigenvalues of the - :math:`N \times N` isomap kernel. For a dense solver, the cost is - approximately :math:`O[d N^2]`. This cost can often be improved using - the ``ARPACK`` solver. The eigensolver can be specified by the user - with the ``eigen_solver`` keyword of ``Isomap``. If unspecified, the - code attempts to choose the best algorithm for the input data. + 3. **Partial eigenvalue decomposition.** The embedding is encoded in the + eigenvectors corresponding to the :math:`d` largest eigenvalues of the + :math:`N \times N` isomap kernel. For a dense solver, the cost is + approximately :math:`O[d N^2]`. This cost can often be improved using + the ``ARPACK`` solver. The eigensolver can be specified by the user + with the ``eigen_solver`` keyword of ``Isomap``. If unspecified, the + code attempts to choose the best algorithm for the input data. -The overall complexity of Isomap is -:math:`O[D \log(k) N \log(N)] + O[N^2(k + \log(N))] + O[d N^2]`. + The overall complexity of Isomap is + :math:`O[D \log(k) N \log(N)] + O[N^2(k + \log(N))] + O[d N^2]`. -* :math:`N` : number of training data points -* :math:`D` : input dimension -* :math:`k` : number of nearest neighbors -* :math:`d` : output dimension + * :math:`N` : number of training data points + * :math:`D` : input dimension + * :math:`k` : number of nearest neighbors + * :math:`d` : output dimension -|details-end| +.. rubric:: References -.. topic:: References: - - * `"A global geometric framework for nonlinear dimensionality reduction" - `_ - Tenenbaum, J.B.; De Silva, V.; & Langford, J.C. 
Science 290 (5500) +* `"A global geometric framework for nonlinear dimensionality reduction" + `_ + Tenenbaum, J.B.; De Silva, V.; & Langford, J.C. Science 290 (5500) .. _locally_linear_embedding: @@ -191,36 +192,32 @@ Locally linear embedding can be performed with function :align: center :scale: 50 -|details-start| -**Complexity** -|details-split| - -The standard LLE algorithm comprises three stages: +.. dropdown:: Complexity -1. **Nearest Neighbors Search**. See discussion under Isomap above. + The standard LLE algorithm comprises three stages: -2. **Weight Matrix Construction**. :math:`O[D N k^3]`. - The construction of the LLE weight matrix involves the solution of a - :math:`k \times k` linear equation for each of the :math:`N` local - neighborhoods + 1. **Nearest Neighbors Search**. See discussion under Isomap above. -3. **Partial Eigenvalue Decomposition**. See discussion under Isomap above. + 2. **Weight Matrix Construction**. :math:`O[D N k^3]`. + The construction of the LLE weight matrix involves the solution of a + :math:`k \times k` linear equation for each of the :math:`N` local + neighborhoods. -The overall complexity of standard LLE is -:math:`O[D \log(k) N \log(N)] + O[D N k^3] + O[d N^2]`. + 3. **Partial Eigenvalue Decomposition**. See discussion under Isomap above. -* :math:`N` : number of training data points -* :math:`D` : input dimension -* :math:`k` : number of nearest neighbors -* :math:`d` : output dimension + The overall complexity of standard LLE is + :math:`O[D \log(k) N \log(N)] + O[D N k^3] + O[d N^2]`. -|details-end| + * :math:`N` : number of training data points + * :math:`D` : input dimension + * :math:`k` : number of nearest neighbors + * :math:`d` : output dimension -.. topic:: References: +.. rubric:: References - * `"Nonlinear dimensionality reduction by locally linear embedding" - `_ - Roweis, S. & Saul, L. Science 290:2323 (2000) +* `"Nonlinear dimensionality reduction by locally linear embedding" + `_ + Roweis, S. & Saul, L. Science 290:2323 (2000) Modified Locally Linear Embedding @@ -248,38 +245,34 @@ It requires ``n_neighbors > n_components``. :align: center :scale: 50 -|details-start| -**Complexity** -|details-split| - -The MLLE algorithm comprises three stages: +.. dropdown:: Complexity -1. **Nearest Neighbors Search**. Same as standard LLE + The MLLE algorithm comprises three stages: -2. **Weight Matrix Construction**. Approximately - :math:`O[D N k^3] + O[N (k-D) k^2]`. The first term is exactly equivalent - to that of standard LLE. The second term has to do with constructing the - weight matrix from multiple weights. In practice, the added cost of - constructing the MLLE weight matrix is relatively small compared to the - cost of stages 1 and 3. + 1. **Nearest Neighbors Search**. Same as standard LLE -3. **Partial Eigenvalue Decomposition**. Same as standard LLE + 2. **Weight Matrix Construction**. Approximately + :math:`O[D N k^3] + O[N (k-D) k^2]`. The first term is exactly equivalent + to that of standard LLE. The second term has to do with constructing the + weight matrix from multiple weights. In practice, the added cost of + constructing the MLLE weight matrix is relatively small compared to the + cost of stages 1 and 3. -The overall complexity of MLLE is -:math:`O[D \log(k) N \log(N)] + O[D N k^3] + O[N (k-D) k^2] + O[d N^2]`. + 3. **Partial Eigenvalue Decomposition**. 
Same as standard LLE -* :math:`N` : number of training data points -* :math:`D` : input dimension -* :math:`k` : number of nearest neighbors -* :math:`d` : output dimension + The overall complexity of MLLE is + :math:`O[D \log(k) N \log(N)] + O[D N k^3] + O[N (k-D) k^2] + O[d N^2]`. -|details-end| + * :math:`N` : number of training data points + * :math:`D` : input dimension + * :math:`k` : number of nearest neighbors + * :math:`d` : output dimension -.. topic:: References: +.. rubric:: References - * `"MLLE: Modified Locally Linear Embedding Using Multiple Weights" - `_ - Zhang, Z. & Wang, J. +* `"MLLE: Modified Locally Linear Embedding Using Multiple Weights" + `_ + Zhang, Z. & Wang, J. Hessian Eigenmapping @@ -301,36 +294,32 @@ It requires ``n_neighbors > n_components * (n_components + 3) / 2``. :align: center :scale: 50 -|details-start| -**Complexity** -|details-split| +.. dropdown:: Complexity -The HLLE algorithm comprises three stages: + The HLLE algorithm comprises three stages: -1. **Nearest Neighbors Search**. Same as standard LLE + 1. **Nearest Neighbors Search**. Same as standard LLE -2. **Weight Matrix Construction**. Approximately - :math:`O[D N k^3] + O[N d^6]`. The first term reflects a similar - cost to that of standard LLE. The second term comes from a QR - decomposition of the local hessian estimator. + 2. **Weight Matrix Construction**. Approximately + :math:`O[D N k^3] + O[N d^6]`. The first term reflects a similar + cost to that of standard LLE. The second term comes from a QR + decomposition of the local hessian estimator. -3. **Partial Eigenvalue Decomposition**. Same as standard LLE + 3. **Partial Eigenvalue Decomposition**. Same as standard LLE. -The overall complexity of standard HLLE is -:math:`O[D \log(k) N \log(N)] + O[D N k^3] + O[N d^6] + O[d N^2]`. + The overall complexity of standard HLLE is + :math:`O[D \log(k) N \log(N)] + O[D N k^3] + O[N d^6] + O[d N^2]`. -* :math:`N` : number of training data points -* :math:`D` : input dimension -* :math:`k` : number of nearest neighbors -* :math:`d` : output dimension + * :math:`N` : number of training data points + * :math:`D` : input dimension + * :math:`k` : number of nearest neighbors + * :math:`d` : output dimension -|details-end| +.. rubric:: References -.. topic:: References: - - * `"Hessian Eigenmaps: Locally linear embedding techniques for - high-dimensional data" `_ - Donoho, D. & Grimes, C. Proc Natl Acad Sci USA. 100:5591 (2003) +* `"Hessian Eigenmaps: Locally linear embedding techniques for + high-dimensional data" `_ + Donoho, D. & Grimes, C. Proc Natl Acad Sci USA. 100:5591 (2003) .. _spectral_embedding: @@ -348,38 +337,34 @@ preserving local distances. Spectral embedding can be performed with the function :func:`spectral_embedding` or its object-oriented counterpart :class:`SpectralEmbedding`. -|details-start| -**Complexity** -|details-split| - -The Spectral Embedding (Laplacian Eigenmaps) algorithm comprises three stages: +.. dropdown:: Complexity -1. **Weighted Graph Construction**. Transform the raw input data into - graph representation using affinity (adjacency) matrix representation. + The Spectral Embedding (Laplacian Eigenmaps) algorithm comprises three stages: -2. **Graph Laplacian Construction**. unnormalized Graph Laplacian - is constructed as :math:`L = D - A` for and normalized one as - :math:`L = D^{-\frac{1}{2}} (D - A) D^{-\frac{1}{2}}`. + 1. **Weighted Graph Construction**. Transform the raw input data into + graph representation using affinity (adjacency) matrix representation. -3. 
**Partial Eigenvalue Decomposition**. Eigenvalue decomposition is
-   done on graph Laplacian
+  2. **Graph Laplacian Construction**. The unnormalized graph Laplacian
+     is constructed as :math:`L = D - A` and the normalized one as
+     :math:`L = D^{-\frac{1}{2}} (D - A) D^{-\frac{1}{2}}`.

-The overall complexity of spectral embedding is
-:math:`O[D \log(k) N \log(N)] + O[D N k^3] + O[d N^2]`.
+  3. **Partial Eigenvalue Decomposition**. Eigenvalue decomposition is
+     done on the graph Laplacian.

-* :math:`N` : number of training data points
-* :math:`D` : input dimension
-* :math:`k` : number of nearest neighbors
-* :math:`d` : output dimension
+  The overall complexity of spectral embedding is
+  :math:`O[D \log(k) N \log(N)] + O[D N k^3] + O[d N^2]`.

-|details-end|
+  * :math:`N` : number of training data points
+  * :math:`D` : input dimension
+  * :math:`k` : number of nearest neighbors
+  * :math:`d` : output dimension

-.. topic:: References:
+.. rubric:: References

-  * `"Laplacian Eigenmaps for Dimensionality Reduction
-    and Data Representation"
-    `_
-    M. Belkin, P. Niyogi, Neural Computation, June 2003; 15 (6):1373-1396
+* `"Laplacian Eigenmaps for Dimensionality Reduction
+  and Data Representation"
+  `_
+  M. Belkin, P. Niyogi, Neural Computation, June 2003; 15 (6):1373-1396


Local Tangent Space Alignment
@@ -399,36 +384,32 @@ tangent spaces to learn the embedding. LTSA can be performed with function
   :align: center
   :scale: 50

-|details-start|
-**Complexity**
-|details-split|
-
-The LTSA algorithm comprises three stages:
+.. dropdown:: Complexity

-1. **Nearest Neighbors Search**. Same as standard LLE
+  The LTSA algorithm comprises three stages:

-2. **Weight Matrix Construction**. Approximately
-   :math:`O[D N k^3] + O[k^2 d]`. The first term reflects a similar
-   cost to that of standard LLE.
+  1. **Nearest Neighbors Search**. Same as standard LLE.

-3. **Partial Eigenvalue Decomposition**. Same as standard LLE
+  2. **Weight Matrix Construction**. Approximately
+     :math:`O[D N k^3] + O[k^2 d]`. The first term reflects a similar
+     cost to that of standard LLE.

-The overall complexity of standard LTSA is
-:math:`O[D \log(k) N \log(N)] + O[D N k^3] + O[k^2 d] + O[d N^2]`.
+  3. **Partial Eigenvalue Decomposition**. Same as standard LLE.

-* :math:`N` : number of training data points
-* :math:`D` : input dimension
-* :math:`k` : number of nearest neighbors
-* :math:`d` : output dimension
+  The overall complexity of standard LTSA is
+  :math:`O[D \log(k) N \log(N)] + O[D N k^3] + O[k^2 d] + O[d N^2]`.

-|details-end|
+  * :math:`N` : number of training data points
+  * :math:`D` : input dimension
+  * :math:`k` : number of nearest neighbors
+  * :math:`d` : output dimension

-.. topic:: References:
+.. rubric:: References

-  * :arxiv:`"Principal manifolds and nonlinear dimensionality reduction via
-    tangent space alignment"
-    `
-    Zhang, Z. & Zha, H. Journal of Shanghai Univ. 8:406 (2004)
+* :arxiv:`"Principal manifolds and nonlinear dimensionality reduction via
+  tangent space alignment"
+  `
+  Zhang, Z. & Zha, H. Journal of Shanghai Univ. 8:406 (2004)

.. _multidimensional_scaling:

Multi-dimensional Scaling (MDS)
@@ -440,20 +421,19 @@ Multi-dimensional Scaling (MDS)
representation of the data in which the distances respect well the
distances in the original high-dimensional space.

-In general, :class:`MDS` is a technique used for analyzing similarity or
-dissimilarity data. It attempts to model similarity or dissimilarity data as
-distances in a geometric spaces.
The data can be ratings of similarity between +In general, :class:`MDS` is a technique used for analyzing +dissimilarity data. It attempts to model dissimilarities as +distances in a Euclidean space. The data can be ratings of dissimilarity between objects, interaction frequencies of molecules, or trade indices between countries. -There exists two types of MDS algorithm: metric and non metric. In -scikit-learn, the class :class:`MDS` implements both. In Metric MDS, the input -similarity matrix arises from a metric (and thus respects the triangular -inequality), the distances between output two points are then set to be as -close as possible to the similarity or dissimilarity data. In the non-metric -version, the algorithms will try to preserve the order of the distances, and +There exist two types of MDS algorithm: metric and non-metric. In +scikit-learn, the class :class:`MDS` implements both. In metric MDS, +the distances in the embedding space are set as +close as possible to the dissimilarity data. In the non-metric +version, the algorithm will try to preserve the order of the distances, and hence seek for a monotonic relationship between the distances in the embedded -space and the similarities/dissimilarities. +space and the input dissimilarities. .. figure:: ../auto_examples/manifold/images/sphx_glr_plot_lle_digits_010.png :target: ../auto_examples/manifold/plot_lle_digits.html @@ -461,73 +441,68 @@ space and the similarities/dissimilarities. :scale: 50 -Let :math:`S` be the similarity matrix, and :math:`X` the coordinates of the -:math:`n` input points. Disparities :math:`\hat{d}_{ij}` are transformation of -the similarities chosen in some optimal ways. The objective, called the -stress, is then defined by :math:`\sum_{i < j} d_{ij}(X) - \hat{d}_{ij}(X)` +Let :math:`\delta_{ij}` be the dissimilarity matrix between the +:math:`n` input points (possibly arising as some pairwise distances +:math:`d_{ij}(X)` between the coordinates :math:`X` of the input points). +Disparities :math:`\hat{d}_{ij} = f(\delta_{ij})` are some transformation of +the dissimilarities. The MDS objective, called the raw stress, is then +defined by :math:`\sum_{i < j} (\hat{d}_{ij} - d_{ij}(Z))^2`, +where :math:`d_{ij}(Z)` are the pairwise distances between the +coordinates :math:`Z` of the embedded points. -|details-start| -**Metric MDS** -|details-split| +.. dropdown:: Metric MDS -The simplest metric :class:`MDS` model, called *absolute MDS*, disparities are defined by -:math:`\hat{d}_{ij} = S_{ij}`. With absolute MDS, the value :math:`S_{ij}` -should then correspond exactly to the distance between point :math:`i` and -:math:`j` in the embedding point. + In the metric :class:`MDS` model (sometimes also called *absolute MDS*), + disparities are simply equal to the input dissimilarities + :math:`\hat{d}_{ij} = \delta_{ij}`. -Most commonly, disparities are set to :math:`\hat{d}_{ij} = b S_{ij}`. +.. dropdown:: Nonmetric MDS -|details-end| + Non metric :class:`MDS` focuses on the ordination of the data. If + :math:`\delta_{ij} > \delta_{kl}`, then the embedding + seeks to enforce :math:`d_{ij}(Z) > d_{kl}(Z)`. A simple algorithm + to enforce proper ordination is to use an + isotonic regression of :math:`d_{ij}(Z)` on :math:`\delta_{ij}`, yielding + disparities :math:`\hat{d}_{ij}` that are a monotonic transformation + of dissimilarities :math:`\delta_{ij}` and hence having the same ordering. + This is done repeatedly after every step of the optimization algorithm. 
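+
+  As a rough sketch of this isotonic step (toy numbers; this illustrates the
+  idea rather than scikit-learn's internal implementation)::
+
+    >>> import numpy as np
+    >>> from sklearn.isotonic import IsotonicRegression
+    >>> delta = np.array([1.0, 2.0, 3.0, 4.0])  # input dissimilarities
+    >>> d_Z = np.array([0.5, 1.4, 1.1, 2.0])    # current embedding distances
+    >>> IsotonicRegression().fit_transform(delta, d_Z)  # disparities
+    array([0.5 , 1.25, 1.25, 2.  ])
+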
+  In order to avoid the trivial solution where all embedding points are
+  overlapping, the disparities :math:`\hat{d}_{ij}` are normalized.

-|details-start|
-**Nonmetric MDS**
-|details-split|
+  Note that since we only care about relative ordering, our objective should be
+  invariant to simple translation and scaling; however, the stress used in metric
+  MDS is sensitive to scaling. To address this, non-metric MDS returns
+  normalized stress, also known as Stress-1, defined as

-Non metric :class:`MDS` focuses on the ordination of the data. If
-:math:`S_{ij} > S_{jk}`, then the embedding should enforce :math:`d_{ij} <
-d_{jk}`. For this reason, we discuss it in terms of dissimilarities
-(:math:`\delta_{ij}`) instead of similarities (:math:`S_{ij}`). Note that
-dissimilarities can easily be obtained from similarities through a simple
-transform, e.g. :math:`\delta_{ij}=c_1-c_2 S_{ij}` for some real constants
-:math:`c_1, c_2`. A simple algorithm to enforce proper ordination is to use a
-monotonic regression of :math:`d_{ij}` on :math:`\delta_{ij}`, yielding
-disparities :math:`\hat{d}_{ij}` in the same order as :math:`\delta_{ij}`.
+  .. math::
+      \sqrt{\frac{\sum_{i < j} (\hat{d}_{ij} - d_{ij}(Z))^2}{\sum_{i < j}
+      d_{ij}(Z)^2}}.

-A trivial solution to this problem is to set all the points on the origin. In
-order to avoid that, the disparities :math:`\hat{d}_{ij}` are normalized. Note
-that since we only care about relative ordering, our objective should be
-invariant to simple translation and scaling, however the stress used in metric
-MDS is sensitive to scaling. To address this, non-metric MDS may use a
-normalized stress, known as Stress-1 defined as
+  Normalized Stress-1 is returned if `normalized_stress=True`.

-.. math::
-    \sqrt{\frac{\sum_{i < j} (d_{ij} - \hat{d}_{ij})^2}{\sum_{i < j} d_{ij}^2}}.

-The use of normalized Stress-1 can be enabled by setting `normalized_stress=True`,
-however it is only compatible with the non-metric MDS problem and will be ignored
-in the metric case.

-.. figure:: ../auto_examples/manifold/images/sphx_glr_plot_mds_001.png
-  :target: ../auto_examples/manifold/plot_mds.html
-  :align: center
-  :scale: 60
+  .. figure:: ../auto_examples/manifold/images/sphx_glr_plot_mds_001.png
+    :target: ../auto_examples/manifold/plot_mds.html
+    :align: center
+    :scale: 60

-|details-end|
+.. rubric:: References

-.. topic:: References:
+* `"More on Multidimensional Scaling and Unfolding in R: smacof Version 2"
+  `_
+  Mair P, Groenen P., de Leeuw J. Journal of Statistical Software (2022)

-  * `"Modern Multidimensional Scaling - Theory and Applications"
-    `_
-    Borg, I.; Groenen P. Springer Series in Statistics (1997)
+* `"Modern Multidimensional Scaling - Theory and Applications"
+  `_
+  Borg, I.; Groenen P. Springer Series in Statistics (1997)

-  * `"Nonmetric multidimensional scaling: a numerical method"
-    `_
-    Kruskal, J. Psychometrika, 29 (1964)
+* `"Nonmetric multidimensional scaling: a numerical method"
+  `_
+  Kruskal, J. Psychometrika, 29 (1964)

-  * `"Multidimensional scaling by optimizing goodness of fit to a nonmetric hypothesis"
-    `_
-    Kruskal, J. Psychometrika, 29, (1964)
+* `"Multidimensional scaling by optimizing goodness of fit to a nonmetric hypothesis"
+  `_
+  Kruskal, J. Psychometrika, 29, (1964)

.. _t_sne:

@@ -575,120 +550,110 @@ The disadvantages to using t-SNE are roughly:
   :align: center
   :scale: 50

-|details-start|
-**Optimizing t-SNE**
-|details-split|
-
-The main purpose of t-SNE is visualization of high-dimensional data. Hence,
Hence, -it works best when the data will be embedded on two or three dimensions. - -Optimizing the KL divergence can be a little bit tricky sometimes. There are -five parameters that control the optimization of t-SNE and therefore possibly -the quality of the resulting embedding: - -* perplexity -* early exaggeration factor -* learning rate -* maximum number of iterations -* angle (not used in the exact method) - -The perplexity is defined as :math:`k=2^{(S)}` where :math:`S` is the Shannon -entropy of the conditional probability distribution. The perplexity of a -:math:`k`-sided die is :math:`k`, so that :math:`k` is effectively the number of -nearest neighbors t-SNE considers when generating the conditional probabilities. -Larger perplexities lead to more nearest neighbors and less sensitive to small -structure. Conversely a lower perplexity considers a smaller number of -neighbors, and thus ignores more global information in favour of the -local neighborhood. As dataset sizes get larger more points will be -required to get a reasonable sample of the local neighborhood, and hence -larger perplexities may be required. Similarly noisier datasets will require -larger perplexity values to encompass enough local neighbors to see beyond -the background noise. - -The maximum number of iterations is usually high enough and does not need -any tuning. The optimization consists of two phases: the early exaggeration -phase and the final optimization. During early exaggeration the joint -probabilities in the original space will be artificially increased by -multiplication with a given factor. Larger factors result in larger gaps -between natural clusters in the data. If the factor is too high, the KL -divergence could increase during this phase. Usually it does not have to be -tuned. A critical parameter is the learning rate. If it is too low gradient -descent will get stuck in a bad local minimum. If it is too high the KL -divergence will increase during optimization. A heuristic suggested in -Belkina et al. (2019) is to set the learning rate to the sample size -divided by the early exaggeration factor. We implement this heuristic -as `learning_rate='auto'` argument. More tips can be found in -Laurens van der Maaten's FAQ (see references). The last parameter, angle, -is a tradeoff between performance and accuracy. Larger angles imply that we -can approximate larger regions by a single point, leading to better speed -but less accurate results. - -`"How to Use t-SNE Effectively" `_ -provides a good discussion of the effects of the various parameters, as well -as interactive plots to explore the effects of different parameters. - -|details-end| - -|details-start| -**Barnes-Hut t-SNE** -|details-split| - -The Barnes-Hut t-SNE that has been implemented here is usually much slower than -other manifold learning algorithms. The optimization is quite difficult -and the computation of the gradient is :math:`O[d N log(N)]`, where :math:`d` -is the number of output dimensions and :math:`N` is the number of samples. The -Barnes-Hut method improves on the exact method where t-SNE complexity is -:math:`O[d N^2]`, but has several other notable differences: - -* The Barnes-Hut implementation only works when the target dimensionality is 3 - or less. The 2D case is typical when building visualizations. -* Barnes-Hut only works with dense input data. 
Sparse data matrices can only be - embedded with the exact method or can be approximated by a dense low rank - projection for instance using :class:`~sklearn.decomposition.PCA` -* Barnes-Hut is an approximation of the exact method. The approximation is - parameterized with the angle parameter, therefore the angle parameter is - unused when method="exact" -* Barnes-Hut is significantly more scalable. Barnes-Hut can be used to embed - hundred of thousands of data points while the exact method can handle - thousands of samples before becoming computationally intractable - -For visualization purpose (which is the main use case of t-SNE), using the -Barnes-Hut method is strongly recommended. The exact t-SNE method is useful -for checking the theoretically properties of the embedding possibly in higher -dimensional space but limit to small datasets due to computational constraints. - -Also note that the digits labels roughly match the natural grouping found by -t-SNE while the linear 2D projection of the PCA model yields a representation -where label regions largely overlap. This is a strong clue that this data can -be well separated by non linear methods that focus on the local structure (e.g. -an SVM with a Gaussian RBF kernel). However, failing to visualize well -separated homogeneously labeled groups with t-SNE in 2D does not necessarily -imply that the data cannot be correctly classified by a supervised model. It -might be the case that 2 dimensions are not high enough to accurately represent -the internal structure of the data. - -|details-end| - -.. topic:: References: - - * `"Visualizing High-Dimensional Data Using t-SNE" - `_ - van der Maaten, L.J.P.; Hinton, G. Journal of Machine Learning Research - (2008) - - * `"t-Distributed Stochastic Neighbor Embedding" - `_ - van der Maaten, L.J.P. - - * `"Accelerating t-SNE using Tree-Based Algorithms" - `_ - van der Maaten, L.J.P.; Journal of Machine Learning Research 15(Oct):3221-3245, 2014. - - * `"Automated optimized parameters for T-distributed stochastic neighbor - embedding improve visualization and analysis of large datasets" - `_ - Belkina, A.C., Ciccolella, C.O., Anno, R., Halpert, R., Spidlen, J., - Snyder-Cappione, J.E., Nature Communications 10, 5415 (2019). +.. dropdown:: Optimizing t-SNE + + The main purpose of t-SNE is visualization of high-dimensional data. Hence, + it works best when the data will be embedded on two or three dimensions. + + Optimizing the KL divergence can be a little bit tricky sometimes. There are + five parameters that control the optimization of t-SNE and therefore possibly + the quality of the resulting embedding: + + * perplexity + * early exaggeration factor + * learning rate + * maximum number of iterations + * angle (not used in the exact method) + + The perplexity is defined as :math:`k=2^{(S)}` where :math:`S` is the Shannon + entropy of the conditional probability distribution. The perplexity of a + :math:`k`-sided die is :math:`k`, so that :math:`k` is effectively the number of + nearest neighbors t-SNE considers when generating the conditional probabilities. + Larger perplexities lead to more nearest neighbors and less sensitive to small + structure. Conversely a lower perplexity considers a smaller number of + neighbors, and thus ignores more global information in favour of the + local neighborhood. As dataset sizes get larger more points will be + required to get a reasonable sample of the local neighborhood, and hence + larger perplexities may be required. 
Similarly noisier datasets will require + larger perplexity values to encompass enough local neighbors to see beyond + the background noise. + + The maximum number of iterations is usually high enough and does not need + any tuning. The optimization consists of two phases: the early exaggeration + phase and the final optimization. During early exaggeration the joint + probabilities in the original space will be artificially increased by + multiplication with a given factor. Larger factors result in larger gaps + between natural clusters in the data. If the factor is too high, the KL + divergence could increase during this phase. Usually it does not have to be + tuned. A critical parameter is the learning rate. If it is too low gradient + descent will get stuck in a bad local minimum. If it is too high the KL + divergence will increase during optimization. A heuristic suggested in + Belkina et al. (2019) is to set the learning rate to the sample size + divided by the early exaggeration factor. We implement this heuristic + as `learning_rate='auto'` argument. More tips can be found in + Laurens van der Maaten's FAQ (see references). The last parameter, angle, + is a tradeoff between performance and accuracy. Larger angles imply that we + can approximate larger regions by a single point, leading to better speed + but less accurate results. + + `"How to Use t-SNE Effectively" `_ + provides a good discussion of the effects of the various parameters, as well + as interactive plots to explore the effects of different parameters. + +.. dropdown:: Barnes-Hut t-SNE + + The Barnes-Hut t-SNE that has been implemented here is usually much slower than + other manifold learning algorithms. The optimization is quite difficult + and the computation of the gradient is :math:`O[d N log(N)]`, where :math:`d` + is the number of output dimensions and :math:`N` is the number of samples. The + Barnes-Hut method improves on the exact method where t-SNE complexity is + :math:`O[d N^2]`, but has several other notable differences: + + * The Barnes-Hut implementation only works when the target dimensionality is 3 + or less. The 2D case is typical when building visualizations. + * Barnes-Hut only works with dense input data. Sparse data matrices can only be + embedded with the exact method or can be approximated by a dense low rank + projection for instance using :class:`~sklearn.decomposition.PCA` + * Barnes-Hut is an approximation of the exact method. The approximation is + parameterized with the angle parameter, therefore the angle parameter is + unused when method="exact" + * Barnes-Hut is significantly more scalable. Barnes-Hut can be used to embed + hundreds of thousands of data points while the exact method can handle + thousands of samples before becoming computationally intractable + + For visualization purpose (which is the main use case of t-SNE), using the + Barnes-Hut method is strongly recommended. The exact t-SNE method is useful + for checking the theoretical properties of the embedding possibly in higher + dimensional space but limited to small datasets due to computational constraints. + + Also note that the digits labels roughly match the natural grouping found by + t-SNE while the linear 2D projection of the PCA model yields a representation + where label regions largely overlap. This is a strong clue that this data can + be well separated by non linear methods that focus on the local structure (e.g. + an SVM with a Gaussian RBF kernel). 
However, failing to visualize well + separated homogeneously labeled groups with t-SNE in 2D does not necessarily + imply that the data cannot be correctly classified by a supervised model. It + might be the case that 2 dimensions are not high enough to accurately represent + the internal structure of the data. + +.. rubric:: References + +* `"Visualizing High-Dimensional Data Using t-SNE" + `_ + van der Maaten, L.J.P.; Hinton, G. Journal of Machine Learning Research (2008) + +* `"t-Distributed Stochastic Neighbor Embedding" + `_ van der Maaten, L.J.P. + +* `"Accelerating t-SNE using Tree-Based Algorithms" + `_ + van der Maaten, L.J.P.; Journal of Machine Learning Research 15(Oct):3221-3245, 2014. + +* `"Automated optimized parameters for T-distributed stochastic neighbor + embedding improve visualization and analysis of large datasets" + `_ + Belkina, A.C., Ciccolella, C.O., Anno, R., Halpert, R., Spidlen, J., + Snyder-Cappione, J.E., Nature Communications 10, 5415 (2019). Tips on practical use ===================== @@ -721,5 +686,5 @@ Tips on practical use .. seealso:: :ref:`random_trees_embedding` can also be useful to derive non-linear - representations of feature space, also it does not perform + representations of feature space, but it does not perform dimensionality reduction. diff --git a/doc/modules/metrics.rst b/doc/modules/metrics.rst index caea39319e869..f65d86a758b03 100644 --- a/doc/modules/metrics.rst +++ b/doc/modules/metrics.rst @@ -87,11 +87,11 @@ represented as tf-idf vectors. can produce normalized vectors, in which case :func:`cosine_similarity` is equivalent to :func:`linear_kernel`, only slower.) -.. topic:: References: +.. rubric:: References - * C.D. Manning, P. Raghavan and H. Schütze (2008). Introduction to - Information Retrieval. Cambridge University Press. - https://nlp.stanford.edu/IR-book/html/htmledition/the-vector-space-model-for-scoring-1.html +* C.D. Manning, P. Raghavan and H. Schütze (2008). Introduction to + Information Retrieval. Cambridge University Press. + https://nlp.stanford.edu/IR-book/html/htmledition/the-vector-space-model-for-scoring-1.html .. _linear_kernel: @@ -111,7 +111,7 @@ Polynomial kernel ----------------- The function :func:`polynomial_kernel` computes the degree-d polynomial kernel between two vectors. The polynomial kernel represents the similarity between two -vectors. Conceptually, the polynomial kernels considers not only the similarity +vectors. Conceptually, the polynomial kernel considers not only the similarity between vectors under the same dimension, but also across dimensions. When used in machine learning algorithms, this allows to account for feature interaction. @@ -222,10 +222,10 @@ which is a distance between discrete probability distributions. The chi squared kernel is most commonly used on histograms (bags) of visual words. -.. topic:: References: +.. rubric:: References - * Zhang, J. and Marszalek, M. and Lazebnik, S. and Schmid, C. - Local features and kernels for classification of texture and object - categories: A comprehensive study - International Journal of Computer Vision 2007 - https://hal.archives-ouvertes.fr/hal-00171412/document +* Zhang, J. and Marszalek, M. and Lazebnik, S. and Schmid, C. 
+ Local features and kernels for classification of texture and object + categories: A comprehensive study + International Journal of Computer Vision 2007 + https://hal.archives-ouvertes.fr/hal-00171412/document diff --git a/doc/modules/mixture.rst b/doc/modules/mixture.rst index df5d8020a1369..694bde784d61e 100644 --- a/doc/modules/mixture.rst +++ b/doc/modules/mixture.rst @@ -42,7 +42,7 @@ algorithm for fitting mixture-of-Gaussian models. It can also draw confidence ellipsoids for multivariate models, and compute the Bayesian Information Criterion to assess the number of clusters in the data. A :meth:`GaussianMixture.fit` method is provided that learns a Gaussian -Mixture Model from train data. Given test data, it can assign to each +Mixture Model from training data. Given test data, it can assign to each sample the Gaussian it most probably belongs to using the :meth:`GaussianMixture.predict` method. @@ -60,128 +60,111 @@ full covariance. :align: center :scale: 75% -.. topic:: Examples: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_mixture_plot_gmm_covariances.py` for an example of - using the Gaussian mixture as clustering on the iris dataset. +* See :ref:`sphx_glr_auto_examples_mixture_plot_gmm_covariances.py` for an example of + using the Gaussian mixture as clustering on the iris dataset. - * See :ref:`sphx_glr_auto_examples_mixture_plot_gmm_pdf.py` for an example on plotting the - density estimation. +* See :ref:`sphx_glr_auto_examples_mixture_plot_gmm_pdf.py` for an example on plotting the + density estimation. -|details-start| -**Pros and cons of class GaussianMixture** -|details-split| +.. dropdown:: Pros and cons of class GaussianMixture -.. topic:: Pros: + .. rubric:: Pros - :Speed: It is the fastest algorithm for learning mixture models + :Speed: It is the fastest algorithm for learning mixture models - :Agnostic: As this algorithm maximizes only the likelihood, it - will not bias the means towards zero, or bias the cluster sizes to - have specific structures that might or might not apply. + :Agnostic: As this algorithm maximizes only the likelihood, it + will not bias the means towards zero, or bias the cluster sizes to + have specific structures that might or might not apply. -.. topic:: Cons: + .. rubric:: Cons - :Singularities: When one has insufficiently many points per - mixture, estimating the covariance matrices becomes difficult, - and the algorithm is known to diverge and find solutions with - infinite likelihood unless one regularizes the covariances artificially. + :Singularities: When one has insufficiently many points per + mixture, estimating the covariance matrices becomes difficult, + and the algorithm is known to diverge and find solutions with + infinite likelihood unless one regularizes the covariances artificially. - :Number of components: This algorithm will always use all the - components it has access to, needing held-out data - or information theoretical criteria to decide how many components to use - in the absence of external cues. + :Number of components: This algorithm will always use all the + components it has access to, needing held-out data + or information theoretical criteria to decide how many components to use + in the absence of external cues. -|details-end| +.. dropdown:: Selecting the number of components in a classical Gaussian Mixture model + The BIC criterion can be used to select the number of components in a Gaussian + Mixture in an efficient way. 
In theory, it recovers the true number of + components only in the asymptotic regime (i.e. if much data is available and + assuming that the data was actually generated i.i.d. from a mixture of Gaussian + distributions). Note that using a :ref:`Variational Bayesian Gaussian mixture ` + avoids the specification of the number of components for a Gaussian mixture + model. -|details-start| -**Selecting the number of components in a classical Gaussian Mixture model** -|details-split| + .. figure:: ../auto_examples/mixture/images/sphx_glr_plot_gmm_selection_002.png + :target: ../auto_examples/mixture/plot_gmm_selection.html + :align: center + :scale: 50% -The BIC criterion can be used to select the number of components in a Gaussian -Mixture in an efficient way. In theory, it recovers the true number of -components only in the asymptotic regime (i.e. if much data is available and -assuming that the data was actually generated i.i.d. from a mixture of Gaussian -distribution). Note that using a :ref:`Variational Bayesian Gaussian mixture ` -avoids the specification of the number of components for a Gaussian mixture -model. + .. rubric:: Examples -.. figure:: ../auto_examples/mixture/images/sphx_glr_plot_gmm_selection_002.png - :target: ../auto_examples/mixture/plot_gmm_selection.html - :align: center - :scale: 50% - -.. topic:: Examples: - - * See :ref:`sphx_glr_auto_examples_mixture_plot_gmm_selection.py` for an example - of model selection performed with classical Gaussian mixture. - -|details-end| + * See :ref:`sphx_glr_auto_examples_mixture_plot_gmm_selection.py` for an example + of model selection performed with classical Gaussian mixture. .. _expectation_maximization: -|details-start| -**Estimation algorithm expectation-maximization** -|details-split| - -The main difficulty in learning Gaussian mixture models from unlabeled -data is that one usually doesn't know which points came from -which latent component (if one has access to this information it gets -very easy to fit a separate Gaussian distribution to each set of -points). `Expectation-maximization -`_ -is a well-founded statistical -algorithm to get around this problem by an iterative process. First -one assumes random components (randomly centered on data points, -learned from k-means, or even just normally distributed around the -origin) and computes for each point a probability of being generated by -each component of the model. Then, one tweaks the -parameters to maximize the likelihood of the data given those -assignments. Repeating this process is guaranteed to always converge -to a local optimum. - -|details-end| - -|details-start| -**Choice of the Initialization method** -|details-split| - -There is a choice of four initialization methods (as well as inputting user defined -initial means) to generate the initial centers for the model components: - -k-means (default) - This applies a traditional k-means clustering algorithm. - This can be computationally expensive compared to other initialization methods. - -k-means++ - This uses the initialization method of k-means clustering: k-means++. - This will pick the first center at random from the data. Subsequent centers will be - chosen from a weighted distribution of the data favouring points further away from - existing centers. k-means++ is the default initialization for k-means so will be - quicker than running a full k-means but can still take a significant amount of - time for large data sets with many components. 
- -random_from_data - This will pick random data points from the input data as the initial - centers. This is a very fast method of initialization but can produce non-convergent - results if the chosen points are too close to each other. - -random - Centers are chosen as a small perturbation away from the mean of all data. - This method is simple but can lead to the model taking longer to converge. - -.. figure:: ../auto_examples/mixture/images/sphx_glr_plot_gmm_init_001.png - :target: ../auto_examples/mixture/plot_gmm_init.html - :align: center - :scale: 50% - -.. topic:: Examples: - - * See :ref:`sphx_glr_auto_examples_mixture_plot_gmm_init.py` for an example of - using different initializations in Gaussian Mixture. - -|details-end| +.. dropdown:: Estimation algorithm expectation-maximization + + The main difficulty in learning Gaussian mixture models from unlabeled + data is that one usually doesn't know which points came from + which latent component (if one has access to this information it gets + very easy to fit a separate Gaussian distribution to each set of + points). `Expectation-maximization + `_ + is a well-founded statistical + algorithm to get around this problem by an iterative process. First + one assumes random components (randomly centered on data points, + learned from k-means, or even just normally distributed around the + origin) and computes for each point a probability of being generated by + each component of the model. Then, one tweaks the + parameters to maximize the likelihood of the data given those + assignments. Repeating this process is guaranteed to always converge + to a local optimum. + +.. dropdown:: Choice of the Initialization method + + There is a choice of four initialization methods (as well as inputting user defined + initial means) to generate the initial centers for the model components: + + k-means (default) + This applies a traditional k-means clustering algorithm. + This can be computationally expensive compared to other initialization methods. + + k-means++ + This uses the initialization method of k-means clustering: k-means++. + This will pick the first center at random from the data. Subsequent centers will be + chosen from a weighted distribution of the data favouring points further away from + existing centers. k-means++ is the default initialization for k-means so will be + quicker than running a full k-means but can still take a significant amount of + time for large data sets with many components. + + random_from_data + This will pick random data points from the input data as the initial + centers. This is a very fast method of initialization but can produce non-convergent + results if the chosen points are too close to each other. + + random + Centers are chosen as a small perturbation away from the mean of all data. + This method is simple but can lead to the model taking longer to converge. + + .. figure:: ../auto_examples/mixture/images/sphx_glr_plot_gmm_init_001.png + :target: ../auto_examples/mixture/plot_gmm_init.html + :align: center + :scale: 50% + + .. rubric:: Examples + + * See :ref:`sphx_glr_auto_examples_mixture_plot_gmm_init.py` for an example of + using different initializations in Gaussian Mixture. .. _bgmm: @@ -225,7 +208,7 @@ uses a truncated distribution with a fixed maximum number of components (called the Stick-breaking representation). The number of components actually used almost always depends on the data. 
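For intuition, the following minimal sketch (an illustrative example, not part of the
changeset above; the synthetic data and the bound ``n_components=5`` are arbitrary
choices) shows this behaviour: on well-separated data, most of the fitted
``weights_`` of a :class:`BayesianGaussianMixture` are driven towards zero::

    >>> import numpy as np
    >>> from sklearn.mixture import BayesianGaussianMixture
    >>> rng = np.random.RandomState(0)
    >>> # Two well-separated 1D blobs, but allow up to 5 components.
    >>> X = np.concatenate([rng.normal(0, 1, (100, 1)),
    ...                     rng.normal(10, 1, (100, 1))])
    >>> bgmm = BayesianGaussianMixture(n_components=5, random_state=0).fit(X)
    >>> # Most of the weight mass concentrates on about two components;
    >>> # the remaining weights end up close to zero.
    >>> np.sort(bgmm.weights_)[::-1].round(2)  # doctest: +SKIP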
-The next figure compares the results obtained for the different type of the +The next figure compares the results obtained for the different types of the weight concentration prior (parameter ``weight_concentration_prior_type``) for different values of ``weight_concentration_prior``. Here, we can see the value of the ``weight_concentration_prior`` parameter @@ -276,63 +259,58 @@ from the two resulting mixtures. -.. topic:: Examples: - - * See :ref:`sphx_glr_auto_examples_mixture_plot_gmm.py` for an example on - plotting the confidence ellipsoids for both :class:`GaussianMixture` - and :class:`BayesianGaussianMixture`. - - * :ref:`sphx_glr_auto_examples_mixture_plot_gmm_sin.py` shows using - :class:`GaussianMixture` and :class:`BayesianGaussianMixture` to fit a - sine wave. +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_mixture_plot_concentration_prior.py` - for an example plotting the confidence ellipsoids for the - :class:`BayesianGaussianMixture` with different - ``weight_concentration_prior_type`` for different values of the parameter - ``weight_concentration_prior``. +* See :ref:`sphx_glr_auto_examples_mixture_plot_gmm.py` for an example on + plotting the confidence ellipsoids for both :class:`GaussianMixture` + and :class:`BayesianGaussianMixture`. -|details-start| -**Pros and cons of variational inference with BayesianGaussianMixture** -|details-split| +* :ref:`sphx_glr_auto_examples_mixture_plot_gmm_sin.py` shows using + :class:`GaussianMixture` and :class:`BayesianGaussianMixture` to fit a + sine wave. -.. topic:: Pros: +* See :ref:`sphx_glr_auto_examples_mixture_plot_concentration_prior.py` + for an example plotting the confidence ellipsoids for the + :class:`BayesianGaussianMixture` with different + ``weight_concentration_prior_type`` for different values of the parameter + ``weight_concentration_prior``. - :Automatic selection: when ``weight_concentration_prior`` is small enough and - ``n_components`` is larger than what is found necessary by the model, the - Variational Bayesian mixture model has a natural tendency to set some mixture - weights values close to zero. This makes it possible to let the model choose - a suitable number of effective components automatically. Only an upper bound - of this number needs to be provided. Note however that the "ideal" number of - active components is very application specific and is typically ill-defined - in a data exploration setting. +.. dropdown:: Pros and cons of variational inference with BayesianGaussianMixture - :Less sensitivity to the number of parameters: unlike finite models, which will - almost always use all components as much as they can, and hence will produce - wildly different solutions for different numbers of components, the - variational inference with a Dirichlet process prior - (``weight_concentration_prior_type='dirichlet_process'``) won't change much - with changes to the parameters, leading to more stability and less tuning. + .. rubric:: Pros - :Regularization: due to the incorporation of prior information, - variational solutions have less pathological special cases than - expectation-maximization solutions. + :Automatic selection: When ``weight_concentration_prior`` is small enough and + ``n_components`` is larger than what is found necessary by the model, the + Variational Bayesian mixture model has a natural tendency to set some mixture + weights values close to zero. This makes it possible to let the model choose + a suitable number of effective components automatically. 
Only an upper bound
+     of this number needs to be provided. Note however that the "ideal" number of
+     active components is very application specific and is typically ill-defined
+     in a data exploration setting.

+   :Less sensitivity to the number of parameters: Unlike finite models, which will
+     almost always use all components as much as they can, and hence will produce
+     wildly different solutions for different numbers of components, the
+     variational inference with a Dirichlet process prior
+     (``weight_concentration_prior_type='dirichlet_process'``) won't change much
+     with changes to the parameters, leading to more stability and less tuning.

-.. topic:: Cons:
+   :Regularization: Due to the incorporation of prior information,
+     variational solutions have less pathological special cases than
+     expectation-maximization solutions.

-   :Speed: the extra parametrization necessary for variational inference makes
-   inference slower, although not by much.
+   .. rubric:: Cons

-   :Hyperparameters: this algorithm needs an extra hyperparameter
-   that might need experimental tuning via cross-validation.
+   :Speed: The extra parametrization necessary for variational inference makes
+     inference slower, although not by much.

-   :Bias: there are many implicit biases in the inference algorithms (and also in
-   the Dirichlet process if used), and whenever there is a mismatch between
-   these biases and the data it might be possible to fit better models using a
-   finite mixture.
+   :Hyperparameters: This algorithm needs an extra hyperparameter
+     that might need experimental tuning via cross-validation.

-|details-end|
+   :Bias: There are many implicit biases in the inference algorithms (and also in
+     the Dirichlet process if used), and whenever there is a mismatch between
+     these biases and the data it might be possible to fit better models using a
+     finite mixture.

.. _dirichlet_process:

diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst
index 7caacd697ea1c..cf168295a6024 100644
--- a/doc/modules/model_evaluation.rst
+++ b/doc/modules/model_evaluation.rst
@@ -6,18 +6,158 @@ Metrics and scoring: quantifying the quality of predictions
 ===========================================================
 
+.. _which_scoring_function:
+
+Which scoring function should I use?
+====================================
+
+Before we take a closer look into the details of the many scores and
+:term:`evaluation metrics`, we want to give some guidance, inspired by statistical
+decision theory, on the choice of **scoring functions** for **supervised learning**,
+see [Gneiting2009]_:
+
+- *Which scoring function should I use?*
+- *Which scoring function is a good one for my task?*
+
+In a nutshell, if the scoring function is given, e.g. in a Kaggle competition
+or in a business context, use that one.
+If you are free to choose, start by considering the ultimate goal and application
+of the prediction. It is useful to distinguish two steps:
+
+* Predicting
+* Decision making
+
+**Predicting:**
+Usually, the response variable :math:`Y` is a random variable, in the sense that there
+is *no deterministic* function :math:`Y = g(X)` of the features :math:`X`.
+Instead, there is a probability distribution :math:`F` of :math:`Y`.
+One can aim to predict the whole distribution, known as *probabilistic prediction*,
+or---more the focus of scikit-learn---issue a *point prediction* (or point forecast)
+by choosing a property or functional of that distribution :math:`F`. 
+Typical examples are the mean (expected value), the median or a quantile of the
+response variable :math:`Y` (conditionally on :math:`X`).
+
+Once that is settled, use a **strictly consistent** scoring function for that
+(target) functional, see [Gneiting2009]_.
+This means using a scoring function that is aligned with *measuring the distance
+between predictions* `y_pred` *and the true target functional using observations of*
+:math:`Y`, i.e. `y_true`.
+For classification, **strictly proper scoring rules** (see the
+`Wikipedia entry for Scoring rule `_
+and [Gneiting2007]_) coincide with strictly consistent scoring functions.
+The table further below provides examples.
+One could say that consistent scoring functions act as *truth serum* in that
+they guarantee *"that truth telling [. . .] is an optimal strategy in
+expectation"* [Gneiting2014]_.
+
+Once a strictly consistent scoring function is chosen, it is best used for both: as
+loss function for model training and as metric/score in model evaluation and model
+comparison.
+
+Note that for regressors, the prediction is done with :term:`predict` while for
+classifiers it is usually :term:`predict_proba`.
+
+**Decision Making:**
+The most common decisions are made on binary classification tasks, where the result of
+:term:`predict_proba` is turned into a single outcome, e.g., from the predicted
+probability of rain a decision is made on how to act (whether to take mitigating
+measures like an umbrella or not).
+For classifiers, this is what :term:`predict` returns.
+See also :ref:`TunedThresholdClassifierCV`.
+There are many scoring functions which measure different aspects of such a
+decision; most of them are covered by or derived from the
+:func:`metrics.confusion_matrix`.
+
+**List of strictly consistent scoring functions:**
+Here, we list some of the most relevant statistical functionals and corresponding
+strictly consistent scoring functions for tasks in practice. Note that the list is not
+complete and that there are more of them.
+For further criteria on how to select a specific one, see [Fissler2022]_.
+
+================== =================================================== ==================== =================================
+functional         scoring or loss function                            response `y`         prediction
+================== =================================================== ==================== =================================
+**Classification**
+mean               :ref:`Brier score ` :sup:`1`                        multi-class          ``predict_proba``
+mean               :ref:`log loss `                                    multi-class          ``predict_proba``
+mode               :ref:`zero-one loss ` :sup:`2`                      multi-class          ``predict``, categorical
+**Regression**
+mean               :ref:`squared error ` :sup:`3`                      all reals            ``predict``, all reals
+mean               :ref:`Poisson deviance `                            non-negative         ``predict``, strictly positive
+mean               :ref:`Gamma deviance `                              strictly positive    ``predict``, strictly positive
+mean               :ref:`Tweedie deviance `                            depends on ``power`` ``predict``, depends on ``power``
+median             :ref:`absolute error `                              all reals            ``predict``, all reals
+quantile           :ref:`pinball loss `                                all reals            ``predict``, all reals
+mode               no consistent one exists                            reals
+================== =================================================== ==================== =================================
+
+:sup:`1` The Brier score is just a different name for the squared error in case of
+classification.
+
+:sup:`2` The zero-one loss is only consistent but not strictly consistent for the mode. 
+The zero-one loss is equivalent to one minus the accuracy score, meaning it gives +different score values but the same ranking. + +:sup:`3` R² gives the same ranking as squared error. + +**Fictitious Example:** +Let's make the above arguments more tangible. Consider a setting in network reliability +engineering, such as maintaining stable internet or Wi-Fi connections. +As provider of the network, you have access to the dataset of log entries of network +connections containing network load over time and many interesting features. +Your goal is to improve the reliability of the connections. +In fact, you promise your customers that on at least 99% of all days there are no +connection discontinuities larger than 1 minute. +Therefore, you are interested in a prediction of the 99% quantile (of longest +connection interruption duration per day) in order to know in advance when to add +more bandwidth and thereby satisfy your customers. So the *target functional* is the +99% quantile. From the table above, you choose the pinball loss as scoring function +(fair enough, not much choice given), for model training (e.g. +`HistGradientBoostingRegressor(loss="quantile", quantile=0.99)`) as well as model +evaluation (`mean_pinball_loss(..., alpha=0.99)` - we apologize for the different +argument names, `quantile` and `alpha`) be it in grid search for finding +hyperparameters or in comparing to other models like +`QuantileRegressor(quantile=0.99)`. + +.. rubric:: References + +.. [Gneiting2007] T. Gneiting and A. E. Raftery. :doi:`Strictly Proper + Scoring Rules, Prediction, and Estimation <10.1198/016214506000001437>` + In: Journal of the American Statistical Association 102 (2007), + pp. 359– 378. + `link to pdf `_ + +.. [Gneiting2009] T. Gneiting. :arxiv:`Making and Evaluating Point Forecasts + <0912.0902>` + Journal of the American Statistical Association 106 (2009): 746 - 762. + +.. [Gneiting2014] T. Gneiting and M. Katzfuss. :doi:`Probabilistic Forecasting + <10.1146/annurev-statistics-062713-085831>`. In: Annual Review of Statistics and Its Application 1.1 (2014), pp. 125–151. + +.. [Fissler2022] T. Fissler, C. Lorentzen and M. Mayer. :arxiv:`Model + Comparison and Calibration Assessment: User Guide for Consistent Scoring + Functions in Machine Learning and Actuarial Practice. <2202.12780>` + +.. _scoring_api_overview: + +Scoring API overview +==================== + There are 3 different APIs for evaluating the quality of a model's predictions: * **Estimator score method**: Estimators have a ``score`` method providing a default evaluation criterion for the problem they are designed to solve. - This is not discussed on this page, but in each estimator's documentation. + Most commonly this is :ref:`accuracy ` for classifiers and the + :ref:`coefficient of determination ` (:math:`R^2`) for regressors. + Details for each estimator can be found in its documentation. -* **Scoring parameter**: Model-evaluation tools using +* **Scoring parameter**: Model-evaluation tools that use :ref:`cross-validation ` (such as - :func:`model_selection.cross_val_score` and - :class:`model_selection.GridSearchCV`) rely on an internal *scoring* strategy. - This is discussed in the section :ref:`scoring_parameter`. + :class:`model_selection.GridSearchCV`, :func:`model_selection.validation_curve` and + :class:`linear_model.LogisticRegressionCV`) rely on an internal *scoring* strategy. + This can be specified using the `scoring` parameter of that tool and is discussed + in the section :ref:`scoring_parameter`. 
* **Metric functions**: The :mod:`sklearn.metrics` module implements functions
  assessing prediction error for specific purposes. These metrics are detailed
@@ -38,24 +178,39 @@ value of those metrics for random predictions.
 The ``scoring`` parameter: defining model evaluation rules
 ==========================================================
 
-Model selection and evaluation using tools, such as
-:class:`model_selection.GridSearchCV` and
-:func:`model_selection.cross_val_score`, take a ``scoring`` parameter that
+Model selection and evaluation tools that internally use
+:ref:`cross-validation ` (such as
+:class:`model_selection.GridSearchCV`, :func:`model_selection.validation_curve` and
+:class:`linear_model.LogisticRegressionCV`) take a ``scoring`` parameter that
 controls what metric they apply to the estimators evaluated.
 
-Common cases: predefined values
--------------------------------
+They can be specified in several ways:
+
+* `None`: the estimator's default evaluation criterion (i.e., the metric used in the
+  estimator's `score` method) is used.
+* :ref:`String name `: common metrics can be passed via a string
+  name.
+* :ref:`Callable `: more complex metrics can be passed via a custom
+  metric callable (e.g., function).
+
+Some tools also accept multiple metrics for evaluation. See :ref:`multimetric_scoring`
+for details.
+
+.. _scoring_string_names:
+
+String name scorers
+-------------------
 
 For the most common use cases, you can designate a scorer object with the
-``scoring`` parameter; the table below shows all possible values.
+``scoring`` parameter via a string name; the table below shows all possible values.
 All scorer objects follow the convention that **higher return values are better
-than lower return values**. Thus metrics which measure the distance between
+than lower return values**. Thus metrics which measure the distance between
 the model and the data, like :func:`metrics.mean_squared_error`, are
-available as neg_mean_squared_error which return the negated value
+available as 'neg_mean_squared_error', which returns the negated value
 of the metric. 
==================================== ============================================== ================================== -Scoring Function Comment +Scoring string name Function Comment ==================================== ============================================== ================================== **Classification** 'accuracy' :func:`metrics.accuracy_score` @@ -77,6 +232,7 @@ Scoring Function 'roc_auc_ovo' :func:`metrics.roc_auc_score` 'roc_auc_ovr_weighted' :func:`metrics.roc_auc_score` 'roc_auc_ovo_weighted' :func:`metrics.roc_auc_score` +'d2_log_loss_score' :func:`metrics.d2_log_loss_score` **Clustering** 'adjusted_mutual_info_score' :func:`metrics.adjusted_mutual_info_score` @@ -91,7 +247,7 @@ Scoring Function **Regression** 'explained_variance' :func:`metrics.explained_variance_score` -'max_error' :func:`metrics.max_error` +'neg_max_error' :func:`metrics.max_error` 'neg_mean_absolute_error' :func:`metrics.mean_absolute_error` 'neg_mean_squared_error' :func:`metrics.mean_squared_error` 'neg_root_mean_squared_error' :func:`metrics.root_mean_squared_error` @@ -102,7 +258,7 @@ Scoring Function 'neg_mean_poisson_deviance' :func:`metrics.mean_poisson_deviance` 'neg_mean_gamma_deviance' :func:`metrics.mean_gamma_deviance` 'neg_mean_absolute_percentage_error' :func:`metrics.mean_absolute_percentage_error` -'d2_absolute_error_score' :func:`metrics.d2_absolute_error_score` +'d2_absolute_error_score' :func:`metrics.d2_absolute_error_score` ==================================== ============================================== ================================== Usage examples: @@ -112,7 +268,7 @@ Usage examples: >>> X, y = datasets.load_iris(return_X_y=True) >>> clf = svm.SVC(random_state=0) >>> cross_val_score(clf, X, y, cv=5, scoring='recall_macro') - array([0.96..., 0.96..., 0.96..., 0.93..., 1. ]) + array([0.96, 0.96, 0.96, 0.93, 1. ]) .. note:: @@ -122,12 +278,23 @@ Usage examples: .. currentmodule:: sklearn.metrics -.. _scoring: +.. _scoring_callable: + +Callable scorers +---------------- + +For more complex use cases and more flexibility, you can pass a callable to +the `scoring` parameter. This can be done by: -Defining your scoring strategy from metric functions ------------------------------------------------------ +* :ref:`scoring_adapt_metric` +* :ref:`scoring_custom` (most flexible) -The following metrics functions are not implemented as named scorers, +.. _scoring_adapt_metric: + +Adapting predefined metrics via `make_scorer` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The following metric functions are not implemented as named scorers, sometimes because they require additional parameters, such as :func:`fbeta_score`. They cannot be passed to the ``scoring`` parameters; instead their callable needs to be passed to @@ -165,100 +332,94 @@ measuring a prediction error given ground truth and prediction: maximize, the higher the better. - functions ending with ``_error``, ``_loss``, or ``_deviance`` return a - value to minimize, the lower the better. When converting + value to minimize, the lower the better. When converting into a scorer object using :func:`make_scorer`, set the ``greater_is_better`` parameter to ``False`` (``True`` by default; see the parameter description below). 
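As a brief illustration of the above (an indicative sketch, not part of this diff;
the estimator and parameter grid are arbitrary choices), a score function with an
extra parameter and a loss function can be turned into scorers like this::

    >>> from sklearn.metrics import fbeta_score, mean_squared_error, make_scorer
    >>> # A score: higher is better, so the default greater_is_better=True applies;
    >>> # the extra parameter beta is passed through to fbeta_score.
    >>> ftwo_scorer = make_scorer(fbeta_score, beta=2)
    >>> # A loss: greater_is_better=False makes the scorer negate the value.
    >>> neg_mse_scorer = make_scorer(mean_squared_error, greater_is_better=False)
    >>> from sklearn.model_selection import GridSearchCV
    >>> from sklearn.svm import LinearSVC
    >>> grid = GridSearchCV(LinearSVC(), param_grid={'C': [1, 10]},
    ...                     scoring=ftwo_scorer)  # doctest: +SKIP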
- -|details-start| -**Custom scorer objects** -|details-split| - - -The second use case is to build a completely custom scorer object -from a simple python function using :func:`make_scorer`, which can -take several parameters: - -* the python function you want to use (``my_custom_loss_func`` - in the example below) - -* whether the python function returns a score (``greater_is_better=True``, - the default) or a loss (``greater_is_better=False``). If a loss, the output - of the python function is negated by the scorer object, conforming to - the cross validation convention that scorers return higher values for better models. - -* for classification metrics only: whether the python function you provided requires - continuous decision certainties. If the scoring function only accepts probability - estimates (e.g. :func:`metrics.log_loss`) then one needs to set the parameter - `response_method`, thus in this case `response_method="predict_proba"`. Some scoring - function do not necessarily require probability estimates but rather non-thresholded - decision values (e.g. :func:`metrics.roc_auc_score`). In this case, one provides a - list such as `response_method=["decision_function", "predict_proba"]`. In this case, - the scorer will use the first available method, in the order given in the list, - to compute the scores. - -* any additional parameters, such as ``beta`` or ``labels`` in :func:`f1_score`. - -Here is an example of building custom scorers, and of using the -``greater_is_better`` parameter:: - - >>> import numpy as np - >>> def my_custom_loss_func(y_true, y_pred): - ... diff = np.abs(y_true - y_pred).max() - ... return np.log1p(diff) - ... - >>> # score will negate the return value of my_custom_loss_func, - >>> # which will be np.log(2), 0.693, given the values for X - >>> # and y defined below. - >>> score = make_scorer(my_custom_loss_func, greater_is_better=False) - >>> X = [[1], [1]] - >>> y = [0, 1] - >>> from sklearn.dummy import DummyClassifier - >>> clf = DummyClassifier(strategy='most_frequent', random_state=0) - >>> clf = clf.fit(X, y) - >>> my_custom_loss_func(y, clf.predict(X)) - 0.69... - >>> score(clf, X, y) - -0.69... - -|details-end| - -.. _diy_scoring: - -Implementing your own scoring object ------------------------------------- - -You can generate even more flexible model scorers by constructing your own -scoring object from scratch, without using the :func:`make_scorer` factory. - - -|details-start| -**How to build a scorer from scratch** -|details-split| - -For a callable to be a scorer, it needs to meet the protocol specified by -the following two rules: - -- It can be called with parameters ``(estimator, X, y)``, where ``estimator`` - is the model that should be evaluated, ``X`` is validation data, and ``y`` is - the ground truth target for ``X`` (in the supervised case) or ``None`` (in the - unsupervised case). - -- It returns a floating point number that quantifies the - ``estimator`` prediction quality on ``X``, with reference to ``y``. - Again, by convention higher numbers are better, so if your scorer - returns loss, that value should be negated. - -- Advanced: If it requires extra metadata to be passed to it, it should expose - a ``get_metadata_routing`` method returning the requested metadata. The user - should be able to set the requested metadata via a ``set_score_request`` - method. Please see :ref:`User Guide ` and :ref:`Developer - Guide ` for - more details. - - -.. note:: **Using custom scorers in functions where n_jobs > 1** +.. 
_scoring_custom:
+
+Creating a custom scorer object
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+You can create your own custom scorer object using
+:func:`make_scorer` or, for the most flexibility, from scratch. See below for details.
+
+.. dropdown:: Custom scorer objects using `make_scorer`
+
+  You can build a completely custom scorer object
+  from a simple python function using :func:`make_scorer`, which can
+  take several parameters:
+
+  * the python function you want to use (``my_custom_loss_func``
+    in the example below)
+
+  * whether the python function returns a score (``greater_is_better=True``,
+    the default) or a loss (``greater_is_better=False``). If a loss, the output
+    of the python function is negated by the scorer object, conforming to
+    the cross validation convention that scorers return higher values for better models.
+
+  * for classification metrics only: whether the python function you provided requires
+    continuous decision certainties. If the scoring function only accepts probability
+    estimates (e.g. :func:`metrics.log_loss`), then one needs to set the parameter
+    `response_method="predict_proba"`. Some scoring
+    functions do not necessarily require probability estimates but rather non-thresholded
+    decision values (e.g. :func:`metrics.roc_auc_score`). In this case, one can provide a
+    list (e.g., `response_method=["decision_function", "predict_proba"]`),
+    and the scorer will use the first available method, in the order given in the list,
+    to compute the scores.
+
+  * any additional parameters of the scoring function, such as ``beta`` or ``labels``.
+
+  Here is an example of building custom scorers, and of using the
+  ``greater_is_better`` parameter::
+
+    >>> import numpy as np
+    >>> def my_custom_loss_func(y_true, y_pred):
+    ...     diff = np.abs(y_true - y_pred).max()
+    ...     return float(np.log1p(diff))
+    ...
+    >>> # score will negate the return value of my_custom_loss_func,
+    >>> # which will be np.log(2), 0.693, given the values for X
+    >>> # and y defined below.
+    >>> score = make_scorer(my_custom_loss_func, greater_is_better=False)
+    >>> X = [[1], [1]]
+    >>> y = [0, 1]
+    >>> from sklearn.dummy import DummyClassifier
+    >>> clf = DummyClassifier(strategy='most_frequent', random_state=0)
+    >>> clf = clf.fit(X, y)
+    >>> my_custom_loss_func(y, clf.predict(X))
+    0.69
+    >>> score(clf, X, y)
+    -0.69
+
+.. dropdown:: Custom scorer objects from scratch
+
+  You can generate even more flexible model scorers by constructing your own
+  scoring object from scratch, without using the :func:`make_scorer` factory.
+
+  For a callable to be a scorer, it needs to meet the protocol specified by
+  the following rules:
+
+  - It can be called with parameters ``(estimator, X, y)``, where ``estimator``
+    is the model that should be evaluated, ``X`` is validation data, and ``y`` is
+    the ground truth target for ``X`` (in the supervised case) or ``None`` (in the
+    unsupervised case).
+
+  - It returns a floating point number that quantifies the
+    ``estimator`` prediction quality on ``X``, with reference to ``y``.
+    Again, by convention higher numbers are better, so if your scorer
+    returns loss, that value should be negated.
+
+  - Advanced: If it requires extra metadata to be passed to it, it should expose
+    a ``get_metadata_routing`` method returning the requested metadata. The user
+    should be able to set the requested metadata via a ``set_score_request``
+    method. Please see :ref:`User Guide ` and :ref:`Developer
+    Guide ` for
+    more details.
+
+
+.. 
dropdown:: Using custom scorers in functions where n_jobs > 1 While defining the custom scoring function alongside the calling function should work out of the box with the default joblib backend (loky), @@ -277,8 +438,6 @@ the following two rules: ... cv=5, ... n_jobs=-1) # doctest: +SKIP -|details-end| - .. _multimetric_scoring: Using multiple metric evaluation @@ -291,13 +450,15 @@ There are three ways to specify multiple scoring metrics for the ``scoring`` parameter: - As an iterable of string metrics:: - >>> scoring = ['accuracy', 'precision'] + + >>> scoring = ['accuracy', 'precision'] - As a ``dict`` mapping the scorer name to the scoring function:: - >>> from sklearn.metrics import accuracy_score - >>> from sklearn.metrics import make_scorer - >>> scoring = {'accuracy': make_scorer(accuracy_score), - ... 'prec': 'precision'} + + >>> from sklearn.metrics import accuracy_score + >>> from sklearn.metrics import make_scorer + >>> scoring = {'accuracy': make_scorer(accuracy_score), + ... 'prec': 'precision'} Note that the dict values can either be scorer functions or one of the predefined metric strings. @@ -377,6 +538,7 @@ Some also work in the multilabel case: recall_score roc_auc_score zero_one_loss + d2_log_loss_score And some work with binary and multilabel (but not multiclass) problems: @@ -472,11 +634,11 @@ In the multilabel case with binary label indicators:: >>> accuracy_score(np.array([[0, 1], [1, 1]]), np.ones((2, 2))) 0.5 -.. topic:: Example: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_model_selection_plot_permutation_tests_for_classification.py` - for an example of accuracy score usage using permutations of - the dataset. +* See :ref:`sphx_glr_auto_examples_model_selection_plot_permutation_tests_for_classification.py` + for an example of accuracy score usage using permutations of + the dataset. .. _top_k_accuracy_score: @@ -514,7 +676,7 @@ where :math:`k` is the number of guesses allowed and :math:`1(x)` is the 0.75 >>> # Not normalizing gives the number of "correctly" classified samples >>> top_k_accuracy_score(y_true, y_score, k=2, normalize=False) - 3 + 3.0 .. _balanced_accuracy_score: @@ -547,7 +709,7 @@ In contrast, if the conventional accuracy is above chance only because the classifier takes advantage of an imbalanced test set, then the balanced accuracy, as appropriate, will drop to :math:`\frac{1}{n\_classes}`. -The score ranges from 0 to 1, or when ``adjusted=True`` is used, it rescaled to +The score ranges from 0 to 1, or when ``adjusted=True`` is used, it is rescaled to the range :math:`\frac{1}{1 - n\_classes}` to 1, inclusive, with performance at random scoring 0. @@ -587,22 +749,20 @@ or *informedness*. * Balanced Accuracy as described in [Urbanowicz2015]_: the average of sensitivity and specificity is computed for each class and then averaged over total number of classes. -.. topic:: References: - - .. [Guyon2015] I. Guyon, K. Bennett, G. Cawley, H.J. Escalante, S. Escalera, T.K. Ho, N. Macià, - B. Ray, M. Saeed, A.R. Statnikov, E. Viegas, `Design of the 2015 ChaLearn AutoML Challenge - `_, - IJCNN 2015. - .. [Mosley2013] L. Mosley, `A balanced approach to the multi-class imbalance problem - `_, - IJCV 2010. - .. [Kelleher2015] John. D. Kelleher, Brian Mac Namee, Aoife D'Arcy, `Fundamentals of - Machine Learning for Predictive Data Analytics: Algorithms, Worked Examples, - and Case Studies `_, - 2015. - .. [Urbanowicz2015] Urbanowicz R.J., Moore, J.H. 
:doi:`ExSTraCS 2.0: description - and evaluation of a scalable learning classifier - system <10.1007/s12065-015-0128-8>`, Evol. Intel. (2015) 8: 89. +.. rubric:: References + +.. [Guyon2015] I. Guyon, K. Bennett, G. Cawley, H.J. Escalante, S. Escalera, T.K. Ho, N. Macià, + B. Ray, M. Saeed, A.R. Statnikov, E. Viegas, `Design of the 2015 ChaLearn AutoML Challenge + `_, IJCNN 2015. +.. [Mosley2013] L. Mosley, `A balanced approach to the multi-class imbalance problem + `_, IJCV 2010. +.. [Kelleher2015] John. D. Kelleher, Brian Mac Namee, Aoife D'Arcy, `Fundamentals of + Machine Learning for Predictive Data Analytics: Algorithms, Worked Examples, + and Case Studies `_, + 2015. +.. [Urbanowicz2015] Urbanowicz R.J., Moore, J.H. :doi:`ExSTraCS 2.0: description + and evaluation of a scalable learning classifier + system <10.1007/s12065-015-0128-8>`, Evol. Intel. (2015) 8: 89. .. _cohen_kappa: @@ -614,7 +774,7 @@ The function :func:`cohen_kappa_score` computes `Cohen's kappa This measure is intended to compare labelings by different human annotators, not a classifier versus a ground truth. -The kappa score (see docstring) is a number between -1 and 1. +The kappa score is a number between -1 and 1. Scores above .8 are generally considered good agreement; zero or lower means no agreement (practically random labels). @@ -623,9 +783,9 @@ but not for multilabel problems (except by manually computing a per-label score) and not for more than two annotators. >>> from sklearn.metrics import cohen_kappa_score - >>> y_true = [2, 0, 2, 2, 0, 1] - >>> y_pred = [0, 0, 2, 2, 0, 2] - >>> cohen_kappa_score(y_true, y_pred) + >>> labeling1 = [2, 0, 2, 2, 0, 1] + >>> labeling2 = [0, 0, 2, 2, 0, 2] + >>> cohen_kappa_score(labeling1, labeling2) 0.4285714285714286 .. _confusion_matrix: @@ -677,23 +837,23 @@ false negatives and true positives as follows:: >>> y_true = [0, 0, 0, 1, 1, 1, 1, 1] >>> y_pred = [0, 1, 0, 1, 0, 1, 0, 1] - >>> tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel() + >>> tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel().tolist() >>> tn, fp, fn, tp (2, 1, 2, 3) -.. topic:: Example: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_model_selection_plot_confusion_matrix.py` - for an example of using a confusion matrix to evaluate classifier output - quality. +* See :ref:`sphx_glr_auto_examples_model_selection_plot_confusion_matrix.py` + for an example of using a confusion matrix to evaluate classifier output + quality. - * See :ref:`sphx_glr_auto_examples_classification_plot_digits_classification.py` - for an example of using a confusion matrix to classify - hand-written digits. +* See :ref:`sphx_glr_auto_examples_classification_plot_digits_classification.py` + for an example of using a confusion matrix to classify + hand-written digits. - * See :ref:`sphx_glr_auto_examples_text_plot_document_classification_20newsgroups.py` - for an example of using a confusion matrix to classify text - documents. +* See :ref:`sphx_glr_auto_examples_text_plot_document_classification_20newsgroups.py` + for an example of using a confusion matrix to classify text + documents. .. _classification_report: @@ -720,15 +880,15 @@ and inferred labels:: weighted avg 0.67 0.60 0.59 5 -.. topic:: Example: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_classification_plot_digits_classification.py` - for an example of classification report usage for - hand-written digits. 
+* See :ref:`sphx_glr_auto_examples_classification_plot_digits_classification.py` + for an example of classification report usage for + hand-written digits. - * See :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_digits.py` - for an example of classification report usage for - grid search with nested cross-validation. +* See :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_digits.py` + for an example of classification report usage for + grid search with nested cross-validation. .. _hamming_loss: @@ -846,31 +1006,31 @@ precision-recall curve as follows. :scale: 75 :align: center -.. topic:: Examples: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_digits.py` - for an example of :func:`precision_score` and :func:`recall_score` usage - to estimate parameters using grid search with nested cross-validation. +* See :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_digits.py` + for an example of :func:`precision_score` and :func:`recall_score` usage + to estimate parameters using grid search with nested cross-validation. - * See :ref:`sphx_glr_auto_examples_model_selection_plot_precision_recall.py` - for an example of :func:`precision_recall_curve` usage to evaluate - classifier output quality. +* See :ref:`sphx_glr_auto_examples_model_selection_plot_precision_recall.py` + for an example of :func:`precision_recall_curve` usage to evaluate + classifier output quality. -.. topic:: References: +.. rubric:: References - .. [Manning2008] C.D. Manning, P. Raghavan, H. Schütze, `Introduction to Information Retrieval - `_, - 2008. - .. [Everingham2010] M. Everingham, L. Van Gool, C.K.I. Williams, J. Winn, A. Zisserman, - `The Pascal Visual Object Classes (VOC) Challenge - `_, - IJCV 2010. - .. [Davis2006] J. Davis, M. Goadrich, `The Relationship Between Precision-Recall and ROC Curves - `_, - ICML 2006. - .. [Flach2015] P.A. Flach, M. Kull, `Precision-Recall-Gain Curves: PR Analysis Done Right - `_, - NIPS 2015. +.. [Manning2008] C.D. Manning, P. Raghavan, H. Schütze, `Introduction to Information Retrieval + `_, + 2008. +.. [Everingham2010] M. Everingham, L. Van Gool, C.K.I. Williams, J. Winn, A. Zisserman, + `The Pascal Visual Object Classes (VOC) Challenge + `_, + IJCV 2010. +.. [Davis2006] J. Davis, M. Goadrich, `The Relationship Between Precision-Recall and ROC Curves + `_, + ICML 2006. +.. [Flach2015] P.A. Flach, M. Kull, `Precision-Recall-Gain Curves: PR Analysis Done Right + `_, + NIPS 2015. Binary classification ^^^^^^^^^^^^^^^^^^^^^ @@ -931,15 +1091,15 @@ Here are some small examples in binary classification:: >>> metrics.recall_score(y_true, y_pred) 0.5 >>> metrics.f1_score(y_true, y_pred) - 0.66... + 0.66 >>> metrics.fbeta_score(y_true, y_pred, beta=0.5) - 0.83... + 0.83 >>> metrics.fbeta_score(y_true, y_pred, beta=1) - 0.66... + 0.66 >>> metrics.fbeta_score(y_true, y_pred, beta=2) - 0.55... + 0.55 >>> metrics.precision_recall_fscore_support(y_true, y_pred, beta=0.5) - (array([0.66..., 1. ]), array([1. , 0.5]), array([0.71..., 0.83...]), array([2, 2])) + (array([0.66, 1. ]), array([1. , 0.5]), array([0.71, 0.83]), array([2, 2])) >>> import numpy as np @@ -949,13 +1109,13 @@ Here are some small examples in binary classification:: >>> y_scores = np.array([0.1, 0.4, 0.35, 0.8]) >>> precision, recall, threshold = precision_recall_curve(y_true, y_scores) >>> precision - array([0.5 , 0.66..., 0.5 , 1. , 1. ]) + array([0.5 , 0.66, 0.5 , 1. , 1. ]) >>> recall array([1. , 1. , 0.5, 0.5, 0. 
]) >>> threshold array([0.1 , 0.35, 0.4 , 0.8 ]) >>> average_precision_score(y_true, y_scores) - 0.83... + 0.83 @@ -1018,15 +1178,15 @@ Then the metrics are defined as: >>> y_true = [0, 1, 2, 0, 1, 2] >>> y_pred = [0, 2, 1, 0, 0, 1] >>> metrics.precision_score(y_true, y_pred, average='macro') - 0.22... + 0.22 >>> metrics.recall_score(y_true, y_pred, average='micro') - 0.33... + 0.33 >>> metrics.f1_score(y_true, y_pred, average='weighted') - 0.26... + 0.267 >>> metrics.fbeta_score(y_true, y_pred, average='macro', beta=0.5) - 0.23... + 0.238 >>> metrics.precision_recall_fscore_support(y_true, y_pred, beta=0.5, average=None) - (array([0.66..., 0. , 0. ]), array([1., 0., 0.]), array([0.71..., 0. , 0. ]), array([2, 2, 2]...)) + (array([0.667, 0., 0.]), array([1., 0., 0.]), array([0.714, 0., 0.]), array([2, 2, 2])) For multiclass classification with a "negative class", it is possible to exclude some labels: @@ -1037,12 +1197,12 @@ For multiclass classification with a "negative class", it is possible to exclude Similarly, labels not present in the data sample may be accounted for in macro-averaging. >>> metrics.precision_score(y_true, y_pred, labels=[0, 1, 2, 3], average='macro') - 0.166... + 0.166 -.. topic:: References: +.. rubric:: References - .. [OB2019] :arxiv:`Opitz, J., & Burst, S. (2019). "Macro f1 and macro f1." - <1911.03347>` +.. [OB2019] :arxiv:`Opitz, J., & Burst, S. (2019). "Macro f1 and macro f1." + <1911.03347>` .. _jaccard_similarity_score: @@ -1074,7 +1234,7 @@ In the binary case:: >>> y_pred = np.array([[1, 1, 1], ... [1, 0, 0]]) >>> jaccard_score(y_true[0], y_pred[0]) - 0.6666... + 0.6666 In the 2D comparison case (e.g. image similarity): @@ -1084,9 +1244,9 @@ In the 2D comparison case (e.g. image similarity): In the multilabel case with binary label indicators:: >>> jaccard_score(y_true, y_pred, average='samples') - 0.5833... + 0.5833 >>> jaccard_score(y_true, y_pred, average='macro') - 0.6666... + 0.6666 >>> jaccard_score(y_true, y_pred, average=None) array([0.5, 0.5, 1. ]) @@ -1096,11 +1256,11 @@ multilabel problem:: >>> y_pred = [0, 2, 1, 2] >>> y_true = [0, 1, 2, 2] >>> jaccard_score(y_true, y_pred, average=None) - array([1. , 0. , 0.33...]) + array([1. , 0. , 0.33]) >>> jaccard_score(y_true, y_pred, average='macro') - 0.44... + 0.44 >>> jaccard_score(y_true, y_pred, average='micro') - 0.33... + 0.33 .. _hinge_loss: @@ -1153,9 +1313,9 @@ with a svm classifier in a binary class problem:: LinearSVC(random_state=0) >>> pred_decision = est.decision_function([[-2], [3], [0.5]]) >>> pred_decision - array([-2.18..., 2.36..., 0.09...]) + array([-2.18, 2.36, 0.09]) >>> hinge_loss([-1, 1, 1], pred_decision) - 0.3... + 0.3 Here is an example demonstrating the use of the :func:`hinge_loss` function with a svm classifier in a multiclass problem:: @@ -1169,7 +1329,7 @@ with a svm classifier in a multiclass problem:: >>> pred_decision = est.decision_function([[-1], [2], [3]]) >>> y_true = [0, 2, 3] >>> hinge_loss(y_true, pred_decision, labels=labels) - 0.56... + 0.56 .. _log_loss: @@ -1184,30 +1344,30 @@ probability outputs (``predict_proba``) of a classifier instead of its discrete predictions. For binary classification with a true label :math:`y \in \{0,1\}` -and a probability estimate :math:`p = \operatorname{Pr}(y = 1)`, +and a probability estimate :math:`\hat{p} \approx \operatorname{Pr}(y = 1)`, the log loss per sample is the negative log-likelihood of the classifier given the true label: .. 
math:: - L_{\log}(y, p) = -\log \operatorname{Pr}(y|p) = -(y \log (p) + (1 - y) \log (1 - p)) + L_{\log}(y, \hat{p}) = -\log \operatorname{Pr}(y|\hat{p}) = -(y \log (\hat{p}) + (1 - y) \log (1 - \hat{p})) This extends to the multiclass case as follows. Let the true labels for a set of samples be encoded as a 1-of-K binary indicator matrix :math:`Y`, i.e., :math:`y_{i,k} = 1` if sample :math:`i` has label :math:`k` taken from a set of :math:`K` labels. -Let :math:`P` be a matrix of probability estimates, -with :math:`p_{i,k} = \operatorname{Pr}(y_{i,k} = 1)`. +Let :math:`\hat{P}` be a matrix of probability estimates, +with elements :math:`\hat{p}_{i,k} \approx \operatorname{Pr}(y_{i,k} = 1)`. Then the log loss of the whole set is .. math:: - L_{\log}(Y, P) = -\log \operatorname{Pr}(Y|P) = - \frac{1}{N} \sum_{i=0}^{N-1} \sum_{k=0}^{K-1} y_{i,k} \log p_{i,k} + L_{\log}(Y, \hat{P}) = -\log \operatorname{Pr}(Y|\hat{P}) = - \frac{1}{N} \sum_{i=0}^{N-1} \sum_{k=0}^{K-1} y_{i,k} \log \hat{p}_{i,k} To see how this generalizes the binary log loss given above, note that in the binary case, -:math:`p_{i,0} = 1 - p_{i,1}` and :math:`y_{i,0} = 1 - y_{i,1}`, +:math:`\hat{p}_{i,0} = 1 - \hat{p}_{i,1}` and :math:`y_{i,0} = 1 - y_{i,1}`, so expanding the inner sum over :math:`y_{i,k} \in \{0,1\}` gives the binary log loss. @@ -1219,7 +1379,7 @@ method. >>> y_true = [0, 0, 1, 1] >>> y_pred = [[.9, .1], [.8, .2], [.3, .7], [.01, .99]] >>> log_loss(y_true, y_pred) - 0.1738... + 0.1738 The first ``[.9, .1]`` in ``y_pred`` denotes 90% probability that the first sample has label 0. The log loss is non-negative. @@ -1274,8 +1434,9 @@ Then the multiclass MCC is defined as: When there are more than two labels, the value of the MCC will no longer range between -1 and +1. Instead the minimum value will be somewhere between -1 and 0 -depending on the number and distribution of ground true labels. The maximum +depending on the number and distribution of ground truth labels. The maximum value is always +1. +For additional information, see [WikipediaMCC2021]_. Here is a small example illustrating the usage of the :func:`matthews_corrcoef` function: @@ -1284,7 +1445,14 @@ function: >>> y_true = [+1, +1, +1, -1] >>> y_pred = [+1, -1, +1, +1] >>> matthews_corrcoef(y_true, y_pred) - -0.33... + -0.33 + +.. rubric:: References + +.. [WikipediaMCC2021] Wikipedia contributors. Phi coefficient. + Wikipedia, The Free Encyclopedia. April 21, 2021, 12:21 CEST. + Available at: https://en.wikipedia.org/wiki/Phi_coefficient + Accessed April 21, 2021. .. _multilabel_confusion_matrix: @@ -1464,7 +1632,7 @@ Therefore, the `y_score` parameter is of size (n_samples,). >>> from sklearn.linear_model import LogisticRegression >>> from sklearn.metrics import roc_auc_score >>> X, y = load_breast_cancer(return_X_y=True) - >>> clf = LogisticRegression(solver="liblinear").fit(X, y) + >>> clf = LogisticRegression().fit(X, y) >>> clf.classes_ array([0, 1]) @@ -1472,12 +1640,12 @@ We can use the probability estimates corresponding to `clf.classes_[1]`. >>> y_score = clf.predict_proba(X)[:, 1] >>> roc_auc_score(y, y_score) - 0.99... + 0.99 Otherwise, we can use the non-thresholded decision values >>> roc_auc_score(y, clf.decision_function(X)) - 0.99... + 0.99 .. _roc_auc_multiclass: @@ -1494,65 +1662,57 @@ correspond to the probability estimates that a sample belongs to a particular class. The OvO and OvR algorithms support weighting uniformly (``average='macro'``) and by prevalence (``average='weighted'``). 
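To make the two strategies concrete before the details below, here is a small
indicative sketch (the iris data and the logistic regression model are arbitrary
choices, and the outputs are skipped because the exact values depend on the fit)::

    >>> from sklearn.datasets import load_iris
    >>> from sklearn.linear_model import LogisticRegression
    >>> from sklearn.metrics import roc_auc_score
    >>> X, y = load_iris(return_X_y=True)
    >>> clf = LogisticRegression(max_iter=1000).fit(X, y)
    >>> y_proba = clf.predict_proba(X)  # shape (n_samples, n_classes)
    >>> # One-vs-rest with a uniform (macro) average over classes
    >>> roc_auc_score(y, y_proba, multi_class='ovr', average='macro')  # doctest: +SKIP
    >>> # One-vs-one with a prevalence-weighted average over class pairs
    >>> roc_auc_score(y, y_proba, multi_class='ovo', average='weighted')  # doctest: +SKIP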
-|details-start|
-**One-vs-one Algorithm**
-|details-split|
+.. dropdown:: One-vs-one Algorithm

-Computes the average AUC of all possible pairwise
-combinations of classes. [HT2001]_ defines a multiclass AUC metric weighted
-uniformly:
+  Computes the average AUC of all possible pairwise
+  combinations of classes. [HT2001]_ defines a multiclass AUC metric weighted
+  uniformly:

-.. math::
+  .. math::

-   \frac{1}{c(c-1)}\sum_{j=1}^{c}\sum_{k > j}^c (\text{AUC}(j | k) +
-   \text{AUC}(k | j))
+      \frac{1}{c(c-1)}\sum_{j=1}^{c}\sum_{k > j}^c (\text{AUC}(j | k) +
+      \text{AUC}(k | j))

-where :math:`c` is the number of classes and :math:`\text{AUC}(j | k)` is the
-AUC with class :math:`j` as the positive class and class :math:`k` as the
-negative class. In general,
-:math:`\text{AUC}(j | k) \neq \text{AUC}(k | j))` in the multiclass
-case. This algorithm is used by setting the keyword argument ``multiclass``
-to ``'ovo'`` and ``average`` to ``'macro'``.
+  where :math:`c` is the number of classes and :math:`\text{AUC}(j | k)` is the
+  AUC with class :math:`j` as the positive class and class :math:`k` as the
+  negative class. In general,
+  :math:`\text{AUC}(j | k) \neq \text{AUC}(k | j)` in the multiclass
+  case. This algorithm is used by setting the keyword argument ``multiclass``
+  to ``'ovo'`` and ``average`` to ``'macro'``.

-The [HT2001]_ multiclass AUC metric can be extended to be weighted by the
-prevalence:
+  The [HT2001]_ multiclass AUC metric can be extended to be weighted by the
+  prevalence:

-.. math::
+  .. math::

-   \frac{1}{c(c-1)}\sum_{j=1}^{c}\sum_{k > j}^c p(j \cup k)(
-   \text{AUC}(j | k) + \text{AUC}(k | j))
+      \frac{1}{c(c-1)}\sum_{j=1}^{c}\sum_{k > j}^c p(j \cup k)(
+      \text{AUC}(j | k) + \text{AUC}(k | j))

-where :math:`c` is the number of classes. This algorithm is used by setting
-the keyword argument ``multiclass`` to ``'ovo'`` and ``average`` to
-``'weighted'``. The ``'weighted'`` option returns a prevalence-weighted average
-as described in [FC2009]_.
+  where :math:`c` is the number of classes. This algorithm is used by setting
+  the keyword argument ``multiclass`` to ``'ovo'`` and ``average`` to
+  ``'weighted'``. The ``'weighted'`` option returns a prevalence-weighted average
+  as described in [FC2009]_.

-|details-end|
+.. dropdown:: One-vs-rest Algorithm

-|details-start|
-**One-vs-rest Algorithm**
-|details-split|
+  Computes the AUC of each class against the rest
+  [PD2000]_. The algorithm is functionally the same as the multilabel case. To
+  enable this algorithm set the keyword argument ``multiclass`` to ``'ovr'``.
+  In addition to ``'macro'`` [F2006]_ and ``'weighted'`` [F2001]_ averaging, OvR
+  supports ``'micro'`` averaging.

-Computes the AUC of each class against the rest
-[PD2000]_. The algorithm is functionally the same as the multilabel case. To
-enable this algorithm set the keyword argument ``multiclass`` to ``'ovr'``.
-Additionally to ``'macro'`` [F2006]_ and ``'weighted'`` [F2001]_ averaging, OvR
-supports ``'micro'`` averaging.
+  In applications where a high false positive rate is not tolerable the parameter
+  ``max_fpr`` of :func:`roc_auc_score` can be used to summarize the ROC curve up
+  to the given limit.

-In applications where a high false positive rate is not tolerable the parameter
-``max_fpr`` of :func:`roc_auc_score` can be used to summarize the ROC curve up
-to the given limit. 
- -The following figure shows the micro-averaged ROC curve and its corresponding -ROC-AUC score for a classifier aimed to distinguish the different species in -the :ref:`iris_dataset`: - -.. image:: ../auto_examples/model_selection/images/sphx_glr_plot_roc_002.png - :target: ../auto_examples/model_selection/plot_roc.html - :scale: 75 - :align: center + The following figure shows the micro-averaged ROC curve and its corresponding + ROC-AUC score for a classifier aimed to distinguish the different species in + the :ref:`iris_dataset`: -|details-end| + .. image:: ../auto_examples/model_selection/images/sphx_glr_plot_roc_002.png + :target: ../auto_examples/model_selection/plot_roc.html + :scale: 75 + :align: center .. _roc_auc_multilabel: @@ -1568,11 +1728,11 @@ class with the greater label for each output. >>> from sklearn.datasets import make_multilabel_classification >>> from sklearn.multioutput import MultiOutputClassifier >>> X, y = make_multilabel_classification(random_state=0) - >>> inner_clf = LogisticRegression(solver="liblinear", random_state=0) + >>> inner_clf = LogisticRegression(random_state=0) >>> clf = MultiOutputClassifier(inner_clf).fit(X, y) >>> y_score = np.transpose([y_pred[:, 1] for y_pred in clf.predict_proba(X)]) >>> roc_auc_score(y, y_score, average=None) - array([0.82..., 0.86..., 0.94..., 0.85... , 0.94...]) + array([0.828, 0.851, 0.94, 0.87, 0.95]) And the decision values do not require such processing. @@ -1580,48 +1740,45 @@ And the decision values do not require such processing. >>> clf = RidgeClassifierCV().fit(X, y) >>> y_score = clf.decision_function(X) >>> roc_auc_score(y, y_score, average=None) - array([0.81..., 0.84... , 0.93..., 0.87..., 0.94...]) + array([0.82, 0.85, 0.93, 0.87, 0.94]) -.. topic:: Examples: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_model_selection_plot_roc.py` - for an example of using ROC to - evaluate the quality of the output of a classifier. +* See :ref:`sphx_glr_auto_examples_model_selection_plot_roc.py` for an example of + using ROC to evaluate the quality of the output of a classifier. - * See :ref:`sphx_glr_auto_examples_model_selection_plot_roc_crossval.py` - for an example of using ROC to - evaluate classifier output quality, using cross-validation. +* See :ref:`sphx_glr_auto_examples_model_selection_plot_roc_crossval.py` for an + example of using ROC to evaluate classifier output quality, using cross-validation. - * See :ref:`sphx_glr_auto_examples_applications_plot_species_distribution_modeling.py` - for an example of using ROC to - model species distribution. +* See :ref:`sphx_glr_auto_examples_applications_plot_species_distribution_modeling.py` + for an example of using ROC to model species distribution. -.. topic:: References: +.. rubric:: References - .. [HT2001] Hand, D.J. and Till, R.J., (2001). `A simple generalisation - of the area under the ROC curve for multiple class classification problems. - `_ - Machine learning, 45(2), pp. 171-186. +.. [HT2001] Hand, D.J. and Till, R.J., (2001). `A simple generalisation + of the area under the ROC curve for multiple class classification problems. + `_ + Machine learning, 45(2), pp. 171-186. - .. [FC2009] Ferri, Cèsar & Hernandez-Orallo, Jose & Modroiu, R. (2009). - `An Experimental Comparison of Performance Measures for Classification. - `_ - Pattern Recognition Letters. 30. 27-38. +.. [FC2009] Ferri, Cèsar & Hernandez-Orallo, Jose & Modroiu, R. (2009). + `An Experimental Comparison of Performance Measures for Classification. + `_ + Pattern Recognition Letters. 
30. 27-38.

-   .. [PD2000] Provost, F., Domingos, P. (2000). `Well-trained PETs: Improving
-      probability estimation trees
-      `_
-      (Section 6.2), CeDER Working Paper #IS-00-04, Stern School of Business,
-      New York University.
+.. [PD2000] Provost, F., Domingos, P. (2000). `Well-trained PETs: Improving
+   probability estimation trees
+   `_
+   (Section 6.2), CeDER Working Paper #IS-00-04, Stern School of Business,
+   New York University.

-   .. [F2006] Fawcett, T., 2006. `An introduction to ROC analysis.
-      `_
-      Pattern Recognition Letters, 27(8), pp. 861-874.
+.. [F2006] Fawcett, T., 2006. `An introduction to ROC analysis.
+   `_
+   Pattern Recognition Letters, 27(8), pp. 861-874.

-   .. [F2001] Fawcett, T., 2001. `Using rule sets to maximize
-      ROC performance `_
-      In Data Mining, 2001.
-      Proceedings IEEE International Conference, pp. 131-138.
+.. [F2001] Fawcett, T., 2001. `Using rule sets to maximize
+   ROC performance `_
+   In Data Mining, 2001.
+   Proceedings IEEE International Conference, pp. 131-138.

.. _det_curve:

@@ -1657,67 +1814,57 @@ same classification task:
   :scale: 75
   :align: center

-.. topic:: Examples:
-
-  * See :ref:`sphx_glr_auto_examples_model_selection_plot_det.py`
-    for an example comparison between receiver operating characteristic (ROC)
-    curves and Detection error tradeoff (DET) curves.
-
-|details-start|
-**Properties**
-|details-split|
+.. dropdown:: Properties

-* DET curves form a linear curve in normal deviate scale if the detection
-  scores are normally (or close-to normally) distributed.
-  It was shown by [Navratil2007]_ that the reverse is not necessarily true and
-  even more general distributions are able to produce linear DET curves.
+  * DET curves are linear in the normal deviate scale if the detection
+    scores are normally (or close to normally) distributed.
+    It was shown by [Navratil2007]_ that the reverse is not necessarily true and
+    even more general distributions are able to produce linear DET curves.

-* The normal deviate scale transformation spreads out the points such that a
-  comparatively larger space of plot is occupied.
-  Therefore curves with similar classification performance might be easier to
-  distinguish on a DET plot.
+  * The normal deviate scale transformation spreads out the points so that they
+    occupy a comparatively larger area of the plot.
+    Therefore, curves with similar classification performance might be easier to
+    distinguish on a DET plot.

-* With False Negative Rate being "inverse" to True Positive Rate the point
-  of perfection for DET curves is the origin (in contrast to the top left
-  corner for ROC curves).
+  * Since the False Negative Rate is the "inverse" of the True Positive Rate,
+    the point of perfection for DET curves is the origin (in contrast to the
+    top left corner for ROC curves).

-|details-end|
+.. dropdown:: Applications and limitations

-|details-start|
-**Applications and limitations**
-|details-split|
+  DET curves are intuitive to read and hence allow quick visual assessment of a
+  classifier's performance.
+  Additionally, DET curves can be consulted for threshold analysis and operating
+  point selection.
+  This is particularly helpful if a comparison of error types is required.

-DET curves are intuitive to read and hence allow quick visual assessment of a
-classifier's performance.
-Additionally DET curves can be consulted for threshold analysis and operating
-point selection.
-This is particularly helpful if a comparison of error types is required.

+  On the other hand, DET curves do not provide their metric as a single number.
+  Therefore, for either automated evaluation or comparison to other
+  classification tasks, metrics like the derived area under the ROC curve might
+  be better suited.

-On the other hand DET curves do not provide their metric as a single number.
-Therefore for either automated evaluation or comparison to other
-classification tasks metrics like the derived area under ROC curve might be
-better suited.
+.. rubric:: Examples

-|details-end|
+* See :ref:`sphx_glr_auto_examples_model_selection_plot_det.py`
+  for an example comparison between receiver operating characteristic (ROC)
+  curves and Detection error tradeoff (DET) curves.

-.. topic:: References:
+.. rubric:: References

-   .. [WikipediaDET2017] Wikipedia contributors. Detection error tradeoff.
-      Wikipedia, The Free Encyclopedia. September 4, 2017, 23:33 UTC.
-      Available at: https://en.wikipedia.org/w/index.php?title=Detection_error_tradeoff&oldid=798982054.
-      Accessed February 19, 2018.
+.. [WikipediaDET2017] Wikipedia contributors. Detection error tradeoff.
+   Wikipedia, The Free Encyclopedia. September 4, 2017, 23:33 UTC.
+   Available at: https://en.wikipedia.org/w/index.php?title=Detection_error_tradeoff&oldid=798982054.
+   Accessed February 19, 2018.

-   .. [Martin1997] A. Martin, G. Doddington, T. Kamm, M. Ordowski, and M. Przybocki,
-      `The DET Curve in Assessment of Detection Task Performance
-      `_,
-      NIST 1997.
+.. [Martin1997] A. Martin, G. Doddington, T. Kamm, M. Ordowski, and M. Przybocki,
+   `The DET Curve in Assessment of Detection Task Performance
+   `_, NIST 1997.

-   .. [Navratil2007] J. Navractil and D. Klusacek,
-      "`On Linear DETs,
-      `_"
-      2007 IEEE International Conference on Acoustics,
-      Speech and Signal Processing - ICASSP '07, Honolulu,
-      HI, 2007, pp. IV-229-IV-232.
+.. [Navratil2007] J. Navratil and D. Klusacek,
+   `"On Linear DETs" `_,
+   2007 IEEE International Conference on Acoustics,
+   Speech and Signal Processing - ICASSP '07, Honolulu,
+   HI, 2007, pp. IV-229-IV-232.

.. _zero_one_loss:

@@ -1765,52 +1912,75 @@ set [0,1] has an error::

    >>> zero_one_loss(np.array([[0, 1], [1, 1]]), np.ones((2, 2)), normalize=False)
    1.0

-.. topic:: Example:
+.. rubric:: Examples

-  * See :ref:`sphx_glr_auto_examples_feature_selection_plot_rfe_with_cross_validation.py`
-    for an example of zero one loss usage to perform recursive feature
-    elimination with cross-validation.
+* See :ref:`sphx_glr_auto_examples_feature_selection_plot_rfe_with_cross_validation.py`
+  for an example of zero-one loss usage to perform recursive feature
+  elimination with cross-validation.

.. _brier_score_loss:

Brier score loss
----------------

-The :func:`brier_score_loss` function computes the
-`Brier score `_
-for binary classes [Brier1950]_. Quoting Wikipedia:
+The :func:`brier_score_loss` function computes the `Brier score
+`_ for binary and multiclass
+probabilistic predictions and is equivalent to the mean squared error.
+Quoting Wikipedia:

-  "The Brier score is a proper score function that measures the accuracy of
-  probabilistic predictions. It is applicable to tasks in which predictions
-  must assign probabilities to a set of mutually exclusive discrete outcomes."
+  "The Brier score is a strictly proper scoring rule that measures the accuracy of
+  probabilistic predictions. [...] [It] is applicable to tasks in which predictions
+  must assign probabilities to a set of mutually exclusive discrete outcomes or
+  classes."
-This function returns the mean squared error of the actual outcome
-:math:`y \in \{0,1\}` and the predicted probability estimate
-:math:`p = \operatorname{Pr}(y = 1)` (:term:`predict_proba`) as outputted by:
+Let the true labels for a set of :math:`N` data points be encoded as a 1-of-K binary
+indicator matrix :math:`Y`, i.e., :math:`y_{i,k} = 1` if sample :math:`i` has
+label :math:`k` taken from a set of :math:`K` labels. Let :math:`\hat{P}` be a matrix
+of probability estimates with elements :math:`\hat{p}_{i,k} \approx \operatorname{Pr}(y_{i,k} = 1)`.
+Following the original definition by [Brier1950]_, the Brier score is given by:

 .. math::

-   BS = \frac{1}{n_{\text{samples}}} \sum_{i=0}^{n_{\text{samples}} - 1}(y_i - p_i)^2
+   BS(Y, \hat{P}) = \frac{1}{N}\sum_{i=0}^{N-1}\sum_{k=0}^{K-1}(y_{i,k} - \hat{p}_{i,k})^{2}

-The Brier score loss is also between 0 to 1 and the lower the value (the mean
-square difference is smaller), the more accurate the prediction is.
+The Brier score lies in the interval :math:`[0, 2]`; the lower the value, the
+better the probability estimates are (the mean squared difference is smaller).
+Indeed, the Brier score is a strictly proper scoring rule, meaning that it
+achieves its best score only when the estimated probabilities equal the
+true ones.
+
+Note that in the binary case, the Brier score is usually divided by two and
+then lies in the interval :math:`[0, 1]`. For binary targets
+:math:`y_i \in \{0, 1\}` and probability estimates
+:math:`\hat{p}_i \approx \operatorname{Pr}(y_i = 1)`
+for the positive class, the Brier score is then equal to:
+
+.. math::
+
+   BS(y, \hat{p}) = \frac{1}{N} \sum_{i=0}^{N - 1}(y_i - \hat{p}_i)^2
+
+The :func:`brier_score_loss` function computes the Brier score given the
+ground-truth labels and predicted probabilities, as returned by an estimator's
+``predict_proba`` method. The `scale_by_half` parameter controls which of the
+two definitions above is used.
-
-Here is a small example of usage of this function::

    >>> import numpy as np
    >>> from sklearn.metrics import brier_score_loss
    >>> y_true = np.array([0, 1, 1, 0])
    >>> y_true_categorical = np.array(["spam", "ham", "ham", "spam"])
    >>> y_prob = np.array([0.1, 0.9, 0.8, 0.4])
-   >>> y_pred = np.array([0, 1, 1, 0])
    >>> brier_score_loss(y_true, y_prob)
    0.055
    >>> brier_score_loss(y_true, 1 - y_prob, pos_label=0)
    0.055
    >>> brier_score_loss(y_true_categorical, y_prob, pos_label="ham")
    0.055
-   >>> brier_score_loss(y_true, y_prob > 0.5)
-   0.0
+   >>> brier_score_loss(
+   ...     ["eggs", "ham", "spam"],
+   ...     [[0.8, 0.1, 0.1], [0.2, 0.7, 0.1], [0.2, 0.2, 0.6]],
+   ...     labels=["eggs", "ham", "spam"],
+   ... )
+   0.146

The Brier score can be used to assess how well a classifier is calibrated.
However, a lower Brier score loss does not always mean a better calibration.

@@ -1825,28 +1995,27 @@ necessarily mean a better calibrated model.
 "Only when refinement loss remains the same does a lower Brier score loss
 always mean better calibration" [Bella2012]_, [Flach2008]_.

-.. topic:: Example:
+.. rubric:: Examples

-  * See :ref:`sphx_glr_auto_examples_calibration_plot_calibration.py`
-    for an example of Brier score loss usage to perform probability
-    calibration of classifiers.
+* See :ref:`sphx_glr_auto_examples_calibration_plot_calibration.py`
+  for an example of Brier score loss usage to perform probability
+  calibration of classifiers.

-.. topic:: References:
+.. rubric:: References

-  .. [Brier1950] G.
Brier, `Verification of forecasts expressed in terms of - probability - `_, - Monthly weather review 78.1 (1950) +.. [Brier1950] G. Brier, `Verification of forecasts expressed in terms of probability + `_, + Monthly weather review 78.1 (1950) - .. [Bella2012] Bella, Ferri, Hernández-Orallo, and Ramírez-Quintana - `"Calibration of Machine Learning Models" - `_ - in Khosrow-Pour, M. "Machine learning: concepts, methodologies, tools - and applications." Hershey, PA: Information Science Reference (2012). +.. [Bella2012] Bella, Ferri, Hernández-Orallo, and Ramírez-Quintana + `"Calibration of Machine Learning Models" + `_ + in Khosrow-Pour, M. "Machine learning: concepts, methodologies, tools + and applications." Hershey, PA: Information Science Reference (2012). - .. [Flach2008] Flach, Peter, and Edson Matsubara. `"On classification, ranking, - and probability estimation." `_ - Dagstuhl Seminar Proceedings. Schloss Dagstuhl-Leibniz-Zentrum fr Informatik (2008). +.. [Flach2008] Flach, Peter, and Edson Matsubara. `"On classification, ranking, + and probability estimation." `_ + Dagstuhl Seminar Proceedings. Schloss Dagstuhl-Leibniz-Zentrum für Informatik (2008). .. _class_likelihood_ratios: @@ -1899,92 +2068,147 @@ counts ``tp`` (see `the wikipedia page `_ for the actual formulas). -.. topic:: Examples: - - * :ref:`sphx_glr_auto_examples_model_selection_plot_likelihood_ratios.py` - -|details-start| -**Interpretation across varying prevalence** -|details-split| +.. rubric:: Examples -Both class likelihood ratios are interpretable in terms of an odds ratio -(pre-test and post-tests): +* :ref:`sphx_glr_auto_examples_model_selection_plot_likelihood_ratios.py` -.. math:: +.. dropdown:: Interpretation across varying prevalence - \text{post-test odds} = \text{Likelihood ratio} \times \text{pre-test odds}. + Both class likelihood ratios are interpretable in terms of an odds ratio + (pre-test and post-tests): -Odds are in general related to probabilities via + .. math:: -.. math:: + \text{post-test odds} = \text{Likelihood ratio} \times \text{pre-test odds}. - \text{odds} = \frac{\text{probability}}{1 - \text{probability}}, + Odds are in general related to probabilities via -or equivalently + .. math:: -.. math:: + \text{odds} = \frac{\text{probability}}{1 - \text{probability}}, - \text{probability} = \frac{\text{odds}}{1 + \text{odds}}. + or equivalently -On a given population, the pre-test probability is given by the prevalence. By -converting odds to probabilities, the likelihood ratios can be translated into a -probability of truly belonging to either class before and after a classifier -prediction: + .. math:: -.. math:: + \text{probability} = \frac{\text{odds}}{1 + \text{odds}}. - \text{post-test odds} = \text{Likelihood ratio} \times - \frac{\text{pre-test probability}}{1 - \text{pre-test probability}}, + On a given population, the pre-test probability is given by the prevalence. By + converting odds to probabilities, the likelihood ratios can be translated into a + probability of truly belonging to either class before and after a classifier + prediction: -.. math:: + .. math:: - \text{post-test probability} = \frac{\text{post-test odds}}{1 + \text{post-test odds}}. + \text{post-test odds} = \text{Likelihood ratio} \times + \frac{\text{pre-test probability}}{1 - \text{pre-test probability}}, -|details-end| + .. math:: -|details-start| -**Mathematical divergences** -|details-split| + \text{post-test probability} = \frac{\text{post-test odds}}{1 + \text{post-test odds}}. 
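+  For instance, the following minimal sketch (the labels are hypothetical and
+  chosen only to keep the arithmetic simple) spells out this conversion with
+  :func:`class_likelihood_ratios`::
+
+    >>> from sklearn.metrics import class_likelihood_ratios
+    >>> y_true = [1, 1, 1, 1, 0, 0, 0, 0]
+    >>> y_pred = [1, 1, 1, 0, 1, 0, 0, 0]
+    >>> pos_lr, neg_lr = class_likelihood_ratios(y_true, y_pred)
+    >>> float(pos_lr)  # sensitivity 0.75 divided by false positive rate 0.25
+    3.0
+    >>> pre_test_odds = 0.5 / (1 - 0.5)  # prevalence of 50%
+    >>> post_test_odds = pos_lr * pre_test_odds
+    >>> float(post_test_odds / (1 + post_test_odds))  # post-test probability
+    0.75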
-The positive likelihood ratio is undefined when :math:`fp = 0`, which can be
-interpreted as the classifier perfectly identifying positive cases. If :math:`fp
-= 0` and additionally :math:`tp = 0`, this leads to a zero/zero division. This
-happens, for instance, when using a `DummyClassifier` that always predicts the
-negative class and therefore the interpretation as a perfect classifier is lost.
+.. dropdown:: Mathematical divergences

-The negative likelihood ratio is undefined when :math:`tn = 0`. Such divergence
-is invalid, as :math:`LR_- > 1` would indicate an increase in the odds of a
-sample belonging to the positive class after being classified as negative, as if
-the act of classifying caused the positive condition. This includes the case of
-a `DummyClassifier` that always predicts the positive class (i.e. when
-:math:`tn=fn=0`).
+  The positive likelihood ratio (`LR+`) is undefined when :math:`fp=0`, meaning the
+  classifier does not misclassify any negative labels as positives. This condition can
+  either indicate a perfect identification of all the negative cases or, if there are
+  also no true positive predictions (:math:`tp=0`), that the classifier does not predict
+  the positive class at all. In the first case, `LR+` can be interpreted as `np.inf`; in
+  the second case (for instance, with highly imbalanced data), it can be interpreted as
+  `np.nan`.

-Both class likelihood ratios are undefined when :math:`tp=fn=0`, which means
-that no samples of the positive class were present in the testing set. This can
-also happen when cross-validating highly imbalanced data.
+  The negative likelihood ratio (`LR-`) is undefined when :math:`tn=0`. Such a
+  divergence is invalid, as :math:`LR_- > 1.0` would indicate an increase in the odds of
+  a sample belonging to the positive class after being classified as negative, as if the
+  act of classifying caused the positive condition. This includes the case of a
+  :class:`~sklearn.dummy.DummyClassifier` that always predicts the positive class
+  (i.e. when :math:`tn=fn=0`).

-In all the previous cases the :func:`class_likelihood_ratios` function raises by
-default an appropriate warning message and returns `nan` to avoid pollution when
-averaging over cross-validation folds.
+  Both class likelihood ratios (`LR+` and `LR-`) are undefined when :math:`tp=fn=0`,
+  which means that no samples of the positive class were present in the test set. This
+  can happen when cross-validating on highly imbalanced data and also leads to a
+  division by zero.

-For a worked-out demonstration of the :func:`class_likelihood_ratios` function,
-see the example below.
+  If a division by zero occurs and `raise_warning` is set to `True` (default),
+  :func:`class_likelihood_ratios` raises an `UndefinedMetricWarning` and returns
+  `np.nan` to avoid pollution when averaging over cross-validation folds. Users can
+  set the return values in case of a division by zero with the
+  `replace_undefined_by` parameter.

-|details-end|
+  For a worked-out demonstration of the :func:`class_likelihood_ratios` function,
+  see the example below.

-|details-start|
-**References**
-|details-split|
+.. dropdown:: References

  * `Wikipedia entry for Likelihood ratios in diagnostic testing
    `_

  * Brenner, H., & Gefeller, O. (1997). Variation of sensitivity, specificity,
    likelihood ratios and predictive
-   values with disease prevalence.
-   Statistics in medicine, 16(9), 981-991.
+   values with disease prevalence. Statistics in medicine, 16(9), 981-991.
+
+
+..
_d2_score_classification: + +D² score for classification +--------------------------- + +The D² score computes the fraction of deviance explained. +It is a generalization of R², where the squared error is generalized and replaced +by a classification deviance of choice :math:`\text{dev}(y, \hat{y})` +(e.g., Log loss). D² is a form of a *skill score*. +It is calculated as + +.. math:: + + D^2(y, \hat{y}) = 1 - \frac{\text{dev}(y, \hat{y})}{\text{dev}(y, y_{\text{null}})} \,. + +Where :math:`y_{\text{null}}` is the optimal prediction of an intercept-only model +(e.g., the per-class proportion of `y_true` in the case of the Log loss). + +Like R², the best possible score is 1.0 and it can be negative (because the +model can be arbitrarily worse). A constant model that always predicts +:math:`y_{\text{null}}`, disregarding the input features, would get a D² score +of 0.0. + +.. dropdown:: D2 log loss score + + The :func:`d2_log_loss_score` function implements the special case + of D² with the log loss, see :ref:`log_loss`, i.e.: + + .. math:: + + \text{dev}(y, \hat{y}) = \text{log_loss}(y, \hat{y}). + + Here are some usage examples of the :func:`d2_log_loss_score` function:: + + >>> from sklearn.metrics import d2_log_loss_score + >>> y_true = [1, 1, 2, 3] + >>> y_pred = [ + ... [0.5, 0.25, 0.25], + ... [0.5, 0.25, 0.25], + ... [0.5, 0.25, 0.25], + ... [0.5, 0.25, 0.25], + ... ] + >>> d2_log_loss_score(y_true, y_pred) + 0.0 + >>> y_true = [1, 2, 3] + >>> y_pred = [ + ... [0.98, 0.01, 0.01], + ... [0.01, 0.98, 0.01], + ... [0.01, 0.01, 0.98], + ... ] + >>> d2_log_loss_score(y_true, y_pred) + 0.981 + >>> y_true = [1, 2, 3] + >>> y_pred = [ + ... [0.1, 0.6, 0.3], + ... [0.1, 0.6, 0.3], + ... [0.4, 0.5, 0.1], + ... ] + >>> d2_log_loss_score(y_true, y_pred) + -0.552 -|details-end| .. _multilabel_ranking_metrics: @@ -2006,7 +2230,7 @@ The :func:`coverage_error` function computes the average number of labels that have to be included in the final prediction such that all true labels are predicted. This is useful if you want to know how many top-scored-labels you have to predict in average without missing any true one. The best value -of this metrics is thus the average number of true labels. +of this metric is thus the average number of true labels. .. note:: @@ -2082,7 +2306,7 @@ Here is a small example of usage of this function:: >>> y_true = np.array([[1, 0, 0], [0, 0, 1]]) >>> y_score = np.array([[0.75, 0.5, 1], [1, 0.2, 0.1]]) >>> label_ranking_average_precision_score(y_true, y_score) - 0.416... + 0.416 .. _label_ranking_loss: @@ -2117,21 +2341,18 @@ Here is a small example of usage of this function:: >>> y_true = np.array([[1, 0, 0], [0, 0, 1]]) >>> y_score = np.array([[0.75, 0.5, 1], [1, 0.2, 0.1]]) >>> label_ranking_loss(y_true, y_score) - 0.75... + 0.75 >>> # With the following prediction, we have perfect and minimal loss >>> y_score = np.array([[1.0, 0.1, 0.2], [0.1, 0.2, 0.9]]) >>> label_ranking_loss(y_true, y_score) 0.0 -|details-start| -**References** -|details-split| +.. dropdown:: References * Tsoumakas, G., Katakis, I., & Vlahavas, I. (2010). Mining multi-label data. In Data mining and knowledge discovery handbook (pp. 667-685). Springer US. -|details-end| .. _ndcg: @@ -2151,7 +2372,7 @@ engine algorithms or related applications. Using a graded relevance scale of documents in a search-engine result set, DCG measures the usefulness, or gain, of a document based on its position in the result list. 
The gain is accumulated from the top of the result list to the bottom, with the gain of each result -discounted at lower ranks" +discounted at lower ranks." DCG orders the true targets (e.g. relevance of query answers) in the predicted order, then multiplies them by a logarithmic decay and sums the result. The sum @@ -2177,9 +2398,7 @@ DCG score is and the NDCG score is the DCG score divided by the DCG score obtained for :math:`y`. -|details-start| -**References** -|details-split| +.. dropdown:: References * `Wikipedia entry for Discounted Cumulative Gain `_ @@ -2197,7 +2416,6 @@ and the NDCG score is the DCG score divided by the DCG score obtained for European conference on information retrieval (pp. 414-421). Springer, Berlin, Heidelberg. -|details-end| .. _regression_metrics: @@ -2230,9 +2448,6 @@ leads to a weighting of each individual score by the variance of the corresponding target variable. This setting quantifies the globally captured unscaled variance. If the target variables are of different scale, then this score puts more importance on explaining the higher variance variables. -``multioutput='variance_weighted'`` is the default value for :func:`r2_score` -for backward compatibility. This will be changed to ``uniform_average`` in the -future. .. _r2_score: @@ -2284,19 +2499,19 @@ Here is a small example of usage of the :func:`r2_score` function:: >>> y_true = [3, -0.5, 2, 7] >>> y_pred = [2.5, 0.0, 2, 8] >>> r2_score(y_true, y_pred) - 0.948... + 0.948 >>> y_true = [[0.5, 1], [-1, 1], [7, -6]] >>> y_pred = [[0, 2], [-1, 2], [8, -5]] >>> r2_score(y_true, y_pred, multioutput='variance_weighted') - 0.938... + 0.938 >>> y_true = [[0.5, 1], [-1, 1], [7, -6]] >>> y_pred = [[0, 2], [-1, 2], [8, -5]] >>> r2_score(y_true, y_pred, multioutput='uniform_average') - 0.936... + 0.936 >>> r2_score(y_true, y_pred, multioutput='raw_values') - array([0.965..., 0.908...]) + array([0.965, 0.908]) >>> r2_score(y_true, y_pred, multioutput=[0.3, 0.7]) - 0.925... + 0.925 >>> y_true = [-2, -2, -2] >>> y_pred = [-2, -2, -2] >>> r2_score(y_true, y_pred) @@ -2310,11 +2525,11 @@ Here is a small example of usage of the :func:`r2_score` function:: >>> r2_score(y_true, y_pred, force_finite=False) -inf -.. topic:: Example: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_and_elasticnet.py` - for an example of R² score usage to - evaluate Lasso and Elastic Net on sparse signals. +* See :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_and_elasticnet.py` + for an example of R² score usage to + evaluate Lasso and Elastic Net on sparse signals. .. _mean_absolute_error: @@ -2348,14 +2563,14 @@ Here is a small example of usage of the :func:`mean_absolute_error` function:: >>> mean_absolute_error(y_true, y_pred, multioutput='raw_values') array([0.5, 1. ]) >>> mean_absolute_error(y_true, y_pred, multioutput=[0.3, 0.7]) - 0.85... + 0.85 .. _mean_squared_error: Mean squared error ------------------- -The :func:`mean_squared_error` function computes `mean square +The :func:`mean_squared_error` function computes `mean squared error `_, a risk metric corresponding to the expected value of the squared (quadratic) error or loss. @@ -2379,16 +2594,15 @@ function:: >>> y_true = [[0.5, 1], [-1, 1], [7, -6]] >>> y_pred = [[0, 2], [-1, 2], [8, -5]] >>> mean_squared_error(y_true, y_pred) - 0.7083... + 0.7083 -.. topic:: Examples: +.. 
rubric:: Examples - * See :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_regression.py` - for an example of mean squared error usage to - evaluate gradient boosting regression. +* See :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_regression.py` + for an example of mean squared error usage to evaluate gradient boosting regression. Taking the square root of the MSE, called the root mean squared error (RMSE), is another -common metric that provides a measure in the same units as the target variable. RSME is +common metric that provides a measure in the same units as the target variable. RMSE is available through the :func:`root_mean_squared_error` function. .. _mean_squared_log_error: @@ -2422,11 +2636,11 @@ function:: >>> y_true = [3, 5, 2.5, 7] >>> y_pred = [2.5, 5, 4, 8] >>> mean_squared_log_error(y_true, y_pred) - 0.039... + 0.0397 >>> y_true = [[0.5, 1], [1, 2], [7, 6]] >>> y_pred = [[0.5, 2], [1, 2.5], [8, 8]] >>> mean_squared_log_error(y_true, y_pred) - 0.044... + 0.044 The root mean squared logarithmic error (RMSLE) is available through the :func:`root_mean_squared_log_error` function. @@ -2460,13 +2674,29 @@ function:: >>> y_true = [1, 10, 1e6] >>> y_pred = [0.9, 15, 1.2e6] >>> mean_absolute_percentage_error(y_true, y_pred) - 0.2666... + 0.2666 In above example, if we had used `mean_absolute_error`, it would have ignored the small magnitude values and only reflected the error in prediction of highest magnitude value. But that problem is resolved in case of MAPE because it calculates relative percentage error with respect to actual output. +.. note:: + + The MAPE formula here does not represent the common "percentage" definition: the + percentage in the range [0, 100] is converted to a relative value in the range [0, + 1] by dividing by 100. Thus, an error of 200% corresponds to a relative error of 2. + The motivation here is to have a range of values that is more consistent with other + error metrics in scikit-learn, such as `accuracy_score`. + + To obtain the mean absolute percentage error as per the Wikipedia formula, + multiply the `mean_absolute_percentage_error` computed here by 100. + +.. dropdown:: References + + * `Wikipedia entry for Mean Absolute Percentage Error + `_ + .. _median_absolute_error: Median absolute error @@ -2525,7 +2755,7 @@ Here is a small example of usage of the :func:`max_error` function:: >>> y_true = [3, 2, 7, 1] >>> y_pred = [9, 2, 7, 1] >>> max_error(y_true, y_pred) - 6 + 6.0 The :func:`max_error` does not support multioutput. @@ -2551,7 +2781,7 @@ The best possible score is 1.0, lower values are worse. .. topic:: Link to :ref:`r2_score` The difference between the explained variance score and the :ref:`r2_score` - is that when the explained variance score does not account for + is that the explained variance score does not account for systematic offset in the prediction. For this reason, the :ref:`r2_score` should be preferred in general. @@ -2572,13 +2802,13 @@ function:: >>> y_true = [3, -0.5, 2, 7] >>> y_pred = [2.5, 0.0, 2, 8] >>> explained_variance_score(y_true, y_pred) - 0.957... + 0.957 >>> y_true = [[0.5, 1], [-1, 1], [7, -6]] >>> y_pred = [[0, 2], [-1, 2], [8, -5]] >>> explained_variance_score(y_true, y_pred, multioutput='raw_values') - array([0.967..., 1. ]) + array([0.967, 1. ]) >>> explained_variance_score(y_true, y_pred, multioutput=[0.3, 0.7]) - 0.990... 
+ 0.990 >>> y_true = [-2, -2, -2] >>> y_pred = [-2, -2, -2] >>> explained_variance_score(y_true, y_pred) @@ -2650,16 +2880,16 @@ prediction difference of the second point,:: If we increase ``power`` to 1,:: >>> mean_tweedie_deviance([1.0], [1.5], power=1) - 0.18... + 0.189 >>> mean_tweedie_deviance([100.], [150.], power=1) - 18.9... + 18.9 the difference in errors decreases. Finally, by setting, ``power=2``:: >>> mean_tweedie_deviance([1.0], [1.5], power=2) - 0.14... + 0.144 >>> mean_tweedie_deviance([100.], [150.], power=2) - 0.14... + 0.144 we would get identical errors. The deviance when ``power=2`` is thus only sensitive to relative errors. @@ -2686,13 +2916,13 @@ Here is a small example of usage of the :func:`mean_pinball_loss` function:: >>> from sklearn.metrics import mean_pinball_loss >>> y_true = [1, 2, 3] >>> mean_pinball_loss(y_true, [0, 2, 3], alpha=0.1) - 0.03... + 0.033 >>> mean_pinball_loss(y_true, [1, 2, 4], alpha=0.1) - 0.3... + 0.3 >>> mean_pinball_loss(y_true, [0, 2, 3], alpha=0.9) - 0.3... + 0.3 >>> mean_pinball_loss(y_true, [1, 2, 4], alpha=0.9) - 0.03... + 0.033 >>> mean_pinball_loss(y_true, y_true, alpha=0.1) 0.0 >>> mean_pinball_loss(y_true, y_true, alpha=0.9) @@ -2717,18 +2947,18 @@ quantile regressor via cross-validation: ... random_state=0, ... ) >>> cross_val_score(estimator, X, y, cv=5, scoring=mean_pinball_loss_95p) - array([13.6..., 9.7..., 23.3..., 9.5..., 10.4...]) + array([13.6, 9.7, 23.3, 9.5, 10.4]) It is also possible to build scorer objects for hyper-parameter tuning. The sign of the loss must be switched to ensure that greater means better as explained in the example linked below. -.. topic:: Example: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_quantile.py` - for an example of using the pinball loss to evaluate and tune the - hyper-parameters of quantile regression models on data with non-symmetric - noise and outliers. +* See :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_quantile.py` + for an example of using the pinball loss to evaluate and tune the + hyper-parameters of quantile regression models on data with non-symmetric + noise and outliers. .. _d2_score: @@ -2754,122 +2984,66 @@ model can be arbitrarily worse). A constant model that always predicts :math:`y_{\text{null}}`, disregarding the input features, would get a D² score of 0.0. -|details-start| -**D² Tweedie score** -|details-split| - -The :func:`d2_tweedie_score` function implements the special case of D² -where :math:`\text{dev}(y, \hat{y})` is the Tweedie deviance, see :ref:`mean_tweedie_deviance`. -It is also known as D² Tweedie and is related to McFadden's likelihood ratio index. - -The argument ``power`` defines the Tweedie power as for -:func:`mean_tweedie_deviance`. Note that for `power=0`, -:func:`d2_tweedie_score` equals :func:`r2_score` (for single targets). - -A scorer object with a specific choice of ``power`` can be built by:: - - >>> from sklearn.metrics import d2_tweedie_score, make_scorer - >>> d2_tweedie_score_15 = make_scorer(d2_tweedie_score, power=1.5) - -|details-end| - -|details-start| -**D² pinball score** -|details-split| +.. dropdown:: D² Tweedie score -The :func:`d2_pinball_score` function implements the special case -of D² with the pinball loss, see :ref:`pinball_loss`, i.e.: + The :func:`d2_tweedie_score` function implements the special case of D² + where :math:`\text{dev}(y, \hat{y})` is the Tweedie deviance, see :ref:`mean_tweedie_deviance`. 
+ It is also known as D² Tweedie and is related to McFadden's likelihood ratio index. -.. math:: - - \text{dev}(y, \hat{y}) = \text{pinball}(y, \hat{y}). + The argument ``power`` defines the Tweedie power as for + :func:`mean_tweedie_deviance`. Note that for `power=0`, + :func:`d2_tweedie_score` equals :func:`r2_score` (for single targets). -The argument ``alpha`` defines the slope of the pinball loss as for -:func:`mean_pinball_loss` (:ref:`pinball_loss`). It determines the -quantile level ``alpha`` for which the pinball loss and also D² -are optimal. Note that for `alpha=0.5` (the default) :func:`d2_pinball_score` -equals :func:`d2_absolute_error_score`. + A scorer object with a specific choice of ``power`` can be built by:: -A scorer object with a specific choice of ``alpha`` can be built by:: + >>> from sklearn.metrics import d2_tweedie_score, make_scorer + >>> d2_tweedie_score_15 = make_scorer(d2_tweedie_score, power=1.5) - >>> from sklearn.metrics import d2_pinball_score, make_scorer - >>> d2_pinball_score_08 = make_scorer(d2_pinball_score, alpha=0.8) +.. dropdown:: D² pinball score -|details-end| + The :func:`d2_pinball_score` function implements the special case + of D² with the pinball loss, see :ref:`pinball_loss`, i.e.: -|details-start| -**D² absolute error score** -|details-split| + .. math:: -The :func:`d2_absolute_error_score` function implements the special case of -the :ref:`mean_absolute_error`: - -.. math:: + \text{dev}(y, \hat{y}) = \text{pinball}(y, \hat{y}). - \text{dev}(y, \hat{y}) = \text{MAE}(y, \hat{y}). - -Here are some usage examples of the :func:`d2_absolute_error_score` function:: - - >>> from sklearn.metrics import d2_absolute_error_score - >>> y_true = [3, -0.5, 2, 7] - >>> y_pred = [2.5, 0.0, 2, 8] - >>> d2_absolute_error_score(y_true, y_pred) - 0.764... - >>> y_true = [1, 2, 3] - >>> y_pred = [1, 2, 3] - >>> d2_absolute_error_score(y_true, y_pred) - 1.0 - >>> y_true = [1, 2, 3] - >>> y_pred = [2, 2, 2] - >>> d2_absolute_error_score(y_true, y_pred) - 0.0 + The argument ``alpha`` defines the slope of the pinball loss as for + :func:`mean_pinball_loss` (:ref:`pinball_loss`). It determines the + quantile level ``alpha`` for which the pinball loss and also D² + are optimal. Note that for `alpha=0.5` (the default) :func:`d2_pinball_score` + equals :func:`d2_absolute_error_score`. -|details-end| + A scorer object with a specific choice of ``alpha`` can be built by:: -|details-start| -**D² log loss score** -|details-split| + >>> from sklearn.metrics import d2_pinball_score, make_scorer + >>> d2_pinball_score_08 = make_scorer(d2_pinball_score, alpha=0.8) -The :func:`d2_log_loss_score` function implements the special case -of D² with the log loss, see :ref:`log_loss`, i.e.: +.. dropdown:: D² absolute error score -.. math:: + The :func:`d2_absolute_error_score` function implements the special case of + the :ref:`mean_absolute_error`: - \text{dev}(y, \hat{y}) = \text{log_loss}(y, \hat{y}). + .. math:: -The :math:`y_{\text{null}}` for the :func:`log_loss` is the per-class -proportion. + \text{dev}(y, \hat{y}) = \text{MAE}(y, \hat{y}). -Here are some usage examples of the :func:`d2_log_loss_score` function:: + Here are some usage examples of the :func:`d2_absolute_error_score` function:: - >>> from sklearn.metrics import d2_log_loss_score - >>> y_true = [1, 1, 2, 3] - >>> y_pred = [ - ... [0.5, 0.25, 0.25], - ... [0.5, 0.25, 0.25], - ... [0.5, 0.25, 0.25], - ... [0.5, 0.25, 0.25], - ... 
] - >>> d2_log_loss_score(y_true, y_pred) - 0.0 - >>> y_true = [1, 2, 3] - >>> y_pred = [ - ... [0.98, 0.01, 0.01], - ... [0.01, 0.98, 0.01], - ... [0.01, 0.01, 0.98], - ... ] - >>> d2_log_loss_score(y_true, y_pred) - 0.981... - >>> y_true = [1, 2, 3] - >>> y_pred = [ - ... [0.1, 0.6, 0.3], - ... [0.1, 0.6, 0.3], - ... [0.4, 0.5, 0.1], - ... ] - >>> d2_log_loss_score(y_true, y_pred) - -0.552... + >>> from sklearn.metrics import d2_absolute_error_score + >>> y_true = [3, -0.5, 2, 7] + >>> y_pred = [2.5, 0.0, 2, 8] + >>> d2_absolute_error_score(y_true, y_pred) + 0.764 + >>> y_true = [1, 2, 3] + >>> y_pred = [1, 2, 3] + >>> d2_absolute_error_score(y_true, y_pred) + 1.0 + >>> y_true = [1, 2, 3] + >>> y_pred = [2, 2, 2] + >>> d2_absolute_error_score(y_true, y_pred) + 0.0 -|details-end| .. _visualization_regression_evaluation: @@ -2939,25 +3113,24 @@ model might be useful. Refer to the example below to see a model evaluation that makes use of this display. -.. topic:: Example: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_compose_plot_transformed_target.py` for - an example on how to use :class:`~sklearn.metrics.PredictionErrorDisplay` - to visualize the prediction quality improvement of a regression model - obtained by transforming the target before learning. +* See :ref:`sphx_glr_auto_examples_compose_plot_transformed_target.py` for + an example on how to use :class:`~sklearn.metrics.PredictionErrorDisplay` + to visualize the prediction quality improvement of a regression model + obtained by transforming the target before learning. .. _clustering_metrics: Clustering metrics -====================== +================== .. currentmodule:: sklearn.metrics The :mod:`sklearn.metrics` module implements several loss, score, and utility -functions. For more information see the :ref:`clustering_evaluation` -section for instance clustering, and :ref:`biclustering_evaluation` for -biclustering. - +functions to measure clustering performance. For more information see the +:ref:`clustering_evaluation` section for instance clustering, and +:ref:`biclustering_evaluation` for biclustering. .. _dummy_estimators: @@ -2999,19 +3172,19 @@ Next, let's compare the accuracy of ``SVC`` and ``most_frequent``:: >>> from sklearn.svm import SVC >>> clf = SVC(kernel='linear', C=1).fit(X_train, y_train) >>> clf.score(X_test, y_test) - 0.63... + 0.63 >>> clf = DummyClassifier(strategy='most_frequent', random_state=0) >>> clf.fit(X_train, y_train) DummyClassifier(random_state=0, strategy='most_frequent') >>> clf.score(X_test, y_test) - 0.57... + 0.579 We see that ``SVC`` doesn't do much better than a dummy classifier. Now, let's change the kernel:: >>> clf = SVC(kernel='rbf', C=1).fit(X_train, y_train) >>> clf.score(X_test, y_test) - 0.94... + 0.94 We see that the accuracy was boosted to almost 100%. A cross validation strategy is recommended for a better estimate of the accuracy, if it diff --git a/doc/modules/multiclass.rst b/doc/modules/multiclass.rst index 42762690ce8f7..ef7d6ab3000e1 100644 --- a/doc/modules/multiclass.rst +++ b/doc/modules/multiclass.rst @@ -172,10 +172,13 @@ Valid :term:`multiclass` representations for >>> from scipy import sparse >>> y_sparse = sparse.csr_matrix(y_dense) >>> print(y_sparse) - (0, 0) 1 - (1, 2) 1 - (2, 0) 1 - (3, 1) 1 + + Coords Values + (0, 0) 1 + (1, 2) 1 + (2, 0) 1 + (3, 1) 1 For more information about :class:`~sklearn.preprocessing.LabelBinarizer`, refer to :ref:`preprocessing_targets`. 
@@ -222,9 +225,11 @@ in which cell [i, j] indicates the presence of label j in sample i. :scale: 75% -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_miscellaneous_plot_multilabel.py` +* :ref:`sphx_glr_auto_examples_miscellaneous_plot_multilabel.py` +* :ref:`sphx_glr_auto_examples_classification_plot_classification_probability.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_logistic_multinomial.py` .. _ovo_classification: @@ -263,10 +268,10 @@ Below is an example of multiclass learning using OvO:: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]) -.. topic:: References: +.. rubric:: References - * "Pattern Recognition and Machine Learning. Springer", - Christopher M. Bishop, page 183, (First Edition) +* "Pattern Recognition and Machine Learning. Springer", + Christopher M. Bishop, page 183, (First Edition) .. _ecoc: @@ -321,21 +326,16 @@ Below is an example of multiclass learning using Output-Codes:: 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]) -.. topic:: References: +.. rubric:: References - * "Solving multiclass learning problems via error-correcting output codes", - Dietterich T., Bakiri G., - Journal of Artificial Intelligence Research 2, - 1995. +* "Solving multiclass learning problems via error-correcting output codes", + Dietterich T., Bakiri G., Journal of Artificial Intelligence Research 2, 1995. - .. [3] "The error coding method and PICTs", - James G., Hastie T., - Journal of Computational and Graphical statistics 7, - 1998. +.. [3] "The error coding method and PICTs", James G., Hastie T., + Journal of Computational and Graphical statistics 7, 1998. - * "The Elements of Statistical Learning", - Hastie T., Tibshirani R., Friedman J., page 606 (second-edition) - 2008. +* "The Elements of Statistical Learning", + Hastie T., Tibshirani R., Friedman J., page 606 (second-edition), 2008. .. _multilabel_classification: @@ -382,10 +382,13 @@ An example of the same ``y`` in sparse matrix form: >>> y_sparse = sparse.csr_matrix(y) >>> print(y_sparse) - (0, 0) 1 - (0, 3) 1 - (1, 2) 1 - (1, 3) 1 + + Coords Values + (0, 0) 1 + (0, 3) 1 + (1, 2) 1 + (1, 3) 1 .. _multioutputclassfier: @@ -432,10 +435,10 @@ one does not know the optimal ordering of the models in the chain so typically many randomly ordered chains are fit and their predictions are averaged together. -.. topic:: References: +.. rubric:: References - Jesse Read, Bernhard Pfahringer, Geoff Holmes, Eibe Frank, - "Classifier Chains for Multi-label Classification", 2009. +* Jesse Read, Bernhard Pfahringer, Geoff Holmes, Eibe Frank, + "Classifier Chains for Multi-label Classification", 2009. .. _multiclass_multioutput_classification: @@ -530,34 +533,34 @@ output for each sample. 
The following regressors natively support multioutput regression: - - :class:`cross_decomposition.CCA` - - :class:`tree.DecisionTreeRegressor` - - :class:`dummy.DummyRegressor` - - :class:`linear_model.ElasticNet` - - :class:`tree.ExtraTreeRegressor` - - :class:`ensemble.ExtraTreesRegressor` - - :class:`gaussian_process.GaussianProcessRegressor` - - :class:`neighbors.KNeighborsRegressor` - - :class:`kernel_ridge.KernelRidge` - - :class:`linear_model.Lars` - - :class:`linear_model.Lasso` - - :class:`linear_model.LassoLars` - - :class:`linear_model.LinearRegression` - - :class:`multioutput.MultiOutputRegressor` - - :class:`linear_model.MultiTaskElasticNet` - - :class:`linear_model.MultiTaskElasticNetCV` - - :class:`linear_model.MultiTaskLasso` - - :class:`linear_model.MultiTaskLassoCV` - - :class:`linear_model.OrthogonalMatchingPursuit` - - :class:`cross_decomposition.PLSCanonical` - - :class:`cross_decomposition.PLSRegression` - - :class:`linear_model.RANSACRegressor` - - :class:`neighbors.RadiusNeighborsRegressor` - - :class:`ensemble.RandomForestRegressor` - - :class:`multioutput.RegressorChain` - - :class:`linear_model.Ridge` - - :class:`linear_model.RidgeCV` - - :class:`compose.TransformedTargetRegressor` +- :class:`cross_decomposition.CCA` +- :class:`tree.DecisionTreeRegressor` +- :class:`dummy.DummyRegressor` +- :class:`linear_model.ElasticNet` +- :class:`tree.ExtraTreeRegressor` +- :class:`ensemble.ExtraTreesRegressor` +- :class:`gaussian_process.GaussianProcessRegressor` +- :class:`neighbors.KNeighborsRegressor` +- :class:`kernel_ridge.KernelRidge` +- :class:`linear_model.Lars` +- :class:`linear_model.Lasso` +- :class:`linear_model.LassoLars` +- :class:`linear_model.LinearRegression` +- :class:`multioutput.MultiOutputRegressor` +- :class:`linear_model.MultiTaskElasticNet` +- :class:`linear_model.MultiTaskElasticNetCV` +- :class:`linear_model.MultiTaskLasso` +- :class:`linear_model.MultiTaskLassoCV` +- :class:`linear_model.OrthogonalMatchingPursuit` +- :class:`cross_decomposition.PLSCanonical` +- :class:`cross_decomposition.PLSRegression` +- :class:`linear_model.RANSACRegressor` +- :class:`neighbors.RadiusNeighborsRegressor` +- :class:`ensemble.RandomForestRegressor` +- :class:`multioutput.RegressorChain` +- :class:`linear_model.Ridge` +- :class:`linear_model.RidgeCV` +- :class:`compose.TransformedTargetRegressor` Target format ------------- diff --git a/doc/modules/naive_bayes.rst b/doc/modules/naive_bayes.rst index 05ca928dfae0b..b25334a902050 100644 --- a/doc/modules/naive_bayes.rst +++ b/doc/modules/naive_bayes.rst @@ -69,15 +69,11 @@ On the flip side, although naive Bayes is known as a decent classifier, it is known to be a bad estimator, so the probability outputs from ``predict_proba`` are not to be taken too seriously. -|details-start| -**References** -|details-split| +.. dropdown:: References -* H. Zhang (2004). `The optimality of Naive Bayes. - `_ - Proc. FLAIRS. - -|details-end| + * H. Zhang (2004). `The optimality of Naive Bayes. + `_ + Proc. FLAIRS. .. _gaussian_naive_bayes: @@ -121,7 +117,7 @@ for each class :math:`y`, where :math:`n` is the number of features and :math:`\theta_{yi}` is the probability :math:`P(x_i \mid y)` of feature :math:`i` appearing in a sample belonging to class :math:`y`. -The parameters :math:`\theta_y` is estimated by a smoothed +The parameters :math:`\theta_y` are estimated by a smoothed version of maximum likelihood, i.e. relative frequency counting: .. math:: @@ -129,13 +125,13 @@ version of maximum likelihood, i.e. 
relative frequency counting: \hat{\theta}_{yi} = \frac{ N_{yi} + \alpha}{N_y + \alpha n} where :math:`N_{yi} = \sum_{x \in T} x_i` is -the number of times feature :math:`i` appears in a sample of class :math:`y` +the number of times feature :math:`i` appears in all samples of class :math:`y` in the training set :math:`T`, and :math:`N_{y} = \sum_{i=1}^{n} N_{yi}` is the total count of all features for class :math:`y`. -The smoothing priors :math:`\alpha \ge 0` accounts for -features not present in the learning samples and prevents zero probabilities +The smoothing priors :math:`\alpha \ge 0` account for +features not present in the learning samples and prevent zero probabilities in further computations. Setting :math:`\alpha = 1` is called Laplace smoothing, while :math:`\alpha < 1` is called Lidstone smoothing. @@ -153,47 +149,40 @@ The inventors of CNB show empirically that the parameter estimates for CNB are more stable than those for MNB. Further, CNB regularly outperforms MNB (often by a considerable margin) on text classification tasks. -|details-start| -**Weights calculation** -|details-split| - -The procedure for calculating the weights is as follows: +.. dropdown:: Weights calculation -.. math:: + The procedure for calculating the weights is as follows: - \hat{\theta}_{ci} = \frac{\alpha_i + \sum_{j:y_j \neq c} d_{ij}} - {\alpha + \sum_{j:y_j \neq c} \sum_{k} d_{kj}} + .. math:: - w_{ci} = \log \hat{\theta}_{ci} + \hat{\theta}_{ci} = \frac{\alpha_i + \sum_{j:y_j \neq c} d_{ij}} + {\alpha + \sum_{j:y_j \neq c} \sum_{k} d_{kj}} - w_{ci} = \frac{w_{ci}}{\sum_{j} |w_{cj}|} + w_{ci} = \log \hat{\theta}_{ci} -where the summations are over all documents :math:`j` not in class :math:`c`, -:math:`d_{ij}` is either the count or tf-idf value of term :math:`i` in document -:math:`j`, :math:`\alpha_i` is a smoothing hyperparameter like that found in -MNB, and :math:`\alpha = \sum_{i} \alpha_i`. The second normalization addresses -the tendency for longer documents to dominate parameter estimates in MNB. The -classification rule is: + w_{ci} = \frac{w_{ci}}{\sum_{j} |w_{cj}|} -.. math:: + where the summations are over all documents :math:`j` not in class :math:`c`, + :math:`d_{ij}` is either the count or tf-idf value of term :math:`i` in document + :math:`j`, :math:`\alpha_i` is a smoothing hyperparameter like that found in + MNB, and :math:`\alpha = \sum_{i} \alpha_i`. The second normalization addresses + the tendency for longer documents to dominate parameter estimates in MNB. The + classification rule is: - \hat{c} = \arg\min_c \sum_{i} t_i w_{ci} + .. math:: -i.e., a document is assigned to the class that is the *poorest* complement -match. + \hat{c} = \arg\min_c \sum_{i} t_i w_{ci} -|details-end| + i.e., a document is assigned to the class that is the *poorest* complement + match. -|details-start| -**References** -|details-split| +.. dropdown:: References -* Rennie, J. D., Shih, L., Teevan, J., & Karger, D. R. (2003). - `Tackling the poor assumptions of naive bayes text classifiers. - `_ - In ICML (Vol. 3, pp. 616-623). + * Rennie, J. D., Shih, L., Teevan, J., & Karger, D. R. (2003). + `Tackling the poor assumptions of naive bayes text classifiers. + `_ + In ICML (Vol. 3, pp. 616-623). -|details-end| .. _bernoulli_naive_bayes: @@ -224,24 +213,21 @@ count vectors) may be used to train and use this classifier. :class:`BernoulliNB might perform better on some datasets, especially those with shorter documents. It is advisable to evaluate both models, if time permits. 
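+A minimal sketch of such a comparison on a tiny invented corpus (the documents,
+labels and vectorization below are illustrative assumptions only, not part of
+the guide's examples)::
+
+    >>> from sklearn.feature_extraction.text import CountVectorizer
+    >>> from sklearn.model_selection import cross_val_score
+    >>> from sklearn.naive_bayes import BernoulliNB, MultinomialNB
+    >>> docs = ["free offer now", "meeting at noon", "free meeting offer",
+    ...         "lunch at noon", "offer now", "noon meeting"]
+    >>> y = [1, 0, 1, 0, 1, 0]
+    >>> X = CountVectorizer().fit_transform(docs)  # word counts, also usable binarized
+    >>> bnb_scores = cross_val_score(BernoulliNB(), X, y, cv=3)
+    >>> mnb_scores = cross_val_score(MultinomialNB(), X, y, cv=3)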
-|details-start| -**References** -|details-split| +.. dropdown:: References -* C.D. Manning, P. Raghavan and H. Schütze (2008). Introduction to - Information Retrieval. Cambridge University Press, pp. 234-265. + * C.D. Manning, P. Raghavan and H. Schütze (2008). Introduction to + Information Retrieval. Cambridge University Press, pp. 234-265. -* A. McCallum and K. Nigam (1998). - `A comparison of event models for Naive Bayes text classification. - `_ - Proc. AAAI/ICML-98 Workshop on Learning for Text Categorization, pp. 41-48. + * A. McCallum and K. Nigam (1998). + `A comparison of event models for Naive Bayes text classification. + `_ + Proc. AAAI/ICML-98 Workshop on Learning for Text Categorization, pp. 41-48. -* V. Metsis, I. Androutsopoulos and G. Paliouras (2006). - `Spam filtering with Naive Bayes -- Which Naive Bayes? - `_ - 3rd Conf. on Email and Anti-Spam (CEAS). + * V. Metsis, I. Androutsopoulos and G. Paliouras (2006). + `Spam filtering with Naive Bayes -- Which Naive Bayes? + `_ + 3rd Conf. on Email and Anti-Spam (CEAS). -|details-end| .. _categorical_naive_bayes: @@ -258,25 +244,21 @@ For each feature :math:`i` in the training set :math:`X`, of X conditioned on the class y. The index set of the samples is defined as :math:`J = \{ 1, \dots, m \}`, with :math:`m` as the number of samples. -|details-start| -**Probability calculation** -|details-split| - -The probability of category :math:`t` in feature :math:`i` given class -:math:`c` is estimated as: +.. dropdown:: Probability calculation -.. math:: + The probability of category :math:`t` in feature :math:`i` given class + :math:`c` is estimated as: - P(x_i = t \mid y = c \: ;\, \alpha) = \frac{ N_{tic} + \alpha}{N_{c} + - \alpha n_i}, + .. math:: -where :math:`N_{tic} = |\{j \in J \mid x_{ij} = t, y_j = c\}|` is the number -of times category :math:`t` appears in the samples :math:`x_{i}`, which belong -to class :math:`c`, :math:`N_{c} = |\{ j \in J\mid y_j = c\}|` is the number -of samples with class c, :math:`\alpha` is a smoothing parameter and -:math:`n_i` is the number of available categories of feature :math:`i`. + P(x_i = t \mid y = c \: ;\, \alpha) = \frac{ N_{tic} + \alpha}{N_{c} + + \alpha n_i}, -|details-end| + where :math:`N_{tic} = |\{j \in J \mid x_{ij} = t, y_j = c\}|` is the number + of times category :math:`t` appears in the samples :math:`x_{i}`, which belong + to class :math:`c`, :math:`N_{c} = |\{ j \in J\mid y_j = c\}|` is the number + of samples with class c, :math:`\alpha` is a smoothing parameter and + :math:`n_i` is the number of available categories of feature :math:`i`. :class:`CategoricalNB` assumes that the sample matrix :math:`X` is encoded (for instance with the help of :class:`~sklearn.preprocessing.OrdinalEncoder`) such diff --git a/doc/modules/neighbors.rst b/doc/modules/neighbors.rst index b081b29572d8a..82caa397b60d2 100644 --- a/doc/modules/neighbors.rst +++ b/doc/modules/neighbors.rst @@ -192,10 +192,10 @@ distance can be supplied to compute the weights. .. centered:: |classification_1| -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_neighbors_plot_classification.py`: an example of - classification using nearest neighbors. +* :ref:`sphx_glr_auto_examples_neighbors_plot_classification.py`: an example of + classification using nearest neighbors. .. _regression: @@ -241,13 +241,13 @@ the lower half of those faces. :align: center -.. topic:: Examples: +.. 
rubric:: Examples - * :ref:`sphx_glr_auto_examples_neighbors_plot_regression.py`: an example of regression - using nearest neighbors. +* :ref:`sphx_glr_auto_examples_neighbors_plot_regression.py`: an example of regression + using nearest neighbors. - * :ref:`sphx_glr_auto_examples_miscellaneous_plot_multioutput_face_completion.py`: an example of - multi-output regression using nearest neighbors. +* :ref:`sphx_glr_auto_examples_miscellaneous_plot_multioutput_face_completion.py`: + an example of multi-output regression using nearest neighbors. Nearest Neighbor Algorithms @@ -304,15 +304,13 @@ In scikit-learn, KD tree neighbors searches are specified using the keyword ``algorithm = 'kd_tree'``, and are computed using the class :class:`KDTree`. -|details-start| -**References** -|details-split| - * `"Multidimensional binary search trees used for associative searching" - `_, - Bentley, J.L., Communications of the ACM (1975) +.. dropdown:: References + + * `"Multidimensional binary search trees used for associative searching" + `_, + Bentley, J.L., Communications of the ACM (1975) -|details-end| .. _ball_tree: @@ -345,156 +343,142 @@ neighbors searches are specified using the keyword ``algorithm = 'ball_tree'``, and are computed using the class :class:`BallTree`. Alternatively, the user can work with the :class:`BallTree` class directly. -|details-start| -**References** -|details-split| - - * `"Five Balltree Construction Algorithms" - `_, - Omohundro, S.M., International Computer Science Institute - Technical Report (1989) - -|details-end| - -|details-start| -**Choice of Nearest Neighbors Algorithm** -|details-split| - -The optimal algorithm for a given dataset is a complicated choice, and -depends on a number of factors: - -* number of samples :math:`N` (i.e. ``n_samples``) and dimensionality - :math:`D` (i.e. ``n_features``). - - * *Brute force* query time grows as :math:`O[D N]` - * *Ball tree* query time grows as approximately :math:`O[D \log(N)]` - * *KD tree* query time changes with :math:`D` in a way that is difficult - to precisely characterise. For small :math:`D` (less than 20 or so) - the cost is approximately :math:`O[D\log(N)]`, and the KD tree - query can be very efficient. - For larger :math:`D`, the cost increases to nearly :math:`O[DN]`, and - the overhead due to the tree - structure can lead to queries which are slower than brute force. - - For small data sets (:math:`N` less than 30 or so), :math:`\log(N)` is - comparable to :math:`N`, and brute force algorithms can be more efficient - than a tree-based approach. Both :class:`KDTree` and :class:`BallTree` - address this through providing a *leaf size* parameter: this controls the - number of samples at which a query switches to brute-force. This allows both - algorithms to approach the efficiency of a brute-force computation for small - :math:`N`. - -* data structure: *intrinsic dimensionality* of the data and/or *sparsity* - of the data. Intrinsic dimensionality refers to the dimension - :math:`d \le D` of a manifold on which the data lies, which can be linearly - or non-linearly embedded in the parameter space. Sparsity refers to the - degree to which the data fills the parameter space (this is to be - distinguished from the concept as used in "sparse" matrices. The data - matrix may have no zero entries, but the **structure** can still be - "sparse" in this sense). - - * *Brute force* query time is unchanged by data structure. - * *Ball tree* and *KD tree* query times can be greatly influenced - by data structure. 
In general, sparser data with a smaller intrinsic - dimensionality leads to faster query times. Because the KD tree - internal representation is aligned with the parameter axes, it will not - generally show as much improvement as ball tree for arbitrarily - structured data. - - Datasets used in machine learning tend to be very structured, and are - very well-suited for tree-based queries. - -* number of neighbors :math:`k` requested for a query point. - - * *Brute force* query time is largely unaffected by the value of :math:`k` - * *Ball tree* and *KD tree* query time will become slower as :math:`k` - increases. This is due to two effects: first, a larger :math:`k` leads - to the necessity to search a larger portion of the parameter space. - Second, using :math:`k > 1` requires internal queueing of results - as the tree is traversed. - - As :math:`k` becomes large compared to :math:`N`, the ability to prune - branches in a tree-based query is reduced. In this situation, Brute force - queries can be more efficient. - -* number of query points. Both the ball tree and the KD Tree - require a construction phase. The cost of this construction becomes - negligible when amortized over many queries. If only a small number of - queries will be performed, however, the construction can make up - a significant fraction of the total cost. If very few query points - will be required, brute force is better than a tree-based method. - -Currently, ``algorithm = 'auto'`` selects ``'brute'`` if any of the following -conditions are verified: - -* input data is sparse -* ``metric = 'precomputed'`` -* :math:`D > 15` -* :math:`k >= N/2` -* ``effective_metric_`` isn't in the ``VALID_METRICS`` list for either - ``'kd_tree'`` or ``'ball_tree'`` - -Otherwise, it selects the first out of ``'kd_tree'`` and ``'ball_tree'`` that -has ``effective_metric_`` in its ``VALID_METRICS`` list. This heuristic is -based on the following assumptions: - -* the number of query points is at least the same order as the number of - training points -* ``leaf_size`` is close to its default value of ``30`` -* when :math:`D > 15`, the intrinsic dimensionality of the data is generally - too high for tree-based methods - -|details-end| - -|details-start| -**Effect of ``leaf_size``** -|details-split| - -As noted above, for small sample sizes a brute force search can be more -efficient than a tree-based query. This fact is accounted for in the ball -tree and KD tree by internally switching to brute force searches within -leaf nodes. The level of this switch can be specified with the parameter -``leaf_size``. This parameter choice has many effects: - -**construction time** - A larger ``leaf_size`` leads to a faster tree construction time, because - fewer nodes need to be created - -**query time** - Both a large or small ``leaf_size`` can lead to suboptimal query cost. - For ``leaf_size`` approaching 1, the overhead involved in traversing - nodes can significantly slow query times. For ``leaf_size`` approaching - the size of the training set, queries become essentially brute force. - A good compromise between these is ``leaf_size = 30``, the default value - of the parameter. - -**memory** - As ``leaf_size`` increases, the memory required to store a tree structure - decreases. This is especially important in the case of ball tree, which - stores a :math:`D`-dimensional centroid for each node. The required - storage space for :class:`BallTree` is approximately ``1 / leaf_size`` times - the size of the training set. 
- -``leaf_size`` is not referenced for brute force queries. -|details-end| - -|details-start| -**Valid Metrics for Nearest Neighbor Algorithms** -|details-split| - -For a list of available metrics, see the documentation of the -:class:`~sklearn.metrics.DistanceMetric` class and the metrics listed in -`sklearn.metrics.pairwise.PAIRWISE_DISTANCE_FUNCTIONS`. Note that the "cosine" -metric uses :func:`~sklearn.metrics.pairwise.cosine_distances`. - -A list of valid metrics for any of the above algorithms can be obtained by using their -``valid_metric`` attribute. For example, valid metrics for ``KDTree`` can be generated by: - - >>> from sklearn.neighbors import KDTree - >>> print(sorted(KDTree.valid_metrics)) - ['chebyshev', 'cityblock', 'euclidean', 'infinity', 'l1', 'l2', 'manhattan', 'minkowski', 'p'] -|details-end| +.. dropdown:: References + + * `"Five Balltree Construction Algorithms" + `_, + Omohundro, S.M., International Computer Science Institute + Technical Report (1989) + +.. dropdown:: Choice of Nearest Neighbors Algorithm + + The optimal algorithm for a given dataset is a complicated choice, and + depends on a number of factors: + + * number of samples :math:`N` (i.e. ``n_samples``) and dimensionality + :math:`D` (i.e. ``n_features``). + + * *Brute force* query time grows as :math:`O[D N]` + * *Ball tree* query time grows as approximately :math:`O[D \log(N)]` + * *KD tree* query time changes with :math:`D` in a way that is difficult + to precisely characterise. For small :math:`D` (less than 20 or so) + the cost is approximately :math:`O[D\log(N)]`, and the KD tree + query can be very efficient. + For larger :math:`D`, the cost increases to nearly :math:`O[DN]`, and + the overhead due to the tree + structure can lead to queries which are slower than brute force. + + For small data sets (:math:`N` less than 30 or so), :math:`\log(N)` is + comparable to :math:`N`, and brute force algorithms can be more efficient + than a tree-based approach. Both :class:`KDTree` and :class:`BallTree` + address this through providing a *leaf size* parameter: this controls the + number of samples at which a query switches to brute-force. This allows both + algorithms to approach the efficiency of a brute-force computation for small + :math:`N`. + + * data structure: *intrinsic dimensionality* of the data and/or *sparsity* + of the data. Intrinsic dimensionality refers to the dimension + :math:`d \le D` of a manifold on which the data lies, which can be linearly + or non-linearly embedded in the parameter space. Sparsity refers to the + degree to which the data fills the parameter space (this is to be + distinguished from the concept as used in "sparse" matrices. The data + matrix may have no zero entries, but the **structure** can still be + "sparse" in this sense). + + * *Brute force* query time is unchanged by data structure. + * *Ball tree* and *KD tree* query times can be greatly influenced + by data structure. In general, sparser data with a smaller intrinsic + dimensionality leads to faster query times. Because the KD tree + internal representation is aligned with the parameter axes, it will not + generally show as much improvement as ball tree for arbitrarily + structured data. + + Datasets used in machine learning tend to be very structured, and are + very well-suited for tree-based queries. + + * number of neighbors :math:`k` requested for a query point. 
+ + * *Brute force* query time is largely unaffected by the value of :math:`k` + * *Ball tree* and *KD tree* query time will become slower as :math:`k` + increases. This is due to two effects: first, a larger :math:`k` leads + to the necessity to search a larger portion of the parameter space. + Second, using :math:`k > 1` requires internal queueing of results + as the tree is traversed. + + As :math:`k` becomes large compared to :math:`N`, the ability to prune + branches in a tree-based query is reduced. In this situation, Brute force + queries can be more efficient. + + * number of query points. Both the ball tree and the KD Tree + require a construction phase. The cost of this construction becomes + negligible when amortized over many queries. If only a small number of + queries will be performed, however, the construction can make up + a significant fraction of the total cost. If very few query points + will be required, brute force is better than a tree-based method. + + Currently, ``algorithm = 'auto'`` selects ``'brute'`` if any of the following + conditions are verified: + + * input data is sparse + * ``metric = 'precomputed'`` + * :math:`D > 15` + * :math:`k >= N/2` + * ``effective_metric_`` isn't in the ``VALID_METRICS`` list for either + ``'kd_tree'`` or ``'ball_tree'`` + + Otherwise, it selects the first out of ``'kd_tree'`` and ``'ball_tree'`` that + has ``effective_metric_`` in its ``VALID_METRICS`` list. This heuristic is + based on the following assumptions: + + * the number of query points is at least the same order as the number of + training points + * ``leaf_size`` is close to its default value of ``30`` + * when :math:`D > 15`, the intrinsic dimensionality of the data is generally + too high for tree-based methods + +.. dropdown:: Effect of ``leaf_size`` + + As noted above, for small sample sizes a brute force search can be more + efficient than a tree-based query. This fact is accounted for in the ball + tree and KD tree by internally switching to brute force searches within + leaf nodes. The level of this switch can be specified with the parameter + ``leaf_size``. This parameter choice has many effects: + + **construction time** + A larger ``leaf_size`` leads to a faster tree construction time, because + fewer nodes need to be created + + **query time** + Both a large or small ``leaf_size`` can lead to suboptimal query cost. + For ``leaf_size`` approaching 1, the overhead involved in traversing + nodes can significantly slow query times. For ``leaf_size`` approaching + the size of the training set, queries become essentially brute force. + A good compromise between these is ``leaf_size = 30``, the default value + of the parameter. + + **memory** + As ``leaf_size`` increases, the memory required to store a tree structure + decreases. This is especially important in the case of ball tree, which + stores a :math:`D`-dimensional centroid for each node. The required + storage space for :class:`BallTree` is approximately ``1 / leaf_size`` times + the size of the training set. + + ``leaf_size`` is not referenced for brute force queries. + +.. dropdown:: Valid Metrics for Nearest Neighbor Algorithms + + For a list of available metrics, see the documentation of the + :class:`~sklearn.metrics.DistanceMetric` class and the metrics listed in + `sklearn.metrics.pairwise.PAIRWISE_DISTANCE_FUNCTIONS`. Note that the "cosine" + metric uses :func:`~sklearn.metrics.pairwise.cosine_distances`. 
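For instance, a metric object can also be constructed directly with
:meth:`~sklearn.metrics.DistanceMetric.get_metric`. A minimal sketch (the two
toy points below are purely illustrative):

    >>> import numpy as np
    >>> from sklearn.metrics import DistanceMetric
    >>> X = np.array([[0., 0.], [3., 4.]])
    >>> dist = DistanceMetric.get_metric('euclidean')
    >>> dist.pairwise(X)
    array([[0., 5.],
           [5., 0.]])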
+
+   A list of valid metrics for any of the above algorithms can be obtained by using their
+   ``valid_metrics`` attribute. For example, valid metrics for ``KDTree`` can be generated by:
+
+      >>> from sklearn.neighbors import KDTree
+      >>> print(sorted(KDTree.valid_metrics))
+      ['chebyshev', 'cityblock', 'euclidean', 'infinity', 'l1', 'l2', 'manhattan', 'minkowski', 'p']

.. _nearest_centroid_classifier:

@@ -547,10 +531,10 @@ the model from 0.81 to 0.82.

.. centered:: |nearest_centroid_1| |nearest_centroid_2|

-.. topic:: Examples:
+.. rubric:: Examples

- * :ref:`sphx_glr_auto_examples_neighbors_plot_nearest_centroid.py`: an example of
-   classification using nearest centroid with different shrink thresholds.
+* :ref:`sphx_glr_auto_examples_neighbors_plot_nearest_centroid.py`: an example of
+  classification using nearest centroid with different shrink thresholds.

.. _neighbors_transformer:

@@ -576,7 +560,7 @@ a scikit-learn pipeline, one can also use the corresponding classes
 :class:`KNeighborsTransformer` and :class:`RadiusNeighborsTransformer`.
 The benefits of this sparse graph API are multiple.

-First, the precomputed graph can be re-used multiple times, for instance while
+First, the precomputed graph can be reused multiple times, for instance while
 varying a parameter of the estimator. This can be done manually by the user, or
 using the caching properties of the scikit-learn pipeline:

@@ -635,17 +619,17 @@ implementation with special data types. The precomputed neighbors
 include one extra neighbor in a custom nearest neighbors estimator, since
 unnecessary neighbors will be filtered by following estimators.

-.. topic:: Examples:
+.. rubric:: Examples

- * :ref:`sphx_glr_auto_examples_neighbors_approximate_nearest_neighbors.py`:
-   an example of pipelining :class:`KNeighborsTransformer` and
-   :class:`~sklearn.manifold.TSNE`. Also proposes two custom nearest neighbors
-   estimators based on external packages.
+* :ref:`sphx_glr_auto_examples_neighbors_approximate_nearest_neighbors.py`:
+  an example of pipelining :class:`KNeighborsTransformer` and
+  :class:`~sklearn.manifold.TSNE`. Also proposes two custom nearest neighbors
+  estimators based on external packages.

- * :ref:`sphx_glr_auto_examples_neighbors_plot_caching_nearest_neighbors.py`:
-   an example of pipelining :class:`KNeighborsTransformer` and
-   :class:`KNeighborsClassifier` to enable caching of the neighbors graph
-   during a hyper-parameter grid-search.
+* :ref:`sphx_glr_auto_examples_neighbors_plot_caching_nearest_neighbors.py`:
+  an example of pipelining :class:`KNeighborsTransformer` and
+  :class:`KNeighborsClassifier` to enable caching of the neighbors graph
+  during a hyper-parameter grid-search.

.. _nca:

@@ -769,11 +753,11 @@ by each method. Each data sample belongs to one of 10 classes.

.. centered:: |nca_dim_reduction_1| |nca_dim_reduction_2| |nca_dim_reduction_3|

-.. topic:: Examples:
+.. rubric:: Examples

- * :ref:`sphx_glr_auto_examples_neighbors_plot_nca_classification.py`
- * :ref:`sphx_glr_auto_examples_neighbors_plot_nca_dim_reduction.py`
- * :ref:`sphx_glr_auto_examples_manifold_plot_lle_digits.py`
+* :ref:`sphx_glr_auto_examples_neighbors_plot_nca_classification.py`
+* :ref:`sphx_glr_auto_examples_neighbors_plot_nca_dim_reduction.py`
+* :ref:`sphx_glr_auto_examples_manifold_plot_lle_digits.py`

..
_nca_mathematical_formulation: @@ -806,20 +790,17 @@ space: p_{i j} = \frac{\exp(-||L x_i - L x_j||^2)}{\sum\limits_{k \ne i} {\exp{-(||L x_i - L x_k||^2)}}} , \quad p_{i i} = 0 -|details-start| -**Mahalanobis distance** -|details-split| +.. dropdown:: Mahalanobis distance -NCA can be seen as learning a (squared) Mahalanobis distance metric: + NCA can be seen as learning a (squared) Mahalanobis distance metric: -.. math:: + .. math:: - || L(x_i - x_j)||^2 = (x_i - x_j)^TM(x_i - x_j), + || L(x_i - x_j)||^2 = (x_i - x_j)^TM(x_i - x_j), -where :math:`M = L^T L` is a symmetric positive semi-definite matrix of size -``(n_features, n_features)``. + where :math:`M = L^T L` is a symmetric positive semi-definite matrix of size + ``(n_features, n_features)``. -|details-end| Implementation -------------- @@ -851,14 +832,12 @@ complexity equals ``n_components * n_features * n_samples_test``. There is no added space complexity in the operation. -.. topic:: References: - - .. [1] `"Neighbourhood Components Analysis" - `_, - J. Goldberger, S. Roweis, G. Hinton, R. Salakhutdinov, Advances in - Neural Information Processing Systems, Vol. 17, May 2005, pp. 513-520. +.. rubric:: References - `Wikipedia entry on Neighborhood Components Analysis - `_ +.. [1] `"Neighbourhood Components Analysis" + `_, + J. Goldberger, S. Roweis, G. Hinton, R. Salakhutdinov, Advances in + Neural Information Processing Systems, Vol. 17, May 2005, pp. 513-520. -|details-end| +* `Wikipedia entry on Neighborhood Components Analysis + `_ diff --git a/doc/modules/neural_networks_supervised.rst b/doc/modules/neural_networks_supervised.rst index 95d0a1be38238..13611b7f52775 100644 --- a/doc/modules/neural_networks_supervised.rst +++ b/doc/modules/neural_networks_supervised.rst @@ -49,33 +49,30 @@ The module contains the public attributes ``coefs_`` and ``intercepts_``. :math:`i+1`. ``intercepts_`` is a list of bias vectors, where the vector at index :math:`i` represents the bias values added to layer :math:`i+1`. -|details-start| -**Advantages and disadvantages of Multi-layer Perceptron** -|details-split| +.. dropdown:: Advantages and disadvantages of Multi-layer Perceptron -The advantages of Multi-layer Perceptron are: + The advantages of Multi-layer Perceptron are: -+ Capability to learn non-linear models. + + Capability to learn non-linear models. -+ Capability to learn models in real-time (on-line learning) - using ``partial_fit``. + + Capability to learn models in real-time (on-line learning) + using ``partial_fit``. -The disadvantages of Multi-layer Perceptron (MLP) include: + The disadvantages of Multi-layer Perceptron (MLP) include: -+ MLP with hidden layers have a non-convex loss function where there exists - more than one local minimum. Therefore different random weight - initializations can lead to different validation accuracy. + + MLP with hidden layers has a non-convex loss function where there exists + more than one local minimum. Therefore, different random weight + initializations can lead to different validation accuracy. -+ MLP requires tuning a number of hyperparameters such as the number of - hidden neurons, layers, and iterations. + + MLP requires tuning a number of hyperparameters such as the number of + hidden neurons, layers, and iterations. -+ MLP is sensitive to feature scaling. + + MLP is sensitive to feature scaling. -Please see :ref:`Tips on Practical Use ` section that addresses -some of these disadvantages. + Please see :ref:`Tips on Practical Use ` section that addresses + some of these disadvantages. 
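As a minimal sketch of the on-line learning capability mentioned above (the
two-sample mini-batch is purely illustrative, and real inputs should be scaled
beforehand since MLP is sensitive to feature scaling):

    >>> from sklearn.neural_network import MLPClassifier
    >>> X_batch = [[0., 0.], [1., 1.]]  # illustrative mini-batch
    >>> y_batch = [0, 1]
    >>> clf = MLPClassifier(hidden_layer_sizes=(15,), random_state=1)
    >>> clf = clf.partial_fit(X_batch, y_batch, classes=[0, 1])  # pass all classes on the first call
    >>> clf = clf.partial_fit(X_batch, y_batch)  # subsequent batches reuse the known classes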
-|details-end| Classification ============== @@ -119,8 +116,8 @@ classification, it minimizes the Cross-Entropy loss function, giving a vector of probability estimates :math:`P(y|x)` per sample :math:`x`:: >>> clf.predict_proba([[2., 2.], [1., 2.]]) - array([[1.967...e-04, 9.998...-01], - [1.967...e-04, 9.998...-01]]) + array([[1.967e-04, 9.998e-01], + [1.967e-04, 9.998e-01]]) :class:`MLPClassifier` supports multi-class classification by applying `Softmax `_ @@ -130,7 +127,7 @@ Further, the model supports :ref:`multi-label classification ` in which a sample can belong to more than one class. For each class, the raw output passes through the logistic function. Values larger or equal to `0.5` are rounded to `1`, otherwise to `0`. For a predicted output of a sample, the -indices where the value is `1` represents the assigned classes of that sample:: +indices where the value is `1` represent the assigned classes of that sample:: >>> X = [[0., 0.], [1., 1.]] >>> y = [[0, 1], [1, 1]] @@ -148,11 +145,11 @@ indices where the value is `1` represents the assigned classes of that sample:: See the examples below and the docstring of :meth:`MLPClassifier.fit` for further information. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_neural_networks_plot_mlp_training_curves.py` - * See :ref:`sphx_glr_auto_examples_neural_networks_plot_mnist_filters.py` for - visualized representation of trained weights. +* :ref:`sphx_glr_auto_examples_neural_networks_plot_mlp_training_curves.py` +* See :ref:`sphx_glr_auto_examples_neural_networks_plot_mnist_filters.py` for + visualized representation of trained weights. Regression ========== @@ -181,9 +178,9 @@ decision function with value of alpha. See the examples below for further information. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_neural_networks_plot_mlp_alpha.py` +* :ref:`sphx_glr_auto_examples_neural_networks_plot_mlp_alpha.py` Algorithms ========== @@ -229,88 +226,83 @@ Complexity Suppose there are :math:`n` training samples, :math:`m` features, :math:`k` hidden layers, each containing :math:`h` neurons - for simplicity, and :math:`o` output neurons. The time complexity of backpropagation is -:math:`O(n\cdot m \cdot h^k \cdot o \cdot i)`, where :math:`i` is the number +:math:`O(i \cdot n \cdot (m \cdot h + (k - 1) \cdot h \cdot h + h \cdot o))`, where :math:`i` is the number of iterations. Since backpropagation has a high time complexity, it is advisable to start with smaller number of hidden neurons and few hidden layers for training. -|details-start| -Mathematical formulation -|details-split| +.. dropdown:: Mathematical formulation -Given a set of training examples :math:`(x_1, y_1), (x_2, y_2), \ldots, (x_n, y_n)` -where :math:`x_i \in \mathbf{R}^n` and :math:`y_i \in \{0, 1\}`, a one hidden -layer one hidden neuron MLP learns the function :math:`f(x) = W_2 g(W_1^T x + b_1) + b_2` -where :math:`W_1 \in \mathbf{R}^m` and :math:`W_2, b_1, b_2 \in \mathbf{R}` are -model parameters. :math:`W_1, W_2` represent the weights of the input layer and -hidden layer, respectively; and :math:`b_1, b_2` represent the bias added to -the hidden layer and the output layer, respectively. -:math:`g(\cdot) : R \rightarrow R` is the activation function, set by default as -the hyperbolic tan. 
It is given as, + Given a set of training examples :math:`(x_1, y_1), (x_2, y_2), \ldots, (x_n, y_n)` + where :math:`x_i \in \mathbf{R}^n` and :math:`y_i \in \{0, 1\}`, a one hidden + layer one hidden neuron MLP learns the function :math:`f(x) = W_2 g(W_1^T x + b_1) + b_2` + where :math:`W_1 \in \mathbf{R}^m` and :math:`W_2, b_1, b_2 \in \mathbf{R}` are + model parameters. :math:`W_1, W_2` represent the weights of the input layer and + hidden layer, respectively; and :math:`b_1, b_2` represent the bias added to + the hidden layer and the output layer, respectively. + :math:`g(\cdot) : R \rightarrow R` is the activation function, set by default as + the hyperbolic tan. It is given as, -.. math:: - g(z)= \frac{e^z-e^{-z}}{e^z+e^{-z}} - -For binary classification, :math:`f(x)` passes through the logistic function -:math:`g(z)=1/(1+e^{-z})` to obtain output values between zero and one. A -threshold, set to 0.5, would assign samples of outputs larger or equal 0.5 -to the positive class, and the rest to the negative class. + .. math:: + g(z)= \frac{e^z-e^{-z}}{e^z+e^{-z}} -If there are more than two classes, :math:`f(x)` itself would be a vector of -size (n_classes,). Instead of passing through logistic function, it passes -through the softmax function, which is written as, - -.. math:: - \text{softmax}(z)_i = \frac{\exp(z_i)}{\sum_{l=1}^k\exp(z_l)} + For binary classification, :math:`f(x)` passes through the logistic function + :math:`g(z)=1/(1+e^{-z})` to obtain output values between zero and one. A + threshold, set to 0.5, would assign samples of outputs larger or equal 0.5 + to the positive class, and the rest to the negative class. -where :math:`z_i` represents the :math:`i` th element of the input to softmax, -which corresponds to class :math:`i`, and :math:`K` is the number of classes. -The result is a vector containing the probabilities that sample :math:`x` -belong to each class. The output is the class with the highest probability. + If there are more than two classes, :math:`f(x)` itself would be a vector of + size (n_classes,). Instead of passing through logistic function, it passes + through the softmax function, which is written as, -In regression, the output remains as :math:`f(x)`; therefore, output activation -function is just the identity function. + .. math:: + \text{softmax}(z)_i = \frac{\exp(z_i)}{\sum_{l=1}^k\exp(z_l)} -MLP uses different loss functions depending on the problem type. The loss -function for classification is Average Cross-Entropy, which in binary case is -given as, + where :math:`z_i` represents the :math:`i` th element of the input to softmax, + which corresponds to class :math:`i`, and :math:`K` is the number of classes. + The result is a vector containing the probabilities that sample :math:`x` + belongs to each class. The output is the class with the highest probability. -.. math:: + In regression, the output remains as :math:`f(x)`; therefore, output activation + function is just the identity function. - Loss(\hat{y},y,W) = -\dfrac{1}{n}\sum_{i=0}^n(y_i \ln {\hat{y_i}} + (1-y_i) \ln{(1-\hat{y_i})}) + \dfrac{\alpha}{2n} ||W||_2^2 + MLP uses different loss functions depending on the problem type. The loss + function for classification is Average Cross-Entropy, which in binary case is + given as, -where :math:`\alpha ||W||_2^2` is an L2-regularization term (aka penalty) -that penalizes complex models; and :math:`\alpha > 0` is a non-negative -hyperparameter that controls the magnitude of the penalty. + .. 
math:: -For regression, MLP uses the Mean Square Error loss function; written as, + Loss(\hat{y},y,W) = -\dfrac{1}{n}\sum_{i=0}^n(y_i \ln {\hat{y_i}} + (1-y_i) \ln{(1-\hat{y_i})}) + \dfrac{\alpha}{2n} ||W||_2^2 -.. math:: + where :math:`\alpha ||W||_2^2` is an L2-regularization term (aka penalty) + that penalizes complex models; and :math:`\alpha > 0` is a non-negative + hyperparameter that controls the magnitude of the penalty. - Loss(\hat{y},y,W) = \frac{1}{2n}\sum_{i=0}^n||\hat{y}_i - y_i ||_2^2 + \frac{\alpha}{2n} ||W||_2^2 + For regression, MLP uses the Mean Square Error loss function; written as, + .. math:: -Starting from initial random weights, multi-layer perceptron (MLP) minimizes -the loss function by repeatedly updating these weights. After computing the -loss, a backward pass propagates it from the output layer to the previous -layers, providing each weight parameter with an update value meant to decrease -the loss. + Loss(\hat{y},y,W) = \frac{1}{2n}\sum_{i=0}^n||\hat{y}_i - y_i ||_2^2 + \frac{\alpha}{2n} ||W||_2^2 -In gradient descent, the gradient :math:`\nabla Loss_{W}` of the loss with respect -to the weights is computed and deducted from :math:`W`. -More formally, this is expressed as, + Starting from initial random weights, multi-layer perceptron (MLP) minimizes + the loss function by repeatedly updating these weights. After computing the + loss, a backward pass propagates it from the output layer to the previous + layers, providing each weight parameter with an update value meant to decrease + the loss. -.. math:: - W^{i+1} = W^i - \epsilon \nabla {Loss}_{W}^{i} + In gradient descent, the gradient :math:`\nabla Loss_{W}` of the loss with respect + to the weights is computed and deducted from :math:`W`. + More formally, this is expressed as, + .. math:: + W^{i+1} = W^i - \epsilon \nabla {Loss}_{W}^{i} -where :math:`i` is the iteration step, and :math:`\epsilon` is the learning rate -with a value larger than 0. + where :math:`i` is the iteration step, and :math:`\epsilon` is the learning rate + with a value larger than 0. -The algorithm stops when it reaches a preset maximum number of iterations; or -when the improvement in loss is below a certain, small number. + The algorithm stops when it reaches a preset maximum number of iterations; or + when the improvement in loss is below a certain, small number. -|details-end| .. _mlp_tips: @@ -361,25 +353,19 @@ or want to do additional monitoring, using ``warm_start=True`` and ... # additional monitoring / inspection MLPClassifier(... -|details-start| -**References** -|details-split| - - * `"Learning representations by back-propagating errors." - `_ - Rumelhart, David E., Geoffrey E. Hinton, and Ronald J. Williams. +.. dropdown:: References - * `"Stochastic Gradient Descent" `_ L. Bottou - Website, 2010. + * `"Learning representations by back-propagating errors." + `_ + Rumelhart, David E., Geoffrey E. Hinton, and Ronald J. Williams. - * `"Backpropagation" `_ - Andrew Ng, Jiquan Ngiam, Chuan Yu Foo, Yifan Mai, Caroline Suen - Website, 2011. + * `"Stochastic Gradient Descent" `_ L. Bottou - Website, 2010. - * `"Efficient BackProp" `_ - Y. LeCun, L. Bottou, G. Orr, K. Müller - In Neural Networks: Tricks - of the Trade 1998. + * `"Backpropagation" `_ + Andrew Ng, Jiquan Ngiam, Chuan Yu Foo, Yifan Mai, Caroline Suen - Website, 2011. - * :arxiv:`"Adam: A method for stochastic optimization." - <1412.6980>` - Kingma, Diederik, and Jimmy Ba (2014) + * `"Efficient BackProp" `_ + Y. LeCun, L. Bottou, G. Orr, K. 
Müller - In Neural Networks: Tricks of the Trade 1998. -|details-end| + * :arxiv:`"Adam: A method for stochastic optimization." <1412.6980>` + Kingma, Diederik, and Jimmy Ba (2014) diff --git a/doc/modules/neural_networks_unsupervised.rst b/doc/modules/neural_networks_unsupervised.rst index aca56ae8aaf2e..7f6c0016d183b 100644 --- a/doc/modules/neural_networks_unsupervised.rst +++ b/doc/modules/neural_networks_unsupervised.rst @@ -37,9 +37,9 @@ weights of independent RBMs. This method is known as unsupervised pre-training. :align: center :scale: 100% -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_neural_networks_plot_rbm_logistic_classification.py` +* :ref:`sphx_glr_auto_examples_neural_networks_plot_rbm_logistic_classification.py` Graphical model and parametrization @@ -57,7 +57,7 @@ visible and hidden unit, omitted from the image for simplicity. The energy function measures the quality of a joint assignment: -.. math:: +.. math:: E(\mathbf{v}, \mathbf{h}) = -\sum_i \sum_j w_{ij}v_ih_j - \sum_i b_iv_i - \sum_j c_jh_j @@ -149,13 +149,13 @@ step, in PCD we keep a number of chains (fantasy particles) that are updated :math:`k` Gibbs steps after each weight update. This allows the particles to explore the space more thoroughly. -.. topic:: References: +.. rubric:: References - * `"A fast learning algorithm for deep belief nets" - `_ - G. Hinton, S. Osindero, Y.-W. Teh, 2006 +* `"A fast learning algorithm for deep belief nets" + `_, + G. Hinton, S. Osindero, Y.-W. Teh, 2006 - * `"Training Restricted Boltzmann Machines using Approximations to - the Likelihood Gradient" - `_ - T. Tieleman, 2008 +* `"Training Restricted Boltzmann Machines using Approximations to + the Likelihood Gradient" + `_, + T. Tieleman, 2008 diff --git a/doc/modules/outlier_detection.rst b/doc/modules/outlier_detection.rst index d003b645eb19c..7de2da4f1818e 100644 --- a/doc/modules/outlier_detection.rst +++ b/doc/modules/outlier_detection.rst @@ -123,19 +123,19 @@ refer to the example :ref:`sphx_glr_auto_examples_miscellaneous_plot_anomaly_comparison.py` and the sections hereunder. -.. topic:: Examples: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_miscellaneous_plot_anomaly_comparison.py` - for a comparison of the :class:`svm.OneClassSVM`, the - :class:`ensemble.IsolationForest`, the - :class:`neighbors.LocalOutlierFactor` and - :class:`covariance.EllipticEnvelope`. +* See :ref:`sphx_glr_auto_examples_miscellaneous_plot_anomaly_comparison.py` + for a comparison of the :class:`svm.OneClassSVM`, the + :class:`ensemble.IsolationForest`, the + :class:`neighbors.LocalOutlierFactor` and + :class:`covariance.EllipticEnvelope`. - * See :ref:`sphx_glr_auto_examples_miscellaneous_plot_outlier_detection_bench.py` - for an example showing how to evaluate outlier detection estimators, - the :class:`neighbors.LocalOutlierFactor` and the - :class:`ensemble.IsolationForest`, using ROC curves from - :class:`metrics.RocCurveDisplay`. +* See :ref:`sphx_glr_auto_examples_miscellaneous_plot_outlier_detection_bench.py` + for an example showing how to evaluate outlier detection estimators, + the :class:`neighbors.LocalOutlierFactor` and the + :class:`ensemble.IsolationForest`, using ROC curves from + :class:`metrics.RocCurveDisplay`. Novelty Detection ================= @@ -153,7 +153,7 @@ In general, it is about to learn a rough, close frontier delimiting the contour of the initial observations distribution, plotted in embedding :math:`p`-dimensional space. 
Then, if further observations lay within the frontier-delimited subspace, they are considered as -coming from the same population than the initial +coming from the same population as the initial observations. Otherwise, if they lay outside the frontier, we can say that they are abnormal with a given confidence in our assessment. @@ -167,18 +167,18 @@ implementation. The `nu` parameter, also known as the margin of the One-Class SVM, corresponds to the probability of finding a new, but regular, observation outside the frontier. -.. topic:: References: +.. rubric:: References - * `Estimating the support of a high-dimensional distribution - `_ - Schölkopf, Bernhard, et al. Neural computation 13.7 (2001): 1443-1471. +* `Estimating the support of a high-dimensional distribution + `_ + Schölkopf, Bernhard, et al. Neural computation 13.7 (2001): 1443-1471. -.. topic:: Examples: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_svm_plot_oneclass.py` for visualizing the - frontier learned around some data by a - :class:`svm.OneClassSVM` object. - * :ref:`sphx_glr_auto_examples_applications_plot_species_distribution_modeling.py` +* See :ref:`sphx_glr_auto_examples_svm_plot_oneclass.py` for visualizing the + frontier learned around some data by a :class:`svm.OneClassSVM` object. + +* :ref:`sphx_glr_auto_examples_applications_plot_species_distribution_modeling.py` .. figure:: ../auto_examples/svm/images/sphx_glr_plot_oneclass_001.png :target: ../auto_examples/svm/plot_oneclass.html @@ -196,11 +196,11 @@ approximate the solution of a kernelized :class:`svm.OneClassSVM` whose complexity is at best quadratic in the number of samples. See section :ref:`sgd_online_one_class_svm` for more details. -.. topic:: Examples: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_linear_model_plot_sgdocsvm_vs_ocsvm.py` - for an illustration of the approximation of a kernelized One-Class SVM - with the `linear_model.SGDOneClassSVM` combined with kernel approximation. +* See :ref:`sphx_glr_auto_examples_linear_model_plot_sgdocsvm_vs_ocsvm.py` + for an illustration of the approximation of a kernelized One-Class SVM + with the `linear_model.SGDOneClassSVM` combined with kernel approximation. Outlier Detection @@ -230,7 +230,7 @@ points, ignoring points outside the central mode. For instance, assuming that the inlier data are Gaussian distributed, it will estimate the inlier location and covariance in a robust way (i.e. without being influenced by outliers). The Mahalanobis distances -obtained from this estimate is used to derive a measure of outlyingness. +obtained from this estimate are used to derive a measure of outlyingness. This strategy is illustrated below. .. figure:: ../auto_examples/covariance/images/sphx_glr_plot_mahalanobis_distances_001.png @@ -238,18 +238,22 @@ This strategy is illustrated below. :align: center :scale: 75% -.. topic:: Examples: +.. rubric:: Examples + +* See :ref:`sphx_glr_auto_examples_covariance_plot_mahalanobis_distances.py` for + an illustration of the difference between using a standard + (:class:`covariance.EmpiricalCovariance`) or a robust estimate + (:class:`covariance.MinCovDet`) of location and covariance to + assess the degree of outlyingness of an observation. 
- * See :ref:`sphx_glr_auto_examples_covariance_plot_mahalanobis_distances.py` for - an illustration of the difference between using a standard - (:class:`covariance.EmpiricalCovariance`) or a robust estimate - (:class:`covariance.MinCovDet`) of location and covariance to - assess the degree of outlyingness of an observation. +* See :ref:`sphx_glr_auto_examples_applications_plot_outlier_detection_wine.py` + for an example of robust covariance estimation on a real data set. -.. topic:: References: - * Rousseeuw, P.J., Van Driessen, K. "A fast algorithm for the minimum - covariance determinant estimator" Technometrics 41(3), 212 (1999) +.. rubric:: References + +* Rousseeuw, P.J., Van Driessen, K. "A fast algorithm for the minimum + covariance determinant estimator" Technometrics 41(3), 212 (1999) .. _isolation_forest: @@ -299,22 +303,22 @@ allows you to add more trees to an already fitted model:: >>> clf.set_params(n_estimators=20) # add 10 more trees # doctest: +SKIP >>> clf.fit(X) # fit the added trees # doctest: +SKIP -.. topic:: Examples: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_ensemble_plot_isolation_forest.py` for - an illustration of the use of IsolationForest. +* See :ref:`sphx_glr_auto_examples_ensemble_plot_isolation_forest.py` for + an illustration of the use of IsolationForest. - * See :ref:`sphx_glr_auto_examples_miscellaneous_plot_anomaly_comparison.py` - for a comparison of :class:`ensemble.IsolationForest` with - :class:`neighbors.LocalOutlierFactor`, - :class:`svm.OneClassSVM` (tuned to perform like an outlier detection - method), :class:`linear_model.SGDOneClassSVM`, and a covariance-based - outlier detection with :class:`covariance.EllipticEnvelope`. +* See :ref:`sphx_glr_auto_examples_miscellaneous_plot_anomaly_comparison.py` + for a comparison of :class:`ensemble.IsolationForest` with + :class:`neighbors.LocalOutlierFactor`, + :class:`svm.OneClassSVM` (tuned to perform like an outlier detection + method), :class:`linear_model.SGDOneClassSVM`, and a covariance-based + outlier detection with :class:`covariance.EllipticEnvelope`. -.. topic:: References: +.. rubric:: References - * Liu, Fei Tony, Ting, Kai Ming and Zhou, Zhi-Hua. "Isolation forest." - Data Mining, 2008. ICDM'08. Eighth IEEE International Conference on. +* Liu, Fei Tony, Ting, Kai Ming and Zhou, Zhi-Hua. "Isolation forest." + Data Mining, 2008. ICDM'08. Eighth IEEE International Conference on. .. _local_outlier_factor: @@ -336,16 +340,14 @@ average local density of its k-nearest neighbors, and its own local density: a normal instance is expected to have a local density similar to that of its neighbors, while abnormal data are expected to have much smaller local density. -The number k of neighbors considered, (alias parameter n_neighbors) is typically -chosen 1) greater than the minimum number of objects a cluster has to contain, -so that other objects can be local outliers relative to this cluster, and 2) -smaller than the maximum number of close by objects that can potentially be -local outliers. -In practice, such information is generally not available, and taking -n_neighbors=20 appears to work well in general. -When the proportion of outliers is high (i.e. greater than 10 \%, as in the -example below), n_neighbors should be greater (n_neighbors=35 in the example -below). 
+The number k of neighbors considered, (alias parameter `n_neighbors`) is +typically chosen 1) greater than the minimum number of objects a cluster has to +contain, so that other objects can be local outliers relative to this cluster, +and 2) smaller than the maximum number of close by objects that can potentially +be local outliers. In practice, such information is generally not available, and +taking `n_neighbors=20` appears to work well in general. When the proportion of +outliers is high (i.e. greater than 10 \%, as in the example below), +`n_neighbors` should be greater (`n_neighbors=35` in the example below). The strength of the LOF algorithm is that it takes both local and global properties of datasets into consideration: it can perform well even in datasets @@ -370,20 +372,20 @@ This strategy is illustrated below. :align: center :scale: 75% -.. topic:: Examples: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_neighbors_plot_lof_outlier_detection.py` - for an illustration of the use of :class:`neighbors.LocalOutlierFactor`. +* See :ref:`sphx_glr_auto_examples_neighbors_plot_lof_outlier_detection.py` + for an illustration of the use of :class:`neighbors.LocalOutlierFactor`. - * See :ref:`sphx_glr_auto_examples_miscellaneous_plot_anomaly_comparison.py` - for a comparison with other anomaly detection methods. +* See :ref:`sphx_glr_auto_examples_miscellaneous_plot_anomaly_comparison.py` + for a comparison with other anomaly detection methods. -.. topic:: References: +.. rubric:: References - * Breunig, Kriegel, Ng, and Sander (2000) - `LOF: identifying density-based local outliers. - `_ - Proc. ACM SIGMOD +* Breunig, Kriegel, Ng, and Sander (2000) + `LOF: identifying density-based local outliers. + `_ + Proc. ACM SIGMOD .. _novelty_with_lof: @@ -400,7 +402,7 @@ set to ``True`` before fitting the estimator:: Note that ``fit_predict`` is not available in this case to avoid inconsistencies. -.. warning:: **Novelty detection with Local Outlier Factor`** +.. warning:: **Novelty detection with Local Outlier Factor** When ``novelty`` is set to ``True`` be aware that you must only use ``predict``, ``decision_function`` and ``score_samples`` on new unseen data diff --git a/doc/modules/partial_dependence.rst b/doc/modules/partial_dependence.rst index 94f7206140b90..083b23c1f1c91 100644 --- a/doc/modules/partial_dependence.rst +++ b/doc/modules/partial_dependence.rst @@ -79,25 +79,21 @@ parameter takes a list of indices, names of the categorical features or a boolea mask. The graphical representation of partial dependence for categorical features is a bar plot or a 2D heatmap. -|details-start| -**PDPs for multi-class classification** -|details-split| - -For multi-class classification, you need to set the class label for which -the PDPs should be created via the ``target`` argument:: - - >>> from sklearn.datasets import load_iris - >>> iris = load_iris() - >>> mc_clf = GradientBoostingClassifier(n_estimators=10, - ... max_depth=1).fit(iris.data, iris.target) - >>> features = [3, 2, (3, 2)] - >>> PartialDependenceDisplay.from_estimator(mc_clf, X, features, target=0) - <...> +.. dropdown:: PDPs for multi-class classification + + For multi-class classification, you need to set the class label for which + the PDPs should be created via the ``target`` argument:: -The same parameter ``target`` is used to specify the target in multi-output -regression settings. + >>> from sklearn.datasets import load_iris + >>> iris = load_iris() + >>> mc_clf = GradientBoostingClassifier(n_estimators=10, + ... 
max_depth=1).fit(iris.data, iris.target) + >>> features = [3, 2, (3, 2)] + >>> PartialDependenceDisplay.from_estimator(mc_clf, X, features, target=0) + <...> -|details-end| + The same parameter ``target`` is used to specify the target in multi-output + regression settings. If you need the raw values of the partial dependence function rather than the plots, you can use the @@ -132,8 +128,8 @@ Due to the limits of human perception, only one input feature of interest is supported for ICE plots. The figures below show two ICE plots for the bike sharing dataset, -with a :class:`~sklearn.ensemble.HistGradientBoostingRegressor`:. -The figures plot the corresponding PD line overlaid on ICE lines. +with a :class:`~sklearn.ensemble.HistGradientBoostingRegressor`. The figures plot +the corresponding PD line overlaid on ICE lines. .. figure:: ../auto_examples/inspection/images/sphx_glr_plot_partial_dependence_004.png :target: ../auto_examples/inspection/plot_partial_dependence.html @@ -144,8 +140,8 @@ While the PDPs are good at showing the average effect of the target features, they can obscure a heterogeneous relationship created by interactions. When interactions are present the ICE plot will provide many more insights. For example, we see that the ICE for the temperature feature gives us some -additional information: Some of the ICE lines are flat while some others -shows a decrease of the dependence for temperature above 35 degrees Celsius. +additional information: some of the ICE lines are flat while some others +show a decrease of the dependence for temperature above 35 degrees Celsius. We observe a similar pattern for the humidity feature: some of the ICE lines show a sharp decrease when the humidity is above 80%. @@ -233,7 +229,7 @@ over the dataset `X` which is computationally intensive. Each of the :math:`f(x_{S}, x_{C}^{(i)})` corresponds to one ICE line evaluated at :math:`x_{S}`. Computing this for multiple values of :math:`x_{S}`, one obtains a full ICE line. As one can see, the average of the ICE lines -correspond to the partial dependence line. +corresponds to the partial dependence line. The 'recursion' method is faster than the 'brute' method, but it is only supported for PDP plots by some tree-based estimators. It is computed as @@ -242,7 +238,7 @@ if a split node involves an input feature of interest, the corresponding left or right branch is followed; otherwise both branches are followed, each branch being weighted by the fraction of training samples that entered that branch. Finally, the partial dependence is given by a weighted average of all the -visited leaves values. +visited leaves' values. With the 'brute' method, the parameter `X` is used both for generating the grid of values :math:`x_S` and the complement feature values :math:`x_C`. @@ -266,9 +262,9 @@ estimators that support it, and 'brute' is used for the rest. interpreting PDPs is that the features should be independent. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_inspection_plot_partial_dependence.py` +* :ref:`sphx_glr_auto_examples_inspection_plot_partial_dependence.py` .. rubric:: Footnotes @@ -276,21 +272,20 @@ estimators that support it, and 'brute' is used for the rest. class (the positive class for binary classification), or the decision function. -.. topic:: References +.. rubric:: References - .. [H2009] T. Hastie, R. Tibshirani and J. Friedman, - `The Elements of Statistical Learning - `_, - Second Edition, Section 10.13.2, Springer, 2009. +.. [H2009] T. Hastie, R. 
Tibshirani and J. Friedman, + `The Elements of Statistical Learning + `_, + Second Edition, Section 10.13.2, Springer, 2009. - .. [M2019] C. Molnar, - `Interpretable Machine Learning - `_, - Section 5.1, 2019. +.. [M2019] C. Molnar, + `Interpretable Machine Learning + `_, + Section 5.1, 2019. - .. [G2015] :arxiv:`A. Goldstein, A. Kapelner, J. Bleich, and E. Pitkin, - "Peeking Inside the Black Box: Visualizing Statistical - Learning With Plots of Individual Conditional Expectation" - Journal of Computational and Graphical Statistics, - 24(1): 44-65, Springer, 2015. - <1309.6392>` +.. [G2015] :arxiv:`A. Goldstein, A. Kapelner, J. Bleich, and E. Pitkin, + "Peeking Inside the Black Box: Visualizing Statistical + Learning With Plots of Individual Conditional Expectation" + Journal of Computational and Graphical Statistics, + 24(1): 44-65, Springer, 2015. <1309.6392>` diff --git a/doc/modules/permutation_importance.rst b/doc/modules/permutation_importance.rst index 368c6a6409aa0..80bb5ef0eb650 100644 --- a/doc/modules/permutation_importance.rst +++ b/doc/modules/permutation_importance.rst @@ -15,7 +15,7 @@ single feature and observing the resulting degradation of the model's score determine how much the model relies on such particular feature. In the following figures, we observe the effect of permuting features on the correlation -between the feature and the target and consequently on the model statistical +between the feature and the target and consequently on the model's statistical performance. .. image:: ../images/permuted_predictive_feature.png @@ -25,9 +25,10 @@ performance. :align: center On the top figure, we observe that permuting a predictive feature breaks the -correlation between the feature and the target, and consequently the model +correlation between the feature and the target, and consequently the model's statistical performance decreases. On the bottom figure, we observe that permuting -a non-predictive feature does not significantly degrade the model statistical performance. +a non-predictive feature does not significantly degrade the model's statistical +performance. One key advantage of permutation feature importance is that it is model-agnostic, i.e. it can be applied to any fitted estimator. Moreover, it can @@ -38,7 +39,7 @@ specific trained model. The figure below shows the permutation feature importance of a :class:`~sklearn.ensemble.RandomForestClassifier` trained on an augmented version of the titanic dataset that contains a `random_cat` and a `random_num` -features, i.e. a categrical and a numerical feature that are not correlated in +features, i.e. a categorical and a numerical feature that are not correlated in any way with the target variable: .. figure:: ../auto_examples/inspection/images/sphx_glr_plot_permutation_importance_002.png @@ -52,7 +53,7 @@ any way with the target variable: cross-validation score) could be **very important for a good model**. Therefore it is always important to evaluate the predictive power of a model using a held-out set (or better with cross-validation) prior to computing - importances. Permutation importance does not reflect to the intrinsic + importances. Permutation importance does not reflect the intrinsic predictive value of a feature by itself but **how important this feature is for a particular model**. @@ -110,48 +111,44 @@ which is more computationally efficient than sequentially calling :func:`permutation_importance` several times with a different scorer, as it reuses model predictions. 
-|details-start| -**Example of permutation feature importance using multiple scorers** -|details-split| - -In the example below we use a list of metrics, but more input formats are -possible, as documented in :ref:`multimetric_scoring`. - - >>> scoring = ['r2', 'neg_mean_absolute_percentage_error', 'neg_mean_squared_error'] - >>> r_multi = permutation_importance( - ... model, X_val, y_val, n_repeats=30, random_state=0, scoring=scoring) - ... - >>> for metric in r_multi: - ... print(f"{metric}") - ... r = r_multi[metric] - ... for i in r.importances_mean.argsort()[::-1]: - ... if r.importances_mean[i] - 2 * r.importances_std[i] > 0: - ... print(f" {diabetes.feature_names[i]:<8}" - ... f"{r.importances_mean[i]:.3f}" - ... f" +/- {r.importances_std[i]:.3f}") - ... - r2 - s5 0.204 +/- 0.050 - bmi 0.176 +/- 0.048 - bp 0.088 +/- 0.033 - sex 0.056 +/- 0.023 - neg_mean_absolute_percentage_error - s5 0.081 +/- 0.020 - bmi 0.064 +/- 0.015 - bp 0.029 +/- 0.010 - neg_mean_squared_error - s5 1013.866 +/- 246.445 - bmi 872.726 +/- 240.298 - bp 438.663 +/- 163.022 - sex 277.376 +/- 115.123 - -The ranking of the features is approximately the same for different metrics even -if the scales of the importance values are very different. However, this is not -guaranteed and different metrics might lead to significantly different feature -importances, in particular for models trained for imbalanced classification problems, -for which **the choice of the classification metric can be critical**. - -|details-end| +.. dropdown:: Example of permutation feature importance using multiple scorers + + In the example below we use a list of metrics, but more input formats are + possible, as documented in :ref:`multimetric_scoring`. + + >>> scoring = ['r2', 'neg_mean_absolute_percentage_error', 'neg_mean_squared_error'] + >>> r_multi = permutation_importance( + ... model, X_val, y_val, n_repeats=30, random_state=0, scoring=scoring) + ... + >>> for metric in r_multi: + ... print(f"{metric}") + ... r = r_multi[metric] + ... for i in r.importances_mean.argsort()[::-1]: + ... if r.importances_mean[i] - 2 * r.importances_std[i] > 0: + ... print(f" {diabetes.feature_names[i]:<8}" + ... f"{r.importances_mean[i]:.3f}" + ... f" +/- {r.importances_std[i]:.3f}") + ... + r2 + s5 0.204 +/- 0.050 + bmi 0.176 +/- 0.048 + bp 0.088 +/- 0.033 + sex 0.056 +/- 0.023 + neg_mean_absolute_percentage_error + s5 0.081 +/- 0.020 + bmi 0.064 +/- 0.015 + bp 0.029 +/- 0.010 + neg_mean_squared_error + s5 1013.866 +/- 246.445 + bmi 872.726 +/- 240.298 + bp 438.663 +/- 163.022 + sex 277.376 +/- 115.123 + + The ranking of the features is approximately the same for different metrics even + if the scales of the importance values are very different. However, this is not + guaranteed and different metrics might lead to significantly different feature + importances, in particular for models trained for imbalanced classification problems, + for which **the choice of the classification metric can be critical**. Outline of the permutation importance algorithm ----------------------------------------------- @@ -185,7 +182,7 @@ importance to features that may not be predictive on unseen data when the model is overfitting. Permutation-based feature importance, on the other hand, avoids this issue, since it can be computed on unseen data. 
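As a minimal sketch of this contrast (the synthetic data and the choice of a
random forest are illustrative only):

    >>> from sklearn.datasets import make_classification
    >>> from sklearn.ensemble import RandomForestClassifier
    >>> from sklearn.inspection import permutation_importance
    >>> from sklearn.model_selection import train_test_split
    >>> X, y = make_classification(random_state=0)
    >>> X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    >>> rf = RandomForestClassifier(random_state=0).fit(X_train, y_train)
    >>> impurity_based = rf.feature_importances_  # derived from the training data alone
    >>> result = permutation_importance(rf, X_test, y_test, n_repeats=10,
    ...                                 random_state=0)
    >>> held_out_based = result.importances_mean  # computed on data unseen during fit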
-Furthermore, impurity-based feature importance for trees are **strongly
-biased** and **favor high cardinality features** (typically numerical
+Furthermore, impurity-based feature importance for trees is **strongly
+biased** and **favors high cardinality features** (typically numerical
 features) over low cardinality features such as binary features or categorical
 variables with a small number of possible categories.

@@ -228,12 +225,12 @@ keep one feature from each cluster.
 For more details on such strategy, see the example
 :ref:`sphx_glr_auto_examples_inspection_plot_permutation_importance_multicollinear.py`.

-.. topic:: Examples:
+.. rubric:: Examples

- * :ref:`sphx_glr_auto_examples_inspection_plot_permutation_importance.py`
- * :ref:`sphx_glr_auto_examples_inspection_plot_permutation_importance_multicollinear.py`
+* :ref:`sphx_glr_auto_examples_inspection_plot_permutation_importance.py`
+* :ref:`sphx_glr_auto_examples_inspection_plot_permutation_importance_multicollinear.py`

-.. topic:: References:
+.. rubric:: References

- .. [1] L. Breiman, :doi:`"Random Forests" <10.1023/A:1010933404324>`,
-    Machine Learning, 45(1), 5-32, 2001.
+.. [1] L. Breiman, :doi:`"Random Forests" <10.1023/A:1010933404324>`,
+   Machine Learning, 45(1), 5-32, 2001.
diff --git a/doc/modules/preprocessing.rst b/doc/modules/preprocessing.rst
index 99678f2b3e45b..69dff95518c41 100644
--- a/doc/modules/preprocessing.rst
+++ b/doc/modules/preprocessing.rst
@@ -14,7 +14,7 @@ In general, many learning algorithms such as linear models benefit from standard
 (see :ref:`sphx_glr_auto_examples_preprocessing_plot_scaling_importance.py`).
 If some outliers are present in the set, robust scalers or other transformers
 can be more appropriate. The behaviors of the different scalers, transformers, and
-normalizers on a dataset containing marginal outliers is highlighted in
+normalizers on a dataset containing marginal outliers are highlighted in
 :ref:`sphx_glr_auto_examples_preprocessing_plot_all_scaling.py`.

@@ -57,16 +57,16 @@ dataset::

     StandardScaler()

     >>> scaler.mean_
-    array([1. ..., 0. ..., 0.33...])
+    array([1., 0., 0.33])

     >>> scaler.scale_
-    array([0.81..., 0.81..., 1.24...])
+    array([0.81, 0.81, 1.24])

     >>> X_scaled = scaler.transform(X_train)
     >>> X_scaled
-    array([[ 0. ..., -1.22..., 1.33...],
-           [ 1.22..., 0. ..., -0.26...],
-           [-1.22..., 1.22..., -1.06...]])
+    array([[ 0.  , -1.22,  1.33 ],
+           [ 1.22,  0.  , -0.267],
+           [-1.22,  1.22, -1.06 ]])

 ..
     >>> import numpy as np

@@ -118,7 +118,7 @@ or so that the maximum absolute value of each feature is scaled to unit size.
 This can be achieved using :class:`MinMaxScaler` or :class:`MaxAbsScaler`,
 respectively.

-The motivation to use this scaling include robustness to very small
+The motivation to use this scaling includes robustness to very small
 standard deviations of features and preserving zero entries in sparse data.

 Here is an example to scale a toy data matrix to the ``[0, 1]`` range::

@@ -147,10 +147,10 @@ It is possible to introspect the scaler attributes to find about the exact
 nature of the transformation learned on the training data::

     >>> min_max_scaler.scale_
-    array([0.5 , 0.5 , 0.33...])
+    array([0.5 , 0.5 , 0.33])

     >>> min_max_scaler.min_
-    array([0. , 0.5 , 0.33...])
+    array([0. , 0.5 , 0.33])

 If :class:`MinMaxScaler` is given an explicit ``feature_range=(min, max)`` the
 full formula is::

@@ -219,28 +219,22 @@ of the data is likely to not work very well. In these cases, you can use
 :class:`RobustScaler` as a drop-in replacement instead. It uses more robust
 estimates for the center and range of your data.
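As a minimal sketch (the toy column below, with a single extreme outlier, is
illustrative only):

    >>> import numpy as np
    >>> from sklearn.preprocessing import RobustScaler
    >>> X = np.array([[1.], [2.], [3.], [1000.]])  # one extreme outlier
    >>> X_scaled = RobustScaler().fit_transform(X)  # centers on the median, scales by the IQR

Because the median and interquartile range are barely affected by the outlier,
the bulk of the data keeps a comparable scale after the transformation.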
-|details-start| -**References** -|details-split| -Further discussion on the importance of centering and scaling data is -available on this FAQ: `Should I normalize/standardize/rescale the data? -`_ +.. dropdown:: References -|details-end| + Further discussion on the importance of centering and scaling data is + available on this FAQ: `Should I normalize/standardize/rescale the data? + `_ -|details-start| -**Scaling vs Whitening** -|details-split| +.. dropdown:: Scaling vs Whitening -It is sometimes not enough to center and scale the features -independently, since a downstream model can further make some assumption -on the linear independence of the features. + It is sometimes not enough to center and scale the features + independently, since a downstream model can further make some assumption + on the linear independence of the features. -To address this issue you can use :class:`~sklearn.decomposition.PCA` with -``whiten=True`` to further remove the linear correlation across features. + To address this issue you can use :class:`~sklearn.decomposition.PCA` with + ``whiten=True`` to further remove the linear correlation across features. -|details-end| .. _kernel_centering: @@ -255,63 +249,59 @@ followed by the removal of the mean in that space. In other words, :class:`KernelCenterer` computes the centered Gram matrix associated to a positive semidefinite kernel :math:`K`. -|details-start| -**Mathematical formulation** -|details-split| +.. dropdown:: Mathematical formulation -We can have a look at the mathematical formulation now that we have the -intuition. Let :math:`K` be a kernel matrix of shape `(n_samples, n_samples)` -computed from :math:`X`, a data matrix of shape `(n_samples, n_features)`, -during the `fit` step. :math:`K` is defined by + We can have a look at the mathematical formulation now that we have the + intuition. Let :math:`K` be a kernel matrix of shape `(n_samples, n_samples)` + computed from :math:`X`, a data matrix of shape `(n_samples, n_features)`, + during the `fit` step. :math:`K` is defined by -.. math:: - K(X, X) = \phi(X) . \phi(X)^{T} + .. math:: + K(X, X) = \phi(X) . \phi(X)^{T} -:math:`\phi(X)` is a function mapping of :math:`X` to a Hilbert space. A -centered kernel :math:`\tilde{K}` is defined as: + :math:`\phi(X)` is a function mapping of :math:`X` to a Hilbert space. A + centered kernel :math:`\tilde{K}` is defined as: -.. math:: - \tilde{K}(X, X) = \tilde{\phi}(X) . \tilde{\phi}(X)^{T} + .. math:: + \tilde{K}(X, X) = \tilde{\phi}(X) . \tilde{\phi}(X)^{T} -where :math:`\tilde{\phi}(X)` results from centering :math:`\phi(X)` in the -Hilbert space. + where :math:`\tilde{\phi}(X)` results from centering :math:`\phi(X)` in the + Hilbert space. -Thus, one could compute :math:`\tilde{K}` by mapping :math:`X` using the -function :math:`\phi(\cdot)` and center the data in this new space. However, -kernels are often used because they allows some algebra calculations that -avoid computing explicitly this mapping using :math:`\phi(\cdot)`. Indeed, one -can implicitly center as shown in Appendix B in [Scholkopf1998]_: + Thus, one could compute :math:`\tilde{K}` by mapping :math:`X` using the + function :math:`\phi(\cdot)` and center the data in this new space. However, + kernels are often used because they allow some algebra calculations that + avoid computing explicitly this mapping using :math:`\phi(\cdot)`. Indeed, one + can implicitly center as shown in Appendix B in [Scholkopf1998]_: -.. 
math:: - \tilde{K} = K - 1_{\text{n}_{samples}} K - K 1_{\text{n}_{samples}} + 1_{\text{n}_{samples}} K 1_{\text{n}_{samples}} + .. math:: + \tilde{K} = K - 1_{\text{n}_{samples}} K - K 1_{\text{n}_{samples}} + 1_{\text{n}_{samples}} K 1_{\text{n}_{samples}} -:math:`1_{\text{n}_{samples}}` is a matrix of `(n_samples, n_samples)` where -all entries are equal to :math:`\frac{1}{\text{n}_{samples}}`. In the -`transform` step, the kernel becomes :math:`K_{test}(X, Y)` defined as: + :math:`1_{\text{n}_{samples}}` is a matrix of `(n_samples, n_samples)` where + all entries are equal to :math:`\frac{1}{\text{n}_{samples}}`. In the + `transform` step, the kernel becomes :math:`K_{test}(X, Y)` defined as: -.. math:: - K_{test}(X, Y) = \phi(Y) . \phi(X)^{T} + .. math:: + K_{test}(X, Y) = \phi(Y) . \phi(X)^{T} -:math:`Y` is the test dataset of shape `(n_samples_test, n_features)` and thus -:math:`K_{test}` is of shape `(n_samples_test, n_samples)`. In this case, -centering :math:`K_{test}` is done as: + :math:`Y` is the test dataset of shape `(n_samples_test, n_features)` and thus + :math:`K_{test}` is of shape `(n_samples_test, n_samples)`. In this case, + centering :math:`K_{test}` is done as: -.. math:: - \tilde{K}_{test}(X, Y) = K_{test} - 1'_{\text{n}_{samples}} K - K_{test} 1_{\text{n}_{samples}} + 1'_{\text{n}_{samples}} K 1_{\text{n}_{samples}} + .. math:: + \tilde{K}_{test}(X, Y) = K_{test} - 1'_{\text{n}_{samples}} K - K_{test} 1_{\text{n}_{samples}} + 1'_{\text{n}_{samples}} K 1_{\text{n}_{samples}} -:math:`1'_{\text{n}_{samples}}` is a matrix of shape -`(n_samples_test, n_samples)` where all entries are equal to -:math:`\frac{1}{\text{n}_{samples}}`. + :math:`1'_{\text{n}_{samples}}` is a matrix of shape + `(n_samples_test, n_samples)` where all entries are equal to + :math:`\frac{1}{\text{n}_{samples}}`. -.. topic:: References + .. rubric:: References .. [Scholkopf1998] B. Schölkopf, A. Smola, and K.R. Müller, `"Nonlinear component analysis as a kernel eigenvalue problem." `_ Neural computation 10.5 (1998): 1299-1319. -|details-end| - .. _preprocessing_transformer: Non-linear transformation @@ -356,21 +346,21 @@ with values between 0 and 1:: array([ 4.3, 5.1, 5.8, 6.5, 7.9]) This feature corresponds to the sepal length in cm. Once the quantile -transformation applied, those landmarks approach closely the percentiles +transformation is applied, those landmarks approach closely the percentiles previously defined:: >>> np.percentile(X_train_trans[:, 0], [0, 25, 50, 75, 100]) ... # doctest: +SKIP - array([ 0.00... , 0.24..., 0.49..., 0.73..., 0.99... ]) + array([ 0.00 , 0.24, 0.49, 0.73, 0.99 ]) -This can be confirmed on a independent testing set with similar remarks:: +This can be confirmed on an independent testing set with similar remarks:: >>> np.percentile(X_test[:, 0], [0, 25, 50, 75, 100]) ... # doctest: +SKIP array([ 4.4 , 5.125, 5.75 , 6.175, 7.3 ]) >>> np.percentile(X_test_trans[:, 0], [0, 25, 50, 75, 100]) ... # doctest: +SKIP - array([ 0.01..., 0.25..., 0.46..., 0.60... , 0.94...]) + array([ 0.01, 0.25, 0.46, 0.60 , 0.94]) Mapping to a Gaussian distribution ---------------------------------- @@ -383,54 +373,46 @@ possible in order to stabilize variance and minimize skewness. :class:`PowerTransformer` currently provides two such power transformations, the Yeo-Johnson transform and the Box-Cox transform. -|details-start| -**Yeo-Johnson transform** -|details-split| - -.. 
math:: - x_i^{(\lambda)} = - \begin{cases} - [(x_i + 1)^\lambda - 1] / \lambda & \text{if } \lambda \neq 0, x_i \geq 0, \\[8pt] - \ln{(x_i + 1)} & \text{if } \lambda = 0, x_i \geq 0 \\[8pt] - -[(-x_i + 1)^{2 - \lambda} - 1] / (2 - \lambda) & \text{if } \lambda \neq 2, x_i < 0, \\[8pt] - - \ln (- x_i + 1) & \text{if } \lambda = 2, x_i < 0 - \end{cases} - -|details-end| - -|details-start| -**Box-Cox transform** -|details-split| - -.. math:: - x_i^{(\lambda)} = - \begin{cases} - \dfrac{x_i^\lambda - 1}{\lambda} & \text{if } \lambda \neq 0, \\[8pt] - \ln{(x_i)} & \text{if } \lambda = 0, - \end{cases} - - -Box-Cox can only be applied to strictly positive data. In both methods, the -transformation is parameterized by :math:`\lambda`, which is determined through -maximum likelihood estimation. Here is an example of using Box-Cox to map -samples drawn from a lognormal distribution to a normal distribution:: - - >>> pt = preprocessing.PowerTransformer(method='box-cox', standardize=False) - >>> X_lognormal = np.random.RandomState(616).lognormal(size=(3, 3)) - >>> X_lognormal - array([[1.28..., 1.18..., 0.84...], - [0.94..., 1.60..., 0.38...], - [1.35..., 0.21..., 1.09...]]) - >>> pt.fit_transform(X_lognormal) - array([[ 0.49..., 0.17..., -0.15...], - [-0.05..., 0.58..., -0.57...], - [ 0.69..., -0.84..., 0.10...]]) - -While the above example sets the `standardize` option to `False`, -:class:`PowerTransformer` will apply zero-mean, unit-variance normalization -to the transformed output by default. - -|details-end| +.. dropdown:: Yeo-Johnson transform + + .. math:: + x_i^{(\lambda)} = + \begin{cases} + [(x_i + 1)^\lambda - 1] / \lambda & \text{if } \lambda \neq 0, x_i \geq 0, \\[8pt] + \ln{(x_i + 1)} & \text{if } \lambda = 0, x_i \geq 0 \\[8pt] + -[(-x_i + 1)^{2 - \lambda} - 1] / (2 - \lambda) & \text{if } \lambda \neq 2, x_i < 0, \\[8pt] + - \ln (- x_i + 1) & \text{if } \lambda = 2, x_i < 0 + \end{cases} + +.. dropdown:: Box-Cox transform + + .. math:: + x_i^{(\lambda)} = + \begin{cases} + \dfrac{x_i^\lambda - 1}{\lambda} & \text{if } \lambda \neq 0, \\[8pt] + \ln{(x_i)} & \text{if } \lambda = 0, + \end{cases} + + Box-Cox can only be applied to strictly positive data. In both methods, the + transformation is parameterized by :math:`\lambda`, which is determined through + maximum likelihood estimation. Here is an example of using Box-Cox to map + samples drawn from a lognormal distribution to a normal distribution:: + + >>> pt = preprocessing.PowerTransformer(method='box-cox', standardize=False) + >>> X_lognormal = np.random.RandomState(616).lognormal(size=(3, 3)) + >>> X_lognormal + array([[1.28, 1.18 , 0.84 ], + [0.94, 1.60 , 0.388], + [1.35, 0.217, 1.09 ]]) + >>> pt.fit_transform(X_lognormal) + array([[ 0.49 , 0.179, -0.156], + [-0.051, 0.589, -0.576], + [ 0.69 , -0.849, 0.101]]) + + While the above example sets the `standardize` option to `False`, + :class:`PowerTransformer` will apply zero-mean, unit-variance normalization + to the transformed output by default. + Below are examples of Box-Cox and Yeo-Johnson applied to various probability distributions. Note that when applied to certain distributions, the power @@ -488,9 +470,9 @@ operation on a single array-like dataset, either using the ``l1``, ``l2``, or >>> X_normalized = preprocessing.normalize(X, norm='l2') >>> X_normalized - array([[ 0.40..., -0.40..., 0.81...], - [ 1. ..., 0. ..., 0. ...], - [ 0. ..., 0.70..., -0.70...]]) + array([[ 0.408, -0.408, 0.812], + [ 1. , 0. , 0. ], + [ 0. 
, 0.707, -0.707]]) The ``preprocessing`` module further provides a utility class :class:`Normalizer` that implements the same operation using the @@ -508,19 +490,18 @@ This class is hence suitable for use in the early steps of a The normalizer instance can then be used on sample vectors as any transformer:: >>> normalizer.transform(X) - array([[ 0.40..., -0.40..., 0.81...], - [ 1. ..., 0. ..., 0. ...], - [ 0. ..., 0.70..., -0.70...]]) + array([[ 0.408, -0.408, 0.812], + [ 1. , 0. , 0. ], + [ 0. , 0.707, -0.707]]) >>> normalizer.transform([[-1., 1., 0.]]) - array([[-0.70..., 0.70..., 0. ...]]) + array([[-0.707, 0.707, 0.]]) Note: L2 normalization is also known as spatial sign preprocessing. -|details-start| -**Sparse input** -|details-split| +.. dropdown:: Sparse input + :func:`normalize` and :class:`Normalizer` accept **both dense array-like and sparse matrices from scipy.sparse as input**. @@ -529,12 +510,11 @@ Note: L2 normalization is also known as spatial sign preprocessing. efficient Cython routines. To avoid unnecessary memory copies, it is recommended to choose the CSR representation upstream. -|details-end| - .. _preprocessing_categorical_features: Encoding categorical features ============================= + Often features are not given as continuous values but categorical. For example a person could have features ``["male", "female"]``, ``["from Europe", "from US", "from Asia"]``, @@ -694,7 +674,7 @@ categories. In this case, you can set the parameter `drop='if_binary'`. [0., 1., 0., 0., 1., 0., 0.]]) In the transformed `X`, the first column is the encoding of the feature with -categories "male"/"female", while the remaining 6 columns is the encoding of +categories "male"/"female", while the remaining 6 columns are the encoding of the 2 features with respectively 3 categories each. When `handle_unknown='ignore'` and `drop` is not None, unknown categories will @@ -721,42 +701,39 @@ not dropped:: >>> drop_enc.inverse_transform(X_trans) array([['female', None, None]], dtype=object) -|details-start| -**Support of categorical features with missing values** -|details-split| +.. dropdown:: Support of categorical features with missing values -:class:`OneHotEncoder` supports categorical features with missing values by -considering the missing values as an additional category:: + :class:`OneHotEncoder` supports categorical features with missing values by + considering the missing values as an additional category:: - >>> X = [['male', 'Safari'], - ... ['female', None], - ... [np.nan, 'Firefox']] - >>> enc = preprocessing.OneHotEncoder(handle_unknown='error').fit(X) - >>> enc.categories_ - [array(['female', 'male', nan], dtype=object), - array(['Firefox', 'Safari', None], dtype=object)] - >>> enc.transform(X).toarray() - array([[0., 1., 0., 0., 1., 0.], - [1., 0., 0., 0., 0., 1.], - [0., 0., 1., 1., 0., 0.]]) - -If a feature contains both `np.nan` and `None`, they will be considered -separate categories:: - - >>> X = [['Safari'], [None], [np.nan], ['Firefox']] - >>> enc = preprocessing.OneHotEncoder(handle_unknown='error').fit(X) - >>> enc.categories_ - [array(['Firefox', 'Safari', None, nan], dtype=object)] - >>> enc.transform(X).toarray() - array([[0., 1., 0., 0.], - [0., 0., 1., 0.], - [0., 0., 0., 1.], - [1., 0., 0., 0.]]) + >>> X = [['male', 'Safari'], + ... ['female', None], + ... 
[np.nan, 'Firefox']] + >>> enc = preprocessing.OneHotEncoder(handle_unknown='error').fit(X) + >>> enc.categories_ + [array(['female', 'male', nan], dtype=object), + array(['Firefox', 'Safari', None], dtype=object)] + >>> enc.transform(X).toarray() + array([[0., 1., 0., 0., 1., 0.], + [1., 0., 0., 0., 0., 1.], + [0., 0., 1., 1., 0., 0.]]) + + If a feature contains both `np.nan` and `None`, they will be considered + separate categories:: + + >>> X = [['Safari'], [None], [np.nan], ['Firefox']] + >>> enc = preprocessing.OneHotEncoder(handle_unknown='error').fit(X) + >>> enc.categories_ + [array(['Firefox', 'Safari', None, nan], dtype=object)] + >>> enc.transform(X).toarray() + array([[0., 1., 0., 0.], + [0., 0., 1., 0.], + [0., 0., 0., 1.], + [1., 0., 0., 0.]]) -See :ref:`dict_feature_extraction` for categorical features that are -represented as a dict, not as scalars. + See :ref:`dict_feature_extraction` for categorical features that are + represented as a dict, not as scalars. -|details-end| .. _encoder_infrequent_categories: @@ -780,8 +757,8 @@ enable the gathering of infrequent categories are `min_frequency` and input feature. `max_categories` includes the feature that combines infrequent categories. -In the following example with :class:`OrdinalEncoder`, the categories `'dog' and -'snake'` are considered infrequent:: +In the following example with :class:`OrdinalEncoder`, the categories `'dog'` +and `'snake'` are considered infrequent:: >>> X = np.array([['dog'] * 5 + ['cat'] * 20 + ['rabbit'] * 10 + ... ['snake'] * 3], dtype=object).T @@ -818,7 +795,7 @@ and missing values are encoded as 4. [3.], [4.]]) -Similarity, :class:`OneHotEncoder` can be configured to group together infrequent +Similarly, :class:`OneHotEncoder` can be configured to group together infrequent categories:: >>> enc = preprocessing.OneHotEncoder(min_frequency=6, sparse_output=False).fit(X) @@ -886,7 +863,7 @@ infrequent:: [0., 0., 1.]]) If there are infrequent categories with the same cardinality at the cutoff of -`max_categories`, then then the first `max_categories` are taken based on lexicon +`max_categories`, then the first `max_categories` are taken based on lexicon ordering. In the following example, "b", "c", and "d", have the same cardinality and with `max_categories=2`, "b" and "c" are infrequent because they have a higher lexicon order. @@ -910,66 +887,55 @@ cardinality, where one-hot encoding would inflate the feature space making it more expensive for a downstream model to process. A classical example of high cardinality categories are location based such as zip code or region. -|details-start| -**Binary classification targets** -|details-split| - -For the binary classification target, the target encoding is given by: - -.. math:: - S_i = \lambda_i\frac{n_{iY}}{n_i} + (1 - \lambda_i)\frac{n_Y}{n} +.. dropdown:: Binary classification targets -where :math:`S_i` is the encoding for category :math:`i`, :math:`n_{iY}` is the -number of observations with :math:`Y=1` and category :math:`i`, :math:`n_i` is -the number of observations with category :math:`i`, :math:`n_Y` is the number of -observations with :math:`Y=1`, :math:`n` is the number of observations, and -:math:`\lambda_i` is a shrinkage factor for category :math:`i`. The shrinkage -factor is given by: + For the binary classification target, the target encoding is given by: -.. math:: - \lambda_i = \frac{n_i}{m + n_i} + .. 
math:: + S_i = \lambda_i\frac{n_{iY}}{n_i} + (1 - \lambda_i)\frac{n_Y}{n} -where :math:`m` is a smoothing factor, which is controlled with the `smooth` -parameter in :class:`TargetEncoder`. Large smoothing factors will put more -weight on the global mean. When `smooth="auto"`, the smoothing factor is -computed as an empirical Bayes estimate: :math:`m=\sigma_i^2/\tau^2`, where -:math:`\sigma_i^2` is the variance of `y` with category :math:`i` and -:math:`\tau^2` is the global variance of `y`. + where :math:`S_i` is the encoding for category :math:`i`, :math:`n_{iY}` is the + number of observations with :math:`Y=1` and category :math:`i`, :math:`n_i` is + the number of observations with category :math:`i`, :math:`n_Y` is the number of + observations with :math:`Y=1`, :math:`n` is the number of observations, and + :math:`\lambda_i` is a shrinkage factor for category :math:`i`. The shrinkage + factor is given by: -|details-end| + .. math:: + \lambda_i = \frac{n_i}{m + n_i} -|details-start| -**Multiclass classification targets** -|details-split| + where :math:`m` is a smoothing factor, which is controlled with the `smooth` + parameter in :class:`TargetEncoder`. Large smoothing factors will put more + weight on the global mean. When `smooth="auto"`, the smoothing factor is + computed as an empirical Bayes estimate: :math:`m=\sigma_i^2/\tau^2`, where + :math:`\sigma_i^2` is the variance of `y` with category :math:`i` and + :math:`\tau^2` is the global variance of `y`. -For multiclass classification targets, the formulation is similar to binary -classification: +.. dropdown:: Multiclass classification targets -.. math:: - S_{ij} = \lambda_i\frac{n_{iY_j}}{n_i} + (1 - \lambda_i)\frac{n_{Y_j}}{n} + For multiclass classification targets, the formulation is similar to binary + classification: -where :math:`S_{ij}` is the encoding for category :math:`i` and class :math:`j`, -:math:`n_{iY_j}` is the number of observations with :math:`Y=j` and category -:math:`i`, :math:`n_i` is the number of observations with category :math:`i`, -:math:`n_{Y_j}` is the number of observations with :math:`Y=j`, :math:`n` is the -number of observations, and :math:`\lambda_i` is a shrinkage factor for category -:math:`i`. + .. math:: + S_{ij} = \lambda_i\frac{n_{iY_j}}{n_i} + (1 - \lambda_i)\frac{n_{Y_j}}{n} -|details-end| + where :math:`S_{ij}` is the encoding for category :math:`i` and class :math:`j`, + :math:`n_{iY_j}` is the number of observations with :math:`Y=j` and category + :math:`i`, :math:`n_i` is the number of observations with category :math:`i`, + :math:`n_{Y_j}` is the number of observations with :math:`Y=j`, :math:`n` is the + number of observations, and :math:`\lambda_i` is a shrinkage factor for category + :math:`i`. -|details-start| -**Continuous targets** -|details-split| +.. dropdown:: Continuous targets -For continuous targets, the formulation is similar to binary classification: + For continuous targets, the formulation is similar to binary classification: -.. math:: - S_i = \lambda_i\frac{\sum_{k\in L_i}Y_k}{n_i} + (1 - \lambda_i)\frac{\sum_{k=1}^{n}Y_k}{n} + .. math:: + S_i = \lambda_i\frac{\sum_{k\in L_i}Y_k}{n_i} + (1 - \lambda_i)\frac{\sum_{k=1}^{n}Y_k}{n} -where :math:`L_i` is the set of observations with category :math:`i` and -:math:`n_i` is the number of observations with category :math:`i`. + where :math:`L_i` is the set of observations with category :math:`i` and + :math:`n_i` is the number of observations with category :math:`i`. 
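+To make the shrinkage concrete, here is a minimal sketch with a binary target
+and a fixed smoothing factor (the category counts and ``smooth=5.0`` below are
+illustrative, not taken from the references). For the category ``"dog"`` with
+:math:`n_i = 20`, :math:`n_{iY} = 15`, a global mean of :math:`0.5` and
+:math:`m = 5`, the encoding is
+:math:`\frac{20}{25} \cdot \frac{15}{20} + \frac{5}{25} \cdot 0.5 = 0.7`,
+which :class:`TargetEncoder` reproduces::
+
+    >>> import numpy as np
+    >>> from sklearn.preprocessing import TargetEncoder
+    >>> X = np.array([["dog"] * 20 + ["cat"] * 30], dtype=object).T
+    >>> # 15 positives out of 20 for "dog", 10 out of 30 for "cat",
+    >>> # so the global target mean is 25 / 50 = 0.5
+    >>> y = np.array([1] * 15 + [0] * 5 + [1] * 10 + [0] * 20)
+    >>> enc = TargetEncoder(smooth=5.0, target_type="binary").fit(X, y)
+    >>> enc.encodings_  # encodings for the categories ["cat", "dog"]
+    ... # doctest: +SKIP
+    [array([0.357, 0.7  ])]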
-|details-end| :meth:`~TargetEncoder.fit_transform` internally relies on a :term:`cross fitting` scheme to prevent target information from leaking into the train-time @@ -1005,21 +971,21 @@ encoding learned in :meth:`~TargetEncoder.fit_transform`. that are not seen during `fit` are encoded with the target mean, i.e. `target_mean_`. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_preprocessing_plot_target_encoder.py` - * :ref:`sphx_glr_auto_examples_preprocessing_plot_target_encoder_cross_val.py` +* :ref:`sphx_glr_auto_examples_preprocessing_plot_target_encoder.py` +* :ref:`sphx_glr_auto_examples_preprocessing_plot_target_encoder_cross_val.py` -.. topic:: References +.. rubric:: References - .. [MIC] :doi:`Micci-Barreca, Daniele. "A preprocessing scheme for high-cardinality - categorical attributes in classification and prediction problems" - SIGKDD Explor. Newsl. 3, 1 (July 2001), 27–32. <10.1145/507533.507538>` +.. [MIC] :doi:`Micci-Barreca, Daniele. "A preprocessing scheme for high-cardinality + categorical attributes in classification and prediction problems" + SIGKDD Explor. Newsl. 3, 1 (July 2001), 27-32. <10.1145/507533.507538>` - .. [PAR] :doi:`Pargent, F., Pfisterer, F., Thomas, J. et al. "Regularized target - encoding outperforms traditional methods in supervised machine learning with - high cardinality features" Comput Stat 37, 2671–2692 (2022) - <10.1007/s00180-022-01207-6>` +.. [PAR] :doi:`Pargent, F., Pfisterer, F., Thomas, J. et al. "Regularized target + encoding outperforms traditional methods in supervised machine learning with + high cardinality features" Comput Stat 37, 2671-2692 (2022) + <10.1007/s00180-022-01207-6>` .. _preprocessing_discretization: @@ -1097,11 +1063,11 @@ For instance, we can use the Pandas function :func:`pandas.cut`:: ['infant', 'kid', 'teen', 'adult', 'senior citizen'] Categories (5, object): ['infant' < 'kid' < 'teen' < 'adult' < 'senior citizen'] -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_preprocessing_plot_discretization.py` - * :ref:`sphx_glr_auto_examples_preprocessing_plot_discretization_classification.py` - * :ref:`sphx_glr_auto_examples_preprocessing_plot_discretization_strategies.py` +* :ref:`sphx_glr_auto_examples_preprocessing_plot_discretization.py` +* :ref:`sphx_glr_auto_examples_preprocessing_plot_discretization_classification.py` +* :ref:`sphx_glr_auto_examples_preprocessing_plot_discretization_strategies.py` .. _preprocessing_binarization: @@ -1294,23 +1260,20 @@ Interestingly, a :class:`SplineTransformer` of ``degree=0`` is the same as ``encode='onehot-dense'`` and ``n_bins = n_knots - 1`` if ``knots = strategy``. -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_linear_model_plot_polynomial_interpolation.py` - * :ref:`sphx_glr_auto_examples_applications_plot_cyclical_feature_engineering.py` +* :ref:`sphx_glr_auto_examples_linear_model_plot_polynomial_interpolation.py` +* :ref:`sphx_glr_auto_examples_applications_plot_cyclical_feature_engineering.py` -|details-start| -**References** -|details-split| +.. dropdown:: References - * Eilers, P., & Marx, B. (1996). :doi:`Flexible Smoothing with B-splines and - Penalties <10.1214/ss/1038425655>`. Statist. Sci. 11 (1996), no. 2, 89--121. + * Eilers, P., & Marx, B. (1996). :doi:`Flexible Smoothing with B-splines and + Penalties <10.1214/ss/1038425655>`. Statist. Sci. 11 (1996), no. 2, 89--121. - * Perperoglou, A., Sauerbrei, W., Abrahamowicz, M. et al. 
:doi:`A review of - spline function procedures in R <10.1186/s12874-019-0666-3>`. - BMC Med Res Methodol 19, 46 (2019). + * Perperoglou, A., Sauerbrei, W., Abrahamowicz, M. et al. :doi:`A review of + spline function procedures in R <10.1186/s12874-019-0666-3>`. + BMC Med Res Methodol 19, 46 (2019). -|details-end| .. _function_transformer: diff --git a/doc/modules/preprocessing_targets.rst b/doc/modules/preprocessing_targets.rst index b7e8802785257..f8035bc059af4 100644 --- a/doc/modules/preprocessing_targets.rst +++ b/doc/modules/preprocessing_targets.rst @@ -95,8 +95,8 @@ hashable and comparable) to numerical labels:: >>> le.fit(["paris", "paris", "tokyo", "amsterdam"]) LabelEncoder() >>> list(le.classes_) - ['amsterdam', 'paris', 'tokyo'] + [np.str_('amsterdam'), np.str_('paris'), np.str_('tokyo')] >>> le.transform(["tokyo", "tokyo", "paris"]) array([2, 2, 1]) >>> list(le.inverse_transform([2, 2, 1])) - ['tokyo', 'tokyo', 'paris'] + [np.str_('tokyo'), np.str_('tokyo'), np.str_('paris')] diff --git a/doc/modules/random_projection.rst b/doc/modules/random_projection.rst index 6931feb34ad1d..ec437c60c7d4c 100644 --- a/doc/modules/random_projection.rst +++ b/doc/modules/random_projection.rst @@ -19,19 +19,19 @@ samples of the dataset. Thus random projection is a suitable approximation technique for distance based method. -.. topic:: References: +.. rubric:: References - * Sanjoy Dasgupta. 2000. - `Experiments with random projection. `_ - In Proceedings of the Sixteenth conference on Uncertainty in artificial - intelligence (UAI'00), Craig Boutilier and Moisés Goldszmidt (Eds.). Morgan - Kaufmann Publishers Inc., San Francisco, CA, USA, 143-151. +* Sanjoy Dasgupta. 2000. + `Experiments with random projection. `_ + In Proceedings of the Sixteenth conference on Uncertainty in artificial + intelligence (UAI'00), Craig Boutilier and Moisés Goldszmidt (Eds.). Morgan + Kaufmann Publishers Inc., San Francisco, CA, USA, 143-151. - * Ella Bingham and Heikki Mannila. 2001. - `Random projection in dimensionality reduction: applications to image and text data. `_ - In Proceedings of the seventh ACM SIGKDD international conference on - Knowledge discovery and data mining (KDD '01). ACM, New York, NY, USA, - 245-250. +* Ella Bingham and Heikki Mannila. 2001. + `Random projection in dimensionality reduction: applications to image and text data. `_ + In Proceedings of the seventh ACM SIGKDD international conference on + Knowledge discovery and data mining (KDD '01). ACM, New York, NY, USA, + 245-250. .. _johnson_lindenstrauss: @@ -58,7 +58,7 @@ bounded distortion introduced by the random projection:: >>> from sklearn.random_projection import johnson_lindenstrauss_min_dim >>> johnson_lindenstrauss_min_dim(n_samples=1e6, eps=0.5) - 663 + np.int64(663) >>> johnson_lindenstrauss_min_dim(n_samples=1e6, eps=[0.5, 0.1, 0.01]) array([ 663, 11841, 1112658]) >>> johnson_lindenstrauss_min_dim(n_samples=[1e4, 1e5, 1e6], eps=0.1) @@ -74,17 +74,17 @@ bounded distortion introduced by the random projection:: :scale: 75 :align: center -.. topic:: Example: +.. rubric:: Examples - * See :ref:`sphx_glr_auto_examples_miscellaneous_plot_johnson_lindenstrauss_bound.py` - for a theoretical explication on the Johnson-Lindenstrauss lemma and an - empirical validation using sparse random matrices. +* See :ref:`sphx_glr_auto_examples_miscellaneous_plot_johnson_lindenstrauss_bound.py` + for a theoretical explication on the Johnson-Lindenstrauss lemma and an + empirical validation using sparse random matrices. -.. topic:: References: +.. 
rubric:: References - * Sanjoy Dasgupta and Anupam Gupta, 1999. - `An elementary proof of the Johnson-Lindenstrauss Lemma. - `_ +* Sanjoy Dasgupta and Anupam Gupta, 1999. + `An elementary proof of the Johnson-Lindenstrauss Lemma. + `_ .. _gaussian_random_matrix: @@ -95,7 +95,7 @@ dimensionality by projecting the original input space on a randomly generated matrix where components are drawn from the following distribution :math:`N(0, \frac{1}{n_{components}})`. -Here a small excerpt which illustrates how to use the Gaussian random +Here is a small excerpt which illustrates how to use the Gaussian random projection transformer:: >>> import numpy as np @@ -136,7 +136,7 @@ where :math:`n_{\text{components}}` is the size of the projected subspace. By default the density of non zero elements is set to the minimum density as recommended by Ping Li et al.: :math:`1 / \sqrt{n_{\text{features}}}`. -Here a small excerpt which illustrates how to use the sparse random +Here is a small excerpt which illustrates how to use the sparse random projection transformer:: >>> import numpy as np @@ -148,18 +148,17 @@ projection transformer:: (100, 3947) -.. topic:: References: +.. rubric:: References - * D. Achlioptas. 2003. - `Database-friendly random projections: Johnson-Lindenstrauss with binary - coins `_. - Journal of Computer and System Sciences 66 (2003) 671–687 +* D. Achlioptas. 2003. + `Database-friendly random projections: Johnson-Lindenstrauss with binary + coins `_. + Journal of Computer and System Sciences 66 (2003) 671-687. - * Ping Li, Trevor J. Hastie, and Kenneth W. Church. 2006. - `Very sparse random projections. `_ - In Proceedings of the 12th ACM SIGKDD international conference on - Knowledge discovery and data mining (KDD '06). ACM, New York, NY, USA, - 287-296. +* Ping Li, Trevor J. Hastie, and Kenneth W. Church. 2006. + `Very sparse random projections. `_ + In Proceedings of the 12th ACM SIGKDD international conference on + Knowledge discovery and data mining (KDD '06). ACM, New York, NY, USA, 287-296. .. _random_projection_inverse_transform: @@ -180,7 +179,7 @@ been computed during fit, they are reused at each call to ``inverse_transform``. Otherwise they are recomputed each time, which can be costly. The result is always dense, even if ``X`` is sparse. -Here a small code example which illustrates how to use the inverse transform +Here is a small code example which illustrates how to use the inverse transform feature:: >>> import numpy as np diff --git a/doc/modules/semi_supervised.rst b/doc/modules/semi_supervised.rst index f8cae0a9ddcdf..6c050b698f42c 100644 --- a/doc/modules/semi_supervised.rst +++ b/doc/modules/semi_supervised.rst @@ -40,8 +40,8 @@ this algorithm, a given supervised classifier can function as a semi-supervised classifier, allowing it to learn from unlabeled data. :class:`SelfTrainingClassifier` can be called with any classifier that -implements `predict_proba`, passed as the parameter `base_classifier`. In -each iteration, the `base_classifier` predicts labels for the unlabeled +implements `predict_proba`, passed as the parameter `estimator`. In +each iteration, the `estimator` predicts labels for the unlabeled samples and adds a subset of these labels to the labeled dataset. The choice of this subset is determined by the selection criterion. This @@ -60,18 +60,18 @@ until all samples have labels or no new samples are selected in that iteration. When using the self-training classifier, the :ref:`calibration ` of the classifier is important. -.. topic:: Examples +.. 
rubric:: Examples - * :ref:`sphx_glr_auto_examples_semi_supervised_plot_self_training_varying_threshold.py` - * :ref:`sphx_glr_auto_examples_semi_supervised_plot_semi_supervised_versus_svm_iris.py` +* :ref:`sphx_glr_auto_examples_semi_supervised_plot_self_training_varying_threshold.py` +* :ref:`sphx_glr_auto_examples_semi_supervised_plot_semi_supervised_versus_svm_iris.py` -.. topic:: References +.. rubric:: References - .. [1] :doi:`"Unsupervised word sense disambiguation rivaling supervised methods" - <10.3115/981658.981684>` - David Yarowsky, Proceedings of the 33rd annual meeting on Association for - Computational Linguistics (ACL '95). Association for Computational Linguistics, - Stroudsburg, PA, USA, 189-196. +.. [1] :doi:`"Unsupervised word sense disambiguation rivaling supervised methods" + <10.3115/981658.981684>` + David Yarowsky, Proceedings of the 33rd annual meeting on Association for + Computational Linguistics (ACL '95). Association for Computational Linguistics, + Stroudsburg, PA, USA, 189-196. .. _label_propagation: @@ -118,7 +118,7 @@ computing the normalized graph Laplacian matrix. This procedure is also used in :ref:`spectral_clustering`. Label propagation models have two built-in kernel methods. Choice of kernel -effects both scalability and performance of the algorithms. The following are +affects both scalability and performance of the algorithms. The following are available: * rbf (:math:`\exp(-\gamma |x-y|^2), \gamma > 0`). :math:`\gamma` is @@ -134,18 +134,18 @@ algorithm can lead to prohibitively long running times. On the other hand, the KNN kernel will produce a much more memory-friendly sparse matrix which can drastically reduce running times. -.. topic:: Examples +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_semi_supervised_plot_semi_supervised_versus_svm_iris.py` - * :ref:`sphx_glr_auto_examples_semi_supervised_plot_label_propagation_structure.py` - * :ref:`sphx_glr_auto_examples_semi_supervised_plot_label_propagation_digits.py` - * :ref:`sphx_glr_auto_examples_semi_supervised_plot_label_propagation_digits_active_learning.py` +* :ref:`sphx_glr_auto_examples_semi_supervised_plot_semi_supervised_versus_svm_iris.py` +* :ref:`sphx_glr_auto_examples_semi_supervised_plot_label_propagation_structure.py` +* :ref:`sphx_glr_auto_examples_semi_supervised_plot_label_propagation_digits.py` +* :ref:`sphx_glr_auto_examples_semi_supervised_plot_label_propagation_digits_active_learning.py` -.. topic:: References +.. rubric:: References - [2] Yoshua Bengio, Olivier Delalleau, Nicolas Le Roux. In Semi-Supervised - Learning (2006), pp. 193-216 +[2] Yoshua Bengio, Olivier Delalleau, Nicolas Le Roux. In Semi-Supervised +Learning (2006), pp. 193-216 - [3] Olivier Delalleau, Yoshua Bengio, Nicolas Le Roux. Efficient - Non-Parametric Function Induction in Semi-Supervised Learning. AISTAT 2005 - https://www.gatsby.ucl.ac.uk/aistats/fullpapers/204.pdf +[3] Olivier Delalleau, Yoshua Bengio, Nicolas Le Roux. Efficient +Non-Parametric Function Induction in Semi-Supervised Learning. AISTAT 2005 +https://www.gatsby.ucl.ac.uk/aistats/fullpapers/204.pdf diff --git a/doc/modules/sgd.rst b/doc/modules/sgd.rst index a7981e9d4ec28..103ae205387e3 100644 --- a/doc/modules/sgd.rst +++ b/doc/modules/sgd.rst @@ -18,8 +18,8 @@ recently in the context of large-scale learning. SGD has been successfully applied to large-scale and sparse machine learning problems often encountered in text classification and natural language processing. 
Given that the data is sparse, the classifiers -in this module easily scale to problems with more than 10^5 training -examples and more than 10^5 features. +in this module easily scale to problems with more than :math:`10^5` training +examples and more than :math:`10^5` features. Strictly speaking, SGD is merely an optimization technique and does not correspond to a specific family of machine learning models. It is only a @@ -91,12 +91,12 @@ SGD fits a linear model to the training data. The ``coef_`` attribute holds the model parameters:: >>> clf.coef_ - array([[9.9..., 9.9...]]) + array([[9.9, 9.9]]) The ``intercept_`` attribute holds the intercept (aka offset or bias):: >>> clf.intercept_ - array([-9.9...]) + array([-9.9]) Whether or not the model should use an intercept, i.e. a biased hyperplane, is controlled by the parameter ``fit_intercept``. @@ -106,7 +106,7 @@ the coefficients and the input sample, plus the intercept) is given by :meth:`SGDClassifier.decision_function`:: >>> clf.decision_function([[2., 2.]]) - array([29.6...]) + array([29.6]) The concrete loss function can be set via the ``loss`` parameter. :class:`SGDClassifier` supports the following loss functions: @@ -116,7 +116,7 @@ parameter. :class:`SGDClassifier` supports the following loss functions: * ``loss="log_loss"``: logistic regression, * and all regression losses below. In this case the target is encoded as -1 or 1, and the problem is treated as a regression problem. The predicted - class then correspond to the sign of the predicted target. + class then corresponds to the sign of the predicted target. Please refer to the :ref:`mathematical section below ` for formulas. @@ -131,7 +131,7 @@ Using ``loss="log_loss"`` or ``loss="modified_huber"`` enables the >>> clf = SGDClassifier(loss="log_loss", max_iter=5).fit(X, y) >>> clf.predict_proba([[1., 1.]]) # doctest: +SKIP - array([[0.00..., 0.99...]]) + array([[0.00, 0.99]]) The concrete penalty can be set via the ``penalty`` parameter. SGD supports the following penalties: @@ -189,14 +189,13 @@ For classification with a logistic loss, another variant of SGD with an averaging strategy is available with Stochastic Average Gradient (SAG) algorithm, available as a solver in :class:`LogisticRegression`. -.. topic:: Examples: +.. rubric:: Examples - - :ref:`sphx_glr_auto_examples_linear_model_plot_sgd_separating_hyperplane.py`, - - :ref:`sphx_glr_auto_examples_linear_model_plot_sgd_iris.py` - - :ref:`sphx_glr_auto_examples_linear_model_plot_sgd_weighted_samples.py` - - :ref:`sphx_glr_auto_examples_linear_model_plot_sgd_comparison.py` - - :ref:`sphx_glr_auto_examples_svm_plot_separating_hyperplane_unbalanced.py` - (See the Note in the example) +- :ref:`sphx_glr_auto_examples_linear_model_plot_sgd_separating_hyperplane.py` +- :ref:`sphx_glr_auto_examples_linear_model_plot_sgd_iris.py` +- :ref:`sphx_glr_auto_examples_linear_model_plot_sgd_weighted_samples.py` +- :ref:`sphx_glr_auto_examples_svm_plot_separating_hyperplane_unbalanced.py` + (See the Note in the example) Regression ========== @@ -249,48 +248,48 @@ quadratic in the number of samples. with a large number of training samples (> 10,000) for which the SGD variant can be several orders of magnitude faster. -|details-start| -**Mathematical details** -|details-split| +.. dropdown:: Mathematical details -Its implementation is based on the implementation of the stochastic -gradient descent. 
Indeed, the original optimization problem of the One-Class -SVM is given by + Its implementation is based on the implementation of the stochastic + gradient descent. Indeed, the original optimization problem of the One-Class + SVM is given by -.. math:: - - \begin{aligned} - \min_{w, \rho, \xi} & \quad \frac{1}{2}\Vert w \Vert^2 - \rho + \frac{1}{\nu n} \sum_{i=1}^n \xi_i \\ - \text{s.t.} & \quad \langle w, x_i \rangle \geq \rho - \xi_i \quad 1 \leq i \leq n \\ - & \quad \xi_i \geq 0 \quad 1 \leq i \leq n - \end{aligned} + .. math:: -where :math:`\nu \in (0, 1]` is the user-specified parameter controlling the -proportion of outliers and the proportion of support vectors. Getting rid of -the slack variables :math:`\xi_i` this problem is equivalent to + \begin{aligned} + \min_{w, \rho, \xi} & \quad \frac{1}{2}\Vert w \Vert^2 - \rho + \frac{1}{\nu n} \sum_{i=1}^n \xi_i \\ + \text{s.t.} & \quad \langle w, x_i \rangle \geq \rho - \xi_i \quad 1 \leq i \leq n \\ + & \quad \xi_i \geq 0 \quad 1 \leq i \leq n + \end{aligned} -.. math:: + where :math:`\nu \in (0, 1]` is the user-specified parameter controlling the + proportion of outliers and the proportion of support vectors. Getting rid of + the slack variables :math:`\xi_i` this problem is equivalent to - \min_{w, \rho} \frac{1}{2}\Vert w \Vert^2 - \rho + \frac{1}{\nu n} \sum_{i=1}^n \max(0, \rho - \langle w, x_i \rangle) \, . + .. math:: -Multiplying by the constant :math:`\nu` and introducing the intercept -:math:`b = 1 - \rho` we obtain the following equivalent optimization problem + \min_{w, \rho} \frac{1}{2}\Vert w \Vert^2 - \rho + \frac{1}{\nu n} \sum_{i=1}^n \max(0, \rho - \langle w, x_i \rangle) \, . -.. math:: + Multiplying by the constant :math:`\nu` and introducing the intercept + :math:`b = 1 - \rho` we obtain the following equivalent optimization problem - \min_{w, b} \frac{\nu}{2}\Vert w \Vert^2 + b\nu + \frac{1}{n} \sum_{i=1}^n \max(0, 1 - (\langle w, x_i \rangle + b)) \, . + .. math:: -This is similar to the optimization problems studied in section -:ref:`sgd_mathematical_formulation` with :math:`y_i = 1, 1 \leq i \leq n` and -:math:`\alpha = \nu/2`, :math:`L` being the hinge loss function and :math:`R` -being the L2 norm. We just need to add the term :math:`b\nu` in the -optimization loop. + \min_{w, b} \frac{\nu}{2}\Vert w \Vert^2 + b\nu + \frac{1}{n} \sum_{i=1}^n \max(0, 1 - (\langle w, x_i \rangle + b)) \, . -|details-end| + This is similar to the optimization problems studied in section + :ref:`sgd_mathematical_formulation` with :math:`y_i = 1, 1 \leq i \leq n` and + :math:`\alpha = \nu/2`, :math:`L` being the hinge loss function and :math:`R` + being the L2 norm. We just need to add the term :math:`b\nu` in the + optimization loop. As :class:`SGDClassifier` and :class:`SGDRegressor`, :class:`SGDOneClassSVM` supports averaged SGD. Averaging can be enabled by setting ``average=True``. +.. rubric:: Examples + +- :ref:`sphx_glr_auto_examples_linear_model_plot_sgdocsvm_vs_ocsvm.py` + Stochastic Gradient Descent for sparse data =========================================== @@ -305,9 +304,9 @@ efficiency, however, use the CSR matrix format as defined in `scipy.sparse.csr_matrix `_. -.. topic:: Examples: +.. 
rubric:: Examples - - :ref:`sphx_glr_auto_examples_text_plot_document_classification_20newsgroups.py` +- :ref:`sphx_glr_auto_examples_text_plot_document_classification_20newsgroups.py` Complexity ========== @@ -339,8 +338,10 @@ criteria to stop the algorithm when a given level of convergence is reached: In both cases, the criterion is evaluated once by epoch, and the algorithm stops when the criterion does not improve ``n_iter_no_change`` times in a row. The improvement is evaluated with absolute tolerance ``tol``, and the algorithm -stops in any case after a maximum number of iteration ``max_iter``. +stops in any case after a maximum number of iterations ``max_iter``. +See :ref:`sphx_glr_auto_examples_linear_model_plot_sgd_early_stopping.py` for an +example of the effects of early stopping. Tips on Practical Use ===================== @@ -385,11 +386,11 @@ Tips on Practical Use * We found that Averaged SGD works best with a larger number of features and a higher eta0. -.. topic:: References: +.. rubric:: References - * `"Efficient BackProp" `_ - Y. LeCun, L. Bottou, G. Orr, K. Müller - In Neural Networks: Tricks - of the Trade 1998. +* `"Efficient BackProp" `_ + Y. LeCun, L. Bottou, G. Orr, K. Müller - In Neural Networks: Tricks + of the Trade 1998. .. _sgd_mathematical_formulation: @@ -400,8 +401,9 @@ We describe here the mathematical details of the SGD procedure. A good overview with convergence rates can be found in [#6]_. Given a set of training examples :math:`(x_1, y_1), \ldots, (x_n, y_n)` where -:math:`x_i \in \mathbf{R}^m` and :math:`y_i \in \mathcal{R}` (:math:`y_i \in -{-1, 1}` for classification), our goal is to learn a linear scoring function +:math:`x_i \in \mathbf{R}^m` and :math:`y_i \in \mathbf{R}` +(:math:`y_i \in \{-1, 1\}` for classification), +our goal is to learn a linear scoring function :math:`f(x) = w^T x + b` with model parameters :math:`w \in \mathbf{R}^m` and intercept :math:`b \in \mathbf{R}`. In order to make predictions for binary classification, we simply look at the sign of :math:`f(x)`. To find the model @@ -416,32 +418,28 @@ where :math:`L` is a loss function that measures model (mis)fit and complexity; :math:`\alpha > 0` is a non-negative hyperparameter that controls the regularization strength. -|details-start| -**Loss functions details** -|details-split| - -Different choices for :math:`L` entail different classifiers or regressors: - -- Hinge (soft-margin): equivalent to Support Vector Classification. - :math:`L(y_i, f(x_i)) = \max(0, 1 - y_i f(x_i))`. -- Perceptron: - :math:`L(y_i, f(x_i)) = \max(0, - y_i f(x_i))`. -- Modified Huber: - :math:`L(y_i, f(x_i)) = \max(0, 1 - y_i f(x_i))^2` if :math:`y_i f(x_i) > - -1`, and :math:`L(y_i, f(x_i)) = -4 y_i f(x_i)` otherwise. -- Log Loss: equivalent to Logistic Regression. - :math:`L(y_i, f(x_i)) = \log(1 + \exp (-y_i f(x_i)))`. -- Squared Error: Linear regression (Ridge or Lasso depending on - :math:`R`). - :math:`L(y_i, f(x_i)) = \frac{1}{2}(y_i - f(x_i))^2`. -- Huber: less sensitive to outliers than least-squares. It is equivalent to - least squares when :math:`|y_i - f(x_i)| \leq \varepsilon`, and - :math:`L(y_i, f(x_i)) = \varepsilon |y_i - f(x_i)| - \frac{1}{2} - \varepsilon^2` otherwise. -- Epsilon-Insensitive: (soft-margin) equivalent to Support Vector Regression. - :math:`L(y_i, f(x_i)) = \max(0, |y_i - f(x_i)| - \varepsilon)`. - -|details-end| +.. 
dropdown:: Loss functions details + + Different choices for :math:`L` entail different classifiers or regressors: + + - Hinge (soft-margin): equivalent to Support Vector Classification. + :math:`L(y_i, f(x_i)) = \max(0, 1 - y_i f(x_i))`. + - Perceptron: + :math:`L(y_i, f(x_i)) = \max(0, - y_i f(x_i))`. + - Modified Huber: + :math:`L(y_i, f(x_i)) = \max(0, 1 - y_i f(x_i))^2` if :math:`y_i f(x_i) > + -1`, and :math:`L(y_i, f(x_i)) = -4 y_i f(x_i)` otherwise. + - Log Loss: equivalent to Logistic Regression. + :math:`L(y_i, f(x_i)) = \log(1 + \exp (-y_i f(x_i)))`. + - Squared Error: Linear regression (Ridge or Lasso depending on + :math:`R`). + :math:`L(y_i, f(x_i)) = \frac{1}{2}(y_i - f(x_i))^2`. + - Huber: less sensitive to outliers than least-squares. It is equivalent to + least squares when :math:`|y_i - f(x_i)| \leq \varepsilon`, and + :math:`L(y_i, f(x_i)) = \varepsilon |y_i - f(x_i)| - \frac{1}{2} + \varepsilon^2` otherwise. + - Epsilon-Insensitive: (soft-margin) equivalent to Support Vector Regression. + :math:`L(y_i, f(x_i)) = \max(0, |y_i - f(x_i)| - \varepsilon)`. All of the above loss functions can be regarded as an upper bound on the misclassification error (Zero-one loss) as shown in the Figure below. @@ -553,32 +551,29 @@ We use the truncated gradient algorithm proposed in [#3]_ for L1 regularization (and the Elastic Net). The code is written in Cython. -.. topic:: References: +.. rubric:: References - .. [#1] `"Stochastic Gradient Descent" - `_ L. Bottou - Website, 2010. +.. [#1] `"Stochastic Gradient Descent" + `_ L. Bottou - Website, 2010. - .. [#2] :doi:`"Pegasos: Primal estimated sub-gradient solver for svm" - <10.1145/1273496.1273598>` - S. Shalev-Shwartz, Y. Singer, N. Srebro - In Proceedings of ICML '07. +.. [#2] :doi:`"Pegasos: Primal estimated sub-gradient solver for svm" + <10.1145/1273496.1273598>` + S. Shalev-Shwartz, Y. Singer, N. Srebro - In Proceedings of ICML '07. - .. [#3] `"Stochastic gradient descent training for l1-regularized - log-linear models with cumulative penalty" - `_ - Y. Tsuruoka, J. Tsujii, S. Ananiadou - In Proceedings of the AFNLP/ACL - '09. +.. [#3] `"Stochastic gradient descent training for l1-regularized + log-linear models with cumulative penalty" + `_ + Y. Tsuruoka, J. Tsujii, S. Ananiadou - In Proceedings of the AFNLP/ACL'09. - .. [#4] :arxiv:`"Towards Optimal One Pass Large Scale Learning with - Averaged Stochastic Gradient Descent" - <1107.2490v2>` - Xu, Wei (2011) +.. [#4] :arxiv:`"Towards Optimal One Pass Large Scale Learning with + Averaged Stochastic Gradient Descent" + <1107.2490v2>`. Xu, Wei (2011) - .. [#5] :doi:`"Regularization and variable selection via the elastic net" - <10.1111/j.1467-9868.2005.00503.x>` - H. Zou, T. Hastie - Journal of the Royal Statistical Society Series B, - 67 (2), 301-320. +.. [#5] :doi:`"Regularization and variable selection via the elastic net" + <10.1111/j.1467-9868.2005.00503.x>` + H. Zou, T. Hastie - Journal of the Royal Statistical Society Series B, + 67 (2), 301-320. - .. [#6] :doi:`"Solving large scale linear prediction problems using stochastic - gradient descent algorithms" - <10.1145/1015330.1015332>` - T. Zhang - In Proceedings of ICML '04. +.. [#6] :doi:`"Solving large scale linear prediction problems using stochastic + gradient descent algorithms" <10.1145/1015330.1015332>` + T. Zhang - In Proceedings of ICML '04. 
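+As a complement to the formulation above, here is a minimal sketch (the toy
+data is illustrative) verifying that :meth:`SGDClassifier.decision_function`
+is exactly the affine score :math:`f(x) = w^T x + b` built from the fitted
+``coef_`` and ``intercept_``::
+
+    >>> import numpy as np
+    >>> from sklearn.linear_model import SGDClassifier
+    >>> X = np.array([[0., 0.], [1., 1.], [2., 2.], [3., 3.]])
+    >>> y = np.array([0, 0, 1, 1])
+    >>> clf = SGDClassifier(loss="hinge", random_state=0).fit(X, y)
+    >>> # recompute the decision scores by hand from the fitted parameters
+    >>> scores = X @ clf.coef_.ravel() + clf.intercept_
+    >>> np.allclose(scores, clf.decision_function(X))
+    True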
diff --git a/doc/modules/svm.rst b/doc/modules/svm.rst index e3bc1395819e9..ac9fbdb12e58d 100644 --- a/doc/modules/svm.rst +++ b/doc/modules/svm.rst @@ -108,11 +108,11 @@ properties of these support vectors can be found in attributes >>> clf.n_support_ array([1, 1]...) -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_svm_plot_separating_hyperplane.py`, - * :ref:`sphx_glr_auto_examples_svm_plot_svm_nonlinear.py` - * :ref:`sphx_glr_auto_examples_svm_plot_svm_anova.py`, +* :ref:`sphx_glr_auto_examples_svm_plot_separating_hyperplane.py` +* :ref:`sphx_glr_auto_examples_svm_plot_svm_anova.py` +* :ref:`sphx_glr_auto_examples_classification_plot_classification_probability.py` .. _svm_multi_class: @@ -126,7 +126,8 @@ classifiers are constructed and each one trains data from two classes. To provide a consistent interface with other classifiers, the ``decision_function_shape`` option allows to monotonically transform the results of the "one-versus-one" classifiers to a "one-vs-rest" decision -function of shape ``(n_samples, n_classes)``. +function of shape ``(n_samples, n_classes)``, which is the default setting +of the parameter (default='ovr'). >>> X = [[0], [1], [2], [3]] >>> Y = [0, 1, 2, 3] @@ -154,65 +155,61 @@ multi-class strategy, thus training `n_classes` models. See :ref:`svm_mathematical_formulation` for a complete description of the decision function. -|details-start| -**Details on multi-class strategies** -|details-split| - -Note that the :class:`LinearSVC` also implements an alternative multi-class -strategy, the so-called multi-class SVM formulated by Crammer and Singer -[#8]_, by using the option ``multi_class='crammer_singer'``. In practice, -one-vs-rest classification is usually preferred, since the results are mostly -similar, but the runtime is significantly less. - -For "one-vs-rest" :class:`LinearSVC` the attributes ``coef_`` and ``intercept_`` -have the shape ``(n_classes, n_features)`` and ``(n_classes,)`` respectively. -Each row of the coefficients corresponds to one of the ``n_classes`` -"one-vs-rest" classifiers and similar for the intercepts, in the -order of the "one" class. - -In the case of "one-vs-one" :class:`SVC` and :class:`NuSVC`, the layout of -the attributes is a little more involved. In the case of a linear -kernel, the attributes ``coef_`` and ``intercept_`` have the shape -``(n_classes * (n_classes - 1) / 2, n_features)`` and ``(n_classes * -(n_classes - 1) / 2)`` respectively. This is similar to the layout for -:class:`LinearSVC` described above, with each row now corresponding -to a binary classifier. The order for classes -0 to n is "0 vs 1", "0 vs 2" , ... "0 vs n", "1 vs 2", "1 vs 3", "1 vs n", . . -. "n-1 vs n". - -The shape of ``dual_coef_`` is ``(n_classes-1, n_SV)`` with -a somewhat hard to grasp layout. -The columns correspond to the support vectors involved in any -of the ``n_classes * (n_classes - 1) / 2`` "one-vs-one" classifiers. -Each support vector ``v`` has a dual coefficient in each of the -``n_classes - 1`` classifiers comparing the class of ``v`` against another class. -Note that some, but not all, of these dual coefficients, may be zero. -The ``n_classes - 1`` entries in each column are these dual coefficients, -ordered by the opposing class. - -This might be clearer with an example: consider a three class problem with -class 0 having three support vectors -:math:`v^{0}_0, v^{1}_0, v^{2}_0` and class 1 and 2 having two support vectors -:math:`v^{0}_1, v^{1}_1` and :math:`v^{0}_2, v^{1}_2` respectively. 
For each -support vector :math:`v^{j}_i`, there are two dual coefficients. Let's call -the coefficient of support vector :math:`v^{j}_i` in the classifier between -classes :math:`i` and :math:`k` :math:`\alpha^{j}_{i,k}`. -Then ``dual_coef_`` looks like this: - -+------------------------+------------------------+------------------------+------------------------+------------------------+------------------------+------------------------+ -|:math:`\alpha^{0}_{0,1}`|:math:`\alpha^{1}_{0,1}`|:math:`\alpha^{2}_{0,1}`|:math:`\alpha^{0}_{1,0}`|:math:`\alpha^{1}_{1,0}`|:math:`\alpha^{0}_{2,0}`|:math:`\alpha^{1}_{2,0}`| -+------------------------+------------------------+------------------------+------------------------+------------------------+------------------------+------------------------+ -|:math:`\alpha^{0}_{0,2}`|:math:`\alpha^{1}_{0,2}`|:math:`\alpha^{2}_{0,2}`|:math:`\alpha^{0}_{1,2}`|:math:`\alpha^{1}_{1,2}`|:math:`\alpha^{0}_{2,1}`|:math:`\alpha^{1}_{2,1}`| -+------------------------+------------------------+------------------------+------------------------+------------------------+------------------------+------------------------+ -|Coefficients |Coefficients |Coefficients | -|for SVs of class 0 |for SVs of class 1 |for SVs of class 2 | -+--------------------------------------------------------------------------+-------------------------------------------------+-------------------------------------------------+ - -|details-end| - -.. topic:: Examples: - - * :ref:`sphx_glr_auto_examples_svm_plot_iris_svc.py`, +.. dropdown:: Details on multi-class strategies + + Note that the :class:`LinearSVC` also implements an alternative multi-class + strategy, the so-called multi-class SVM formulated by Crammer and Singer + [#8]_, by using the option ``multi_class='crammer_singer'``. In practice, + one-vs-rest classification is usually preferred, since the results are mostly + similar, but the runtime is significantly less. + + For "one-vs-rest" :class:`LinearSVC` the attributes ``coef_`` and ``intercept_`` + have the shape ``(n_classes, n_features)`` and ``(n_classes,)`` respectively. + Each row of the coefficients corresponds to one of the ``n_classes`` + "one-vs-rest" classifiers and similar for the intercepts, in the + order of the "one" class. + + In the case of "one-vs-one" :class:`SVC` and :class:`NuSVC`, the layout of + the attributes is a little more involved. In the case of a linear + kernel, the attributes ``coef_`` and ``intercept_`` have the shape + ``(n_classes * (n_classes - 1) / 2, n_features)`` and ``(n_classes * + (n_classes - 1) / 2)`` respectively. This is similar to the layout for + :class:`LinearSVC` described above, with each row now corresponding + to a binary classifier. The order for classes + 0 to n is "0 vs 1", "0 vs 2" , ... "0 vs n", "1 vs 2", "1 vs 3", "1 vs n", . . + . "n-1 vs n". + + The shape of ``dual_coef_`` is ``(n_classes-1, n_SV)`` with + a somewhat hard to grasp layout. + The columns correspond to the support vectors involved in any + of the ``n_classes * (n_classes - 1) / 2`` "one-vs-one" classifiers. + Each support vector ``v`` has a dual coefficient in each of the + ``n_classes - 1`` classifiers comparing the class of ``v`` against another class. + Note that some, but not all, of these dual coefficients, may be zero. + The ``n_classes - 1`` entries in each column are these dual coefficients, + ordered by the opposing class. 
+ + This might be clearer with an example: consider a three class problem with + class 0 having three support vectors + :math:`v^{0}_0, v^{1}_0, v^{2}_0` and class 1 and 2 having two support vectors + :math:`v^{0}_1, v^{1}_1` and :math:`v^{0}_2, v^{1}_2` respectively. For each + support vector :math:`v^{j}_i`, there are two dual coefficients. Let's call + the coefficient of support vector :math:`v^{j}_i` in the classifier between + classes :math:`i` and :math:`k` :math:`\alpha^{j}_{i,k}`. + Then ``dual_coef_`` looks like this: + + +------------------------+------------------------+------------------------+------------------------+------------------------+------------------------+------------------------+ + |:math:`\alpha^{0}_{0,1}`|:math:`\alpha^{1}_{0,1}`|:math:`\alpha^{2}_{0,1}`|:math:`\alpha^{0}_{1,0}`|:math:`\alpha^{1}_{1,0}`|:math:`\alpha^{0}_{2,0}`|:math:`\alpha^{1}_{2,0}`| + +------------------------+------------------------+------------------------+------------------------+------------------------+------------------------+------------------------+ + |:math:`\alpha^{0}_{0,2}`|:math:`\alpha^{1}_{0,2}`|:math:`\alpha^{2}_{0,2}`|:math:`\alpha^{0}_{1,2}`|:math:`\alpha^{1}_{1,2}`|:math:`\alpha^{0}_{2,1}`|:math:`\alpha^{1}_{2,1}`| + +------------------------+------------------------+------------------------+------------------------+------------------------+------------------------+------------------------+ + |Coefficients |Coefficients |Coefficients | + |for SVs of class 0 |for SVs of class 1 |for SVs of class 2 | + +--------------------------------------------------------------------------+-------------------------------------------------+-------------------------------------------------+ + +.. rubric:: Examples + +* :ref:`sphx_glr_auto_examples_svm_plot_iris_svc.py` .. _scores_probabilities: @@ -233,7 +230,7 @@ In the multiclass case, this is extended as per [#2]_. The same probability calibration procedure is available for all estimators via the :class:`~sklearn.calibration.CalibratedClassifierCV` (see :ref:`calibration`). In the case of :class:`SVC` and :class:`NuSVC`, this - procedure is builtin in `libsvm`_ which is used under the hood, so it does + procedure is builtin to `libsvm`_ which is used under the hood, so it does not rely on scikit-learn's :class:`~sklearn.calibration.CalibratedClassifierCV`. @@ -295,10 +292,10 @@ to the sample weights: :align: center :scale: 75 -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_svm_plot_separating_hyperplane_unbalanced.py` - * :ref:`sphx_glr_auto_examples_svm_plot_weighted_samples.py`, +* :ref:`sphx_glr_auto_examples_svm_plot_separating_hyperplane_unbalanced.py` +* :ref:`sphx_glr_auto_examples_svm_plot_weighted_samples.py` .. _svm_regression: @@ -343,9 +340,9 @@ floating point values instead of integer values:: array([1.5]) -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_svm_plot_svm_regression.py` +* :ref:`sphx_glr_auto_examples_svm_plot_svm_regression.py` .. _svm_outlier_detection: @@ -516,11 +513,10 @@ Proper choice of ``C`` and ``gamma`` is critical to the SVM's performance. One is advised to use :class:`~sklearn.model_selection.GridSearchCV` with ``C`` and ``gamma`` spaced exponentially far apart to choose good values. -.. topic:: Examples: +.. 
rubric:: Examples - * :ref:`sphx_glr_auto_examples_svm_plot_rbf_parameters.py` - * :ref:`sphx_glr_auto_examples_svm_plot_svm_nonlinear.py` - * :ref:`sphx_glr_auto_examples_svm_plot_svm_scale_c.py` +* :ref:`sphx_glr_auto_examples_svm_plot_rbf_parameters.py` +* :ref:`sphx_glr_auto_examples_svm_plot_svm_scale_c.py` Custom Kernels -------------- @@ -539,60 +535,52 @@ classifiers, except that: use of ``fit()`` and ``predict()`` you will have unexpected results. -|details-start| -**Using Python functions as kernels** -|details-split| +.. dropdown:: Using Python functions as kernels -You can use your own defined kernels by passing a function to the -``kernel`` parameter. + You can use your own defined kernels by passing a function to the + ``kernel`` parameter. -Your kernel must take as arguments two matrices of shape -``(n_samples_1, n_features)``, ``(n_samples_2, n_features)`` -and return a kernel matrix of shape ``(n_samples_1, n_samples_2)``. + Your kernel must take as arguments two matrices of shape + ``(n_samples_1, n_features)``, ``(n_samples_2, n_features)`` + and return a kernel matrix of shape ``(n_samples_1, n_samples_2)``. -The following code defines a linear kernel and creates a classifier -instance that will use that kernel:: + The following code defines a linear kernel and creates a classifier + instance that will use that kernel:: - >>> import numpy as np - >>> from sklearn import svm - >>> def my_kernel(X, Y): - ... return np.dot(X, Y.T) - ... - >>> clf = svm.SVC(kernel=my_kernel) - -|details-end| + >>> import numpy as np + >>> from sklearn import svm + >>> def my_kernel(X, Y): + ... return np.dot(X, Y.T) + ... + >>> clf = svm.SVC(kernel=my_kernel) -|details-start| -**Using the Gram matrix** -|details-split| +.. dropdown:: Using the Gram matrix -You can pass pre-computed kernels by using the ``kernel='precomputed'`` -option. You should then pass Gram matrix instead of X to the `fit` and -`predict` methods. The kernel values between *all* training vectors and the -test vectors must be provided: + You can pass pre-computed kernels by using the ``kernel='precomputed'`` + option. You should then pass Gram matrix instead of X to the `fit` and + `predict` methods. 
The kernel values between *all* training vectors and the + test vectors must be provided: - >>> import numpy as np - >>> from sklearn.datasets import make_classification - >>> from sklearn.model_selection import train_test_split - >>> from sklearn import svm - >>> X, y = make_classification(n_samples=10, random_state=0) - >>> X_train , X_test , y_train, y_test = train_test_split(X, y, random_state=0) - >>> clf = svm.SVC(kernel='precomputed') - >>> # linear kernel computation - >>> gram_train = np.dot(X_train, X_train.T) - >>> clf.fit(gram_train, y_train) - SVC(kernel='precomputed') - >>> # predict on training examples - >>> gram_test = np.dot(X_test, X_train.T) - >>> clf.predict(gram_test) - array([0, 1, 0]) + >>> import numpy as np + >>> from sklearn.datasets import make_classification + >>> from sklearn.model_selection import train_test_split + >>> from sklearn import svm + >>> X, y = make_classification(n_samples=10, random_state=0) + >>> X_train , X_test , y_train, y_test = train_test_split(X, y, random_state=0) + >>> clf = svm.SVC(kernel='precomputed') + >>> # linear kernel computation + >>> gram_train = np.dot(X_train, X_train.T) + >>> clf.fit(gram_train, y_train) + SVC(kernel='precomputed') + >>> # predict on training examples + >>> gram_test = np.dot(X_test, X_train.T) + >>> clf.predict(gram_test) + array([0, 1, 0]) -|details-end| +.. rubric:: Examples -.. topic:: Examples: - - * :ref:`sphx_glr_auto_examples_svm_plot_custom_kernel.py`. +* :ref:`sphx_glr_auto_examples_svm_plot_custom_kernel.py` .. _svm_mathematical_formulation: @@ -671,14 +659,14 @@ Once the optimization problem is solved, the output of .. math:: \sum_{i\in SV} y_i \alpha_i K(x_i, x) + b, -and the predicted class correspond to its sign. We only need to sum over the +and the predicted class corresponds to its sign. We only need to sum over the support vectors (i.e. the samples that lie within the margin) because the dual coefficients :math:`\alpha_i` are zero for the other samples. These parameters can be accessed through the attributes ``dual_coef_`` which holds the product :math:`y_i \alpha_i`, ``support_vectors_`` which holds the support vectors, and ``intercept_`` which holds the independent -term :math:`b` +term :math:`b`. .. note:: @@ -687,45 +675,37 @@ term :math:`b` equivalence between the amount of regularization of two models depends on the exact objective function optimized by the model. For example, when the estimator used is :class:`~sklearn.linear_model.Ridge` regression, - the relation between them is given as :math:`C = \frac{1}{alpha}`. + the relation between them is given as :math:`C = \frac{1}{\alpha}`. -|details-start| -**LinearSVC** -|details-split| +.. dropdown:: LinearSVC -The primal problem can be equivalently formulated as + The primal problem can be equivalently formulated as -.. math:: - - \min_ {w, b} \frac{1}{2} w^T w + C \sum_{i=1}^{n}\max(0, 1 - y_i (w^T \phi(x_i) + b)), + .. math:: -where we make use of the `hinge loss -`_. This is the form that is -directly optimized by :class:`LinearSVC`, but unlike the dual form, this one -does not involve inner products between samples, so the famous kernel trick -cannot be applied. This is why only the linear kernel is supported by -:class:`LinearSVC` (:math:`\phi` is the identity function). + \min_ {w, b} \frac{1}{2} w^T w + C \sum_{i=1}^{n}\max(0, 1 - y_i (w^T \phi(x_i) + b)), -|details-end| + where we make use of the `hinge loss + `_. 
This is the form that is + directly optimized by :class:`LinearSVC`, but unlike the dual form, this one + does not involve inner products between samples, so the famous kernel trick + cannot be applied. This is why only the linear kernel is supported by + :class:`LinearSVC` (:math:`\phi` is the identity function). .. _nu_svc: -|details-start| -**NuSVC** -|details-split| - -The :math:`\nu`-SVC formulation [#7]_ is a reparameterization of the -:math:`C`-SVC and therefore mathematically equivalent. +.. dropdown:: NuSVC -We introduce a new parameter :math:`\nu` (instead of :math:`C`) which -controls the number of support vectors and *margin errors*: -:math:`\nu \in (0, 1]` is an upper bound on the fraction of margin errors and -a lower bound of the fraction of support vectors. A margin error corresponds -to a sample that lies on the wrong side of its margin boundary: it is either -misclassified, or it is correctly classified but does not lie beyond the -margin. + The :math:`\nu`-SVC formulation [#7]_ is a reparameterization of the + :math:`C`-SVC and therefore mathematically equivalent. -|details-end| + We introduce a new parameter :math:`\nu` (instead of :math:`C`) which + controls the number of support vectors and *margin errors*: + :math:`\nu \in (0, 1]` is an upper bound on the fraction of margin errors and + a lower bound of the fraction of support vectors. A margin error corresponds + to a sample that lies on the wrong side of its margin boundary: it is either + misclassified, or it is correctly classified but does not lie beyond the + margin. SVR --- @@ -774,21 +754,17 @@ which holds the difference :math:`\alpha_i - \alpha_i^*`, ``support_vectors_`` w holds the support vectors, and ``intercept_`` which holds the independent term :math:`b` -|details-start| -**LinearSVR** -|details-split| +.. dropdown:: LinearSVR -The primal problem can be equivalently formulated as - -.. math:: + The primal problem can be equivalently formulated as - \min_ {w, b} \frac{1}{2} w^T w + C \sum_{i=1}^{n}\max(0, |y_i - (w^T \phi(x_i) + b)| - \varepsilon), + .. math:: -where we make use of the epsilon-insensitive loss, i.e. errors of less than -:math:`\varepsilon` are ignored. This is the form that is directly optimized -by :class:`LinearSVR`. + \min_ {w, b} \frac{1}{2} w^T w + C \sum_{i=1}^{n}\max(0, |y_i - (w^T \phi(x_i) + b)| - \varepsilon), -|details-end| + where we make use of the epsilon-insensitive loss, i.e. errors of less than + :math:`\varepsilon` are ignored. This is the form that is directly optimized + by :class:`LinearSVR`. .. _svm_implementation_details: @@ -804,38 +780,38 @@ used, please refer to their respective papers. .. _`libsvm`: https://www.csie.ntu.edu.tw/~cjlin/libsvm/ .. _`liblinear`: https://www.csie.ntu.edu.tw/~cjlin/liblinear/ -.. topic:: References: +.. rubric:: References - .. [#1] Platt `"Probabilistic outputs for SVMs and comparisons to - regularized likelihood methods" - `_. +.. [#1] Platt `"Probabilistic outputs for SVMs and comparisons to + regularized likelihood methods" + `_. - .. [#2] Wu, Lin and Weng, `"Probability estimates for multi-class - classification by pairwise coupling" - `_, JMLR - 5:975-1005, 2004. +.. [#2] Wu, Lin and Weng, `"Probability estimates for multi-class + classification by pairwise coupling" + `_, + JMLR 5:975-1005, 2004. - .. [#3] Fan, Rong-En, et al., - `"LIBLINEAR: A library for large linear classification." - `_, - Journal of machine learning research 9.Aug (2008): 1871-1874. +.. 
[#3] Fan, Rong-En, et al., + `"LIBLINEAR: A library for large linear classification." + `_, + Journal of machine learning research 9.Aug (2008): 1871-1874. - .. [#4] Chang and Lin, `LIBSVM: A Library for Support Vector Machines - `_. +.. [#4] Chang and Lin, `LIBSVM: A Library for Support Vector Machines + `_. - .. [#5] Bishop, `Pattern recognition and machine learning - `_, - chapter 7 Sparse Kernel Machines +.. [#5] Bishop, `Pattern recognition and machine learning + `_, + chapter 7 Sparse Kernel Machines. - .. [#6] :doi:`"A Tutorial on Support Vector Regression" - <10.1023/B:STCO.0000035301.49549.88>` - Alex J. Smola, Bernhard Schölkopf - Statistics and Computing archive - Volume 14 Issue 3, August 2004, p. 199-222. +.. [#6] :doi:`"A Tutorial on Support Vector Regression" + <10.1023/B:STCO.0000035301.49549.88>` + Alex J. Smola, Bernhard Schölkopf - Statistics and Computing archive + Volume 14 Issue 3, August 2004, p. 199-222. - .. [#7] Schölkopf et. al `New Support Vector Algorithms - `_ +.. [#7] Schölkopf et. al `New Support Vector Algorithms + `_, + Neural Computation 12, 1207-1245 (2000). - .. [#8] Crammer and Singer `On the Algorithmic Implementation ofMulticlass - Kernel-based Vector Machines - `_, - JMLR 2001. +.. [#8] Crammer and Singer `On the Algorithmic Implementation of Multiclass + Kernel-based Vector Machines + `_, JMLR 2001. diff --git a/doc/modules/tree.rst b/doc/modules/tree.rst index b54b913573a34..ee36d9f6af1b2 100644 --- a/doc/modules/tree.rst +++ b/doc/modules/tree.rst @@ -146,82 +146,78 @@ Once trained, you can plot the tree with the :func:`plot_tree` function:: :scale: 75 :align: center -|details-start| -**Alternative ways to export trees** -|details-split| - -We can also export the tree in `Graphviz -`_ format using the :func:`export_graphviz` -exporter. If you use the `conda `_ package manager, the graphviz binaries -and the python package can be installed with `conda install python-graphviz`. - -Alternatively binaries for graphviz can be downloaded from the graphviz project homepage, -and the Python wrapper installed from pypi with `pip install graphviz`. - -Below is an example graphviz export of the above tree trained on the entire -iris dataset; the results are saved in an output file `iris.pdf`:: - - - >>> import graphviz # doctest: +SKIP - >>> dot_data = tree.export_graphviz(clf, out_file=None) # doctest: +SKIP - >>> graph = graphviz.Source(dot_data) # doctest: +SKIP - >>> graph.render("iris") # doctest: +SKIP - -The :func:`export_graphviz` exporter also supports a variety of aesthetic -options, including coloring nodes by their class (or value for regression) and -using explicit variable and class names if desired. Jupyter notebooks also -render these plots inline automatically:: - - >>> dot_data = tree.export_graphviz(clf, out_file=None, # doctest: +SKIP - ... feature_names=iris.feature_names, # doctest: +SKIP - ... class_names=iris.target_names, # doctest: +SKIP - ... filled=True, rounded=True, # doctest: +SKIP - ... special_characters=True) # doctest: +SKIP - >>> graph = graphviz.Source(dot_data) # doctest: +SKIP - >>> graph # doctest: +SKIP - -.. only:: html - - .. figure:: ../images/iris.svg - :align: center - -.. only:: latex - - .. figure:: ../images/iris.pdf - :align: center - -.. figure:: ../auto_examples/tree/images/sphx_glr_plot_iris_dtc_001.png - :target: ../auto_examples/tree/plot_iris_dtc.html - :align: center - :scale: 75 - -Alternatively, the tree can also be exported in textual format with the -function :func:`export_text`. 
This method doesn't require the installation -of external libraries and is more compact: - - >>> from sklearn.datasets import load_iris - >>> from sklearn.tree import DecisionTreeClassifier - >>> from sklearn.tree import export_text - >>> iris = load_iris() - >>> decision_tree = DecisionTreeClassifier(random_state=0, max_depth=2) - >>> decision_tree = decision_tree.fit(iris.data, iris.target) - >>> r = export_text(decision_tree, feature_names=iris['feature_names']) - >>> print(r) - |--- petal width (cm) <= 0.80 - | |--- class: 0 - |--- petal width (cm) > 0.80 - | |--- petal width (cm) <= 1.75 - | | |--- class: 1 - | |--- petal width (cm) > 1.75 - | | |--- class: 2 - - -|details-end| - -.. topic:: Examples: - - * :ref:`sphx_glr_auto_examples_tree_plot_iris_dtc.py` - * :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py` +.. dropdown:: Alternative ways to export trees + + We can also export the tree in `Graphviz + `_ format using the :func:`export_graphviz` + exporter. If you use the `conda `_ package manager, the graphviz binaries + and the python package can be installed with `conda install python-graphviz`. + + Alternatively binaries for graphviz can be downloaded from the graphviz project homepage, + and the Python wrapper installed from pypi with `pip install graphviz`. + + Below is an example graphviz export of the above tree trained on the entire + iris dataset; the results are saved in an output file `iris.pdf`:: + + + >>> import graphviz # doctest: +SKIP + >>> dot_data = tree.export_graphviz(clf, out_file=None) # doctest: +SKIP + >>> graph = graphviz.Source(dot_data) # doctest: +SKIP + >>> graph.render("iris") # doctest: +SKIP + + The :func:`export_graphviz` exporter also supports a variety of aesthetic + options, including coloring nodes by their class (or value for regression) and + using explicit variable and class names if desired. Jupyter notebooks also + render these plots inline automatically:: + + >>> dot_data = tree.export_graphviz(clf, out_file=None, # doctest: +SKIP + ... feature_names=iris.feature_names, # doctest: +SKIP + ... class_names=iris.target_names, # doctest: +SKIP + ... filled=True, rounded=True, # doctest: +SKIP + ... special_characters=True) # doctest: +SKIP + >>> graph = graphviz.Source(dot_data) # doctest: +SKIP + >>> graph # doctest: +SKIP + + .. only:: html + + .. figure:: ../images/iris.svg + :align: center + + .. only:: latex + + .. figure:: ../images/iris.pdf + :align: center + + .. figure:: ../auto_examples/tree/images/sphx_glr_plot_iris_dtc_001.png + :target: ../auto_examples/tree/plot_iris_dtc.html + :align: center + :scale: 75 + + Alternatively, the tree can also be exported in textual format with the + function :func:`export_text`. This method doesn't require the installation + of external libraries and is more compact: + + >>> from sklearn.datasets import load_iris + >>> from sklearn.tree import DecisionTreeClassifier + >>> from sklearn.tree import export_text + >>> iris = load_iris() + >>> decision_tree = DecisionTreeClassifier(random_state=0, max_depth=2) + >>> decision_tree = decision_tree.fit(iris.data, iris.target) + >>> r = export_text(decision_tree, feature_names=iris['feature_names']) + >>> print(r) + |--- petal width (cm) <= 0.80 + | |--- class: 0 + |--- petal width (cm) > 0.80 + | |--- petal width (cm) <= 1.75 + | | |--- class: 1 + | |--- petal width (cm) > 1.75 + | | |--- class: 2 + + +.. 
rubric:: Examples + +* :ref:`sphx_glr_auto_examples_tree_plot_iris_dtc.py` +* :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py` .. _tree_regression: @@ -248,9 +244,9 @@ instead of integer values:: >>> clf.predict([[1, 1]]) array([0.5]) -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_tree_plot_tree_regression.py` +* :ref:`sphx_glr_auto_examples_tree_plot_tree_regression.py` .. _tree_multioutput: @@ -288,11 +284,11 @@ of shape ``(n_samples, n_outputs)`` then the resulting estimator will: ``predict_proba``. The use of multi-output trees for regression is demonstrated in -:ref:`sphx_glr_auto_examples_tree_plot_tree_regression_multioutput.py`. In this example, the input +:ref:`sphx_glr_auto_examples_tree_plot_tree_regression.py`. In this example, the input X is a single real value and the outputs Y are the sine and cosine of X. -.. figure:: ../auto_examples/tree/images/sphx_glr_plot_tree_regression_multioutput_001.png - :target: ../auto_examples/tree/plot_tree_regression_multioutput.html +.. figure:: ../auto_examples/tree/images/sphx_glr_plot_tree_regression_002.png + :target: ../auto_examples/tree/plot_tree_regression.html :scale: 75 :align: center @@ -306,21 +302,16 @@ the lower half of those faces. :scale: 75 :align: center -.. topic:: Examples: +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_tree_plot_tree_regression_multioutput.py` - * :ref:`sphx_glr_auto_examples_miscellaneous_plot_multioutput_face_completion.py` +* :ref:`sphx_glr_auto_examples_miscellaneous_plot_multioutput_face_completion.py` -|details-start| -**References** -|details-split| +.. rubric:: References * M. Dumont et al, `Fast multi-class image annotation with random subwindows and multiple output randomized trees - `_, International Conference on - Computer Vision Theory and Applications 2009 - -|details-end| + `_, + International Conference on Computer Vision Theory and Applications 2009 .. _tree_complexity: @@ -391,7 +382,7 @@ Tips on practical use * If the samples are weighted, it will be easier to optimize the tree structure using weight-based pre-pruning criterion such as - ``min_weight_fraction_leaf``, which ensure that leaf nodes contain at least + ``min_weight_fraction_leaf``, which ensures that leaf nodes contain at least a fraction of the overall sum of the sample weights. * All decision trees use ``np.float32`` arrays internally. @@ -412,36 +403,32 @@ Tree algorithms: ID3, C4.5, C5.0 and CART What are all the various decision tree algorithms and how do they differ from each other? Which one is implemented in scikit-learn? -|details-start| -**Various decision tree algorithms** -|details-split| - -ID3_ (Iterative Dichotomiser 3) was developed in 1986 by Ross Quinlan. -The algorithm creates a multiway tree, finding for each node (i.e. in -a greedy manner) the categorical feature that will yield the largest -information gain for categorical targets. Trees are grown to their -maximum size and then a pruning step is usually applied to improve the -ability of the tree to generalize to unseen data. - -C4.5 is the successor to ID3 and removed the restriction that features -must be categorical by dynamically defining a discrete attribute (based -on numerical variables) that partitions the continuous attribute value -into a discrete set of intervals. C4.5 converts the trained trees -(i.e. the output of the ID3 algorithm) into sets of if-then rules. -The accuracy of each rule is then evaluated to determine the order -in which they should be applied. 
Pruning is done by removing a rule's -precondition if the accuracy of the rule improves without it. - -C5.0 is Quinlan's latest version release under a proprietary license. -It uses less memory and builds smaller rulesets than C4.5 while being -more accurate. - -CART (Classification and Regression Trees) is very similar to C4.5, but -it differs in that it supports numerical target variables (regression) and -does not compute rule sets. CART constructs binary trees using the feature -and threshold that yield the largest information gain at each node. - -|details-end| +.. dropdown:: Various decision tree algorithms + + ID3_ (Iterative Dichotomiser 3) was developed in 1986 by Ross Quinlan. + The algorithm creates a multiway tree, finding for each node (i.e. in + a greedy manner) the categorical feature that will yield the largest + information gain for categorical targets. Trees are grown to their + maximum size and then a pruning step is usually applied to improve the + ability of the tree to generalize to unseen data. + + C4.5 is the successor to ID3 and removed the restriction that features + must be categorical by dynamically defining a discrete attribute (based + on numerical variables) that partitions the continuous attribute value + into a discrete set of intervals. C4.5 converts the trained trees + (i.e. the output of the ID3 algorithm) into sets of if-then rules. + The accuracy of each rule is then evaluated to determine the order + in which they should be applied. Pruning is done by removing a rule's + precondition if the accuracy of the rule improves without it. + + C5.0 is Quinlan's latest version release under a proprietary license. + It uses less memory and builds smaller rulesets than C4.5 while being + more accurate. + + CART (Classification and Regression Trees) is very similar to C4.5, but + it differs in that it supports numerical target variables (regression) and + does not compute rule sets. CART constructs binary trees using the feature + and threshold that yield the largest information gain at each node. scikit-learn uses an optimized version of the CART algorithm; however, the scikit-learn implementation does not support categorical variables for now. @@ -515,39 +502,35 @@ Log Loss or Entropy: H(Q_m) = - \sum_k p_{mk} \log(p_{mk}) -|details-start| -**Shannon entropy** -|details-split| +.. dropdown:: Shannon entropy -The entropy criterion computes the Shannon entropy of the possible classes. It -takes the class frequencies of the training data points that reached a given -leaf :math:`m` as their probability. Using the **Shannon entropy as tree node -splitting criterion is equivalent to minimizing the log loss** (also known as -cross-entropy and multinomial deviance) between the true labels :math:`y_i` -and the probabilistic predictions :math:`T_k(x_i)` of the tree model :math:`T` for class :math:`k`. + The entropy criterion computes the Shannon entropy of the possible classes. It + takes the class frequencies of the training data points that reached a given + leaf :math:`m` as their probability. Using the **Shannon entropy as tree node + splitting criterion is equivalent to minimizing the log loss** (also known as + cross-entropy and multinomial deviance) between the true labels :math:`y_i` + and the probabilistic predictions :math:`T_k(x_i)` of the tree model :math:`T` for class :math:`k`. 
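As a quick numeric illustration (plain NumPy, not a scikit-learn API; the
class counts below are made up), the entropy of a node follows directly from
its class frequencies::

    >>> import numpy as np
    >>> counts = np.array([8, 2])  # class counts of the samples in node m
    >>> p = counts / counts.sum()  # p_mk
    >>> round(float(-(p * np.log(p)).sum()), 4)  # H(Q_m)
    0.5004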
-To see this, first recall that the log loss of a tree model :math:`T`
-computed on a dataset :math:`D` is defined as follows:
+  To see this, first recall that the log loss of a tree model :math:`T`
+  computed on a dataset :math:`D` is defined as follows:

-.. math::
-
-    \mathrm{LL}(D, T) = -\frac{1}{n} \sum_{(x_i, y_i) \in D} \sum_k I(y_i = k) \log(T_k(x_i))
+  .. math::

-where :math:`D` is a training dataset of :math:`n` pairs :math:`(x_i, y_i)`.
+      \mathrm{LL}(D, T) = -\frac{1}{n} \sum_{(x_i, y_i) \in D} \sum_k I(y_i = k) \log(T_k(x_i))

-In a classification tree, the predicted class probabilities within leaf nodes
-are constant, that is: for all :math:`(x_i, y_i) \in Q_m`, one has:
-:math:`T_k(x_i) = p_{mk}` for each class :math:`k`.
+  where :math:`D` is a training dataset of :math:`n` pairs :math:`(x_i, y_i)`.

-This property makes it possible to rewrite :math:`\mathrm{LL}(D, T)` as the
-sum of the Shannon entropies computed for each leaf of :math:`T` weighted by
-the number of training data points that reached each leaf:
+  In a classification tree, the predicted class probabilities within leaf nodes
+  are constant, that is: for all :math:`(x_i, y_i) \in Q_m`, one has:
+  :math:`T_k(x_i) = p_{mk}` for each class :math:`k`.

-.. math::
+  This property makes it possible to rewrite :math:`\mathrm{LL}(D, T)` as the
+  sum of the Shannon entropies computed for each leaf of :math:`T` weighted by
+  the number of training data points that reached each leaf:

-    \mathrm{LL}(D, T) = \sum_{m \in T} \frac{n_m}{n} H(Q_m)
+  .. math::

-|details-end|
+      \mathrm{LL}(D, T) = \sum_{m \in T} \frac{n_m}{n} H(Q_m)

Regression criteria
-------------------

@@ -568,17 +551,18 @@ Mean Squared Error:

    H(Q_m) = \frac{1}{n_m} \sum_{y \in Q_m} (y - \bar{y}_m)^2

-Half Poisson deviance:
+Mean Poisson deviance:

.. math::

-    H(Q_m) = \frac{1}{n_m} \sum_{y \in Q_m} (y \log\frac{y}{\bar{y}_m}
+    H(Q_m) = \frac{2}{n_m} \sum_{y \in Q_m} (y \log\frac{y}{\bar{y}_m}
    - y + \bar{y}_m)

Setting `criterion="poisson"` might be a good choice if your target is a count
or a frequency (count per some unit). In any case, :math:`y >= 0` is a
necessary condition to use this criterion. Note that it fits much slower than
-the MSE criterion.
+the MSE criterion. For performance reasons, the actual implementation minimizes
+the half mean Poisson deviance, i.e. the mean Poisson deviance divided by 2.

Mean Absolute Error:

@@ -595,11 +579,21 @@ Note that it fits much slower than the MSE criterion.

Missing Values Support
======================

-:class:`DecisionTreeClassifier` and :class:`DecisionTreeRegressor`
-have built-in support for missing values when `splitter='best'` and criterion is
-`'gini'`, `'entropy`', or `'log_loss'`, for classification or
+:class:`DecisionTreeClassifier` and :class:`DecisionTreeRegressor`
+have built-in support for missing values using `splitter='best'`, where
+the splits are determined in a greedy fashion.
+:class:`ExtraTreeClassifier` and :class:`ExtraTreeRegressor` have built-in
+support for missing values for `splitter='random'`, where the splits
+are determined randomly. For more details on how the splitter differs on
+non-missing values, see the :ref:`Forest section `.
+
+The criteria supported when there are missing values are
+`'gini'`, `'entropy'`, or `'log_loss'`, for classification or
`'squared_error'`, `'friedman_mse'`, or `'poisson'` for regression.
+
+First we will describe how :class:`DecisionTreeClassifier` and
+:class:`DecisionTreeRegressor` handle missing values in the data.
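As a minimal sketch of this support (the toy data below is made up for
illustration), a tree can be fitted on a feature matrix that contains
``np.nan`` directly::

    >>> import numpy as np
    >>> from sklearn.tree import DecisionTreeClassifier
    >>> X = np.array([[0.0], [1.0], [np.nan]])
    >>> y = [0, 0, 1]
    >>> clf = DecisionTreeClassifier(random_state=0).fit(X, y)
    >>> clf.predict(np.array([[np.nan]]))
    array([1])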
+
For each potential threshold on the non-missing data, the splitter will
evaluate the split with all the missing values going to the left node or the
right node.
@@ -650,6 +644,22 @@ Decisions are made as follows:
    >>> tree.predict(X_test)
    array([1])

+:class:`ExtraTreeClassifier` and :class:`ExtraTreeRegressor` handle missing values
+in a slightly different way. When splitting a node, a random threshold will be chosen
+to split the non-missing values on. Then the non-missing values will be sent to the
+left and right child based on the randomly selected threshold, while the missing
+values will also be randomly sent to the left or right child. This is repeated for
+every feature considered at each split. The best split among these is chosen.
+
+During prediction, the treatment of missing values is the same as that of the
+decision tree:
+
+- By default when predicting, the samples with missing values are classified
+  with the class used in the split found during training.
+
+- If no missing values are seen during training for a given feature, then during
+  prediction missing values are mapped to the child with the most samples.
+
.. _minimal_cost_complexity_pruning:

Minimal Cost-Complexity Pruning
@@ -685,13 +695,11 @@ with the smallest value of :math:`\alpha_{eff}` is the weakest link and will
be pruned. This process stops when the pruned tree's minimal
:math:`\alpha_{eff}` is greater than the ``ccp_alpha`` parameter.

-.. topic:: Examples:
+.. rubric:: Examples

-  * :ref:`sphx_glr_auto_examples_tree_plot_cost_complexity_pruning.py`
+* :ref:`sphx_glr_auto_examples_tree_plot_cost_complexity_pruning.py`

-|details-start|
-**References**
-|details-split|
+.. rubric:: References

.. [BRE] L. Breiman, J. Friedman, R. Olshen, and C. Stone. Classification
    and Regression Trees. Wadsworth, Belmont, CA, 1984.

@@ -705,5 +713,3 @@ be pruned. This process stops when the pruned tree's minimal

* T. Hastie, R. Tibshirani and J. Friedman. Elements of Statistical
  Learning, Springer, 2009.
-
-|details-end|
diff --git a/doc/modules/unsupervised_reduction.rst b/doc/modules/unsupervised_reduction.rst
index 90c80714c3131..12f3647454861 100644
--- a/doc/modules/unsupervised_reduction.rst
+++ b/doc/modules/unsupervised_reduction.rst
@@ -9,7 +9,7 @@ If your number of features is high, it may be useful to reduce it with an
unsupervised step prior to supervised steps. Many of the
:ref:`unsupervised-learning` methods implement a ``transform`` method that
can be used to reduce the dimensionality. Below we discuss two specific
-example of this pattern that are heavily used.
+examples of this pattern that are heavily used.

.. topic:: **Pipelining**

@@ -24,9 +24,9 @@ PCA: principal component analysis

:class:`decomposition.PCA` looks for a combination of features that
capture well the variance of the original features. See :ref:`decompositions`.

-.. topic:: **Examples**
+.. rubric:: Examples

-  * :ref:`sphx_glr_auto_examples_applications_plot_face_recognition.py`
+* :ref:`sphx_glr_auto_examples_applications_plot_face_recognition.py`

Random projections
-------------------

@@ -35,9 +35,9 @@ The module: :mod:`~sklearn.random_projection` provides several tools for
data reduction by random projections. See the relevant section of the
documentation: :ref:`random_projection`.

-.. topic:: **Examples**
+.. 
rubric:: Examples - * :ref:`sphx_glr_auto_examples_miscellaneous_plot_johnson_lindenstrauss_bound.py` +* :ref:`sphx_glr_auto_examples_miscellaneous_plot_johnson_lindenstrauss_bound.py` Feature agglomeration ------------------------ @@ -46,10 +46,10 @@ Feature agglomeration :ref:`hierarchical_clustering` to group together features that behave similarly. -.. topic:: **Examples** +.. rubric:: Examples - * :ref:`sphx_glr_auto_examples_cluster_plot_feature_agglomeration_vs_univariate_selection.py` - * :ref:`sphx_glr_auto_examples_cluster_plot_digits_agglomeration.py` +* :ref:`sphx_glr_auto_examples_cluster_plot_feature_agglomeration_vs_univariate_selection.py` +* :ref:`sphx_glr_auto_examples_cluster_plot_digits_agglomeration.py` .. topic:: **Feature scaling** diff --git a/doc/preface.rst b/doc/preface.rst deleted file mode 100644 index 447083a3a8136..0000000000000 --- a/doc/preface.rst +++ /dev/null @@ -1,32 +0,0 @@ -.. This helps define the TOC ordering for "about us" sections. Particularly - useful for PDF output as this section is not linked from elsewhere. - -.. Places global toc into the sidebar - -:globalsidebartoc: True - -.. _preface_menu: - -.. include:: includes/big_toc_css.rst -.. include:: tune_toc.rst - -======================= -Welcome to scikit-learn -======================= - -| - -.. toctree:: - :maxdepth: 2 - - install - faq - support - related_projects - about - testimonials/testimonials - whats_new - roadmap - governance - -| diff --git a/doc/presentations.rst b/doc/presentations.rst index 19fd09218b5fd..25a947d180e00 100644 --- a/doc/presentations.rst +++ b/doc/presentations.rst @@ -1,12 +1,49 @@ +.. _external_resources: + =========================================== External Resources, Videos and Talks =========================================== -For written tutorials, see the :ref:`Tutorial section ` of -the documentation. +The scikit-learn MOOC +===================== + +If you are new to scikit-learn, or looking to strengthen your understanding, +we highly recommend the **scikit-learn MOOC (Massive Open Online Course)**. + +The MOOC, created and maintained by some of the scikit-learn core-contributors, +is **free of charge** and is designed to help learners of all levels master +machine learning using scikit-learn. It covers topics +from the fundamental machine learning concepts to more advanced areas like +predictive modeling pipelines and model evaluation. + +The course materials are available on the +`scikit-learn MOOC website `_. + +This course is also hosted on the `FUN platform +`_, +which additionally makes the content interactive without the need to install +anything, and gives access to a discussion forum. + +The videos are available on the +`Inria Learning Lab channel `_ +in a +`playlist `__. + +.. _videos: + +Videos +====== + +- The `scikit-learn YouTube channel `_ + features a + `playlist `__ + of videos + showcasing talks by maintainers + and community members. New to Scientific Python? ========================== + For those that are still new to the scientific Python ecosystem, we highly recommend the `Python Scientific Lecture Notes `_. This will help you find your footing a @@ -21,58 +58,3 @@ specific subject areas: - `Machine Learning for NeuroImaging in Python `_ - `Machine Learning for Astronomical Data Analysis `_ - -.. _videos: - -Videos -====== - -- An introduction to scikit-learn `Part - I `_ and - `Part II `_ at Scipy 2013 - by `Gael Varoquaux`_, `Jake Vanderplas`_ and `Olivier Grisel`_. Notebooks on - `github `_. 
- -- `Introduction to scikit-learn - `_ by `Gael Varoquaux`_ at - ICML 2010 - - A three minute video from a very early stage of scikit-learn, explaining the - basic idea and approach we are following. - -- `Introduction to statistical learning with scikit-learn `_ - by `Gael Varoquaux`_ at SciPy 2011 - - An extensive tutorial, consisting of four sessions of one hour. - The tutorial covers the basics of machine learning, - many algorithms and how to apply them using scikit-learn. The - material corresponding is now in the scikit-learn documentation - section :ref:`stat_learn_tut_index`. - -- `Statistical Learning for Text Classification with scikit-learn and NLTK - `_ - (and `slides `_) - by `Olivier Grisel`_ at PyCon 2011 - - Thirty minute introduction to text classification. Explains how to - use NLTK and scikit-learn to solve real-world text classification - tasks and compares against cloud-based solutions. - -- `Introduction to Interactive Predictive Analytics in Python with scikit-learn `_ - by `Olivier Grisel`_ at PyCon 2012 - - 3-hours long introduction to prediction tasks using scikit-learn. - -- `scikit-learn - Machine Learning in Python `_ - by `Jake Vanderplas`_ at the 2012 PyData workshop at Google - - Interactive demonstration of some scikit-learn features. 75 minutes. - -- `scikit-learn tutorial `_ by `Jake Vanderplas`_ at PyData NYC 2012 - - Presentation using the online tutorial, 45 minutes. - - -.. _Gael Varoquaux: https://gael-varoquaux.info -.. _Jake Vanderplas: http://www.vanderplas.com -.. _Olivier Grisel: https://twitter.com/ogrisel diff --git a/doc/related_projects.rst b/doc/related_projects.rst index e6d0bd83f0a16..a7a10aef7929e 100644 --- a/doc/related_projects.rst +++ b/doc/related_projects.rst @@ -19,14 +19,6 @@ Interoperability and framework enhancements These tools adapt scikit-learn for use with other technologies or otherwise enhance the functionality of scikit-learn's estimators. -**Data formats** - -- `sklearn_pandas `_ bridge for - scikit-learn pipelines and pandas data frame with dedicated transformers. - -- `sklearn_xarray `_ provides - compatibility of scikit-learn estimators with xarray data structures. - **Auto-ML** - `auto-sklearn `_ @@ -48,31 +40,28 @@ enhance the functionality of scikit-learn's estimators. transforming temporal and relational datasets into feature matrices for machine learning. -- `Neuraxle `_ - A library for building neat pipelines, providing the right abstractions to - both ease research, development, and deployment of machine learning - applications. Compatible with deep learning frameworks and scikit-learn API, - it can stream minibatches, use data checkpoints, build funky pipelines, and - serialize models with custom per-step savers. - - `EvalML `_ - EvalML is an AutoML library which builds, optimizes, and evaluates + An AutoML library which builds, optimizes, and evaluates machine learning pipelines using domain-specific objective functions. It incorporates multiple modeling libraries under one API, and the objects that EvalML creates use an sklearn-compatible API. +- `MLJAR AutoML `_ + A Python package for AutoML on Tabular Data with Feature Engineering, + Hyper-Parameters Tuning, Explanations and Automatic Documentation. + **Experimentation and model registry frameworks** -- `MLFlow `_ MLflow is an open source platform to manage the ML +- `MLFlow `_ An open source platform to manage the ML lifecycle, including experimentation, reproducibility, deployment, and a central model registry. 
-- `Neptune `_ Metadata store for MLOps, +- `Neptune `_ A metadata store for MLOps, built for teams that run a lot of experiments. It gives you a single place to log, store, display, organize, compare, and query all your model building metadata. -- `Sacred `_ Tool to help you configure, +- `Sacred `_ A tool to help you configure, organize, log and reproduce experiments - `Scikit-Learn Laboratory @@ -82,12 +71,11 @@ enhance the functionality of scikit-learn's estimators. **Model inspection and visualization** -- `dtreeviz `_ A python library for +- `dtreeviz `_ A Python library for decision tree visualization and model interpretation. -- `eli5 `_ A library for - debugging/inspecting machine learning models and explaining their - predictions. +- `model-diagnostics `_ Tools for + diagnostics and assessment of (machine learning) models (in Python). - `sklearn-evaluation `_ Machine learning model evaluation made easy: plots, tables, HTML reports, @@ -98,17 +86,6 @@ enhance the functionality of scikit-learn's estimators. custom matplotlib visualizers for scikit-learn estimators to support visual feature analysis, model selection, evaluation, and diagnostics. -**Model selection** - -- `scikit-optimize `_ - A library to minimize (very) expensive and noisy black-box functions. It - implements several methods for sequential model-based optimization, and - includes a replacement for ``GridSearchCV`` or ``RandomizedSearchCV`` to do - cross-validated parameter search using any of these strategies. - -- `sklearn-deap `_ Use evolutionary - algorithms instead of gridsearch in scikit-learn. - **Model export for production** - `sklearn-onnx `_ Serialization of many @@ -124,22 +101,10 @@ enhance the functionality of scikit-learn's estimators. into PMML with the help of `JPMML-SkLearn `_ library. -- `sklearn-porter `_ - Transpile trained scikit-learn models to C, Java, Javascript and others. - -- `m2cgen `_ - A lightweight library which allows to transpile trained machine learning - models including many scikit-learn estimators into a native code of C, Java, - Go, R, PHP, Dart, Haskell, Rust and many other programming languages. - - `treelite `_ Compiles tree-based ensemble models into C code for minimizing prediction latency. -- `micromlgen `_ - MicroML brings Machine Learning algorithms to microcontrollers. - Supports several scikit-learn classifiers by transpiling them to C code. - - `emlearn `_ Implements scikit-learn estimators in C99 for embedded devices and microcontrollers. Supports several classifier, regression and outlier detection models. @@ -155,6 +120,13 @@ enhance the functionality of scikit-learn's estimators. ``scikit-learn`` itself. If you encounter issues while using this project, make sure you report potential issues in their respective repositories. +**Interface to R with genomic applications** + +- `BiocSklearn `_ + Exposes a small number of dimension reduction facilities as an illustration + of the basilisk protocol for interfacing Python with R. Intended as a + springboard for more complete interop. + Other estimators and tasks -------------------------- @@ -166,17 +138,21 @@ and tasks. **Time series and forecasting** -- `Darts `_ Darts is a Python library for +- `aeon `_ A + scikit-learn compatible toolbox for machine learning with time series + (fork of `sktime`_). + +- `Darts `_ A Python library for user-friendly forecasting and anomaly detection on time series. It contains a variety of models, from classics such as ARIMA to deep neural networks. 
The forecasting models can all be used in the same way, using fit() and predict() functions, similar to scikit-learn. -- `sktime `_ A scikit-learn compatible +- `sktime `_ A scikit-learn compatible toolbox for machine learning with time series including time series classification/regression and (supervised/panel) forecasting. -- `skforecast `_ A python library +- `skforecast `_ A Python library that eases using scikit-learn regressors as multi-step forecasters. It also works with any regressor compatible with the scikit-learn API. @@ -202,18 +178,9 @@ Note scikit-learn own modern gradient boosting estimators - `HMMLearn `_ Implementation of hidden markov models that was previously part of scikit-learn. -- `PyStruct `_ General conditional random fields - and structured prediction. - - `pomegranate `_ Probabilistic modelling for Python, with an emphasis on hidden Markov models. -- `sklearn-crfsuite `_ - Linear-chain conditional random fields - (`CRFsuite `_ wrapper with - sklearn-like API). - - **Deep neural networks etc.** - `skorch `_ A scikit-learn compatible @@ -246,28 +213,12 @@ Note scikit-learn own modern gradient boosting estimators **Other regression and classification** -- `ML-Ensemble `_ Generalized - ensemble learning (stacking, blending, subsemble, deep ensembles, - etc.). - -- `lightning `_ Fast - state-of-the-art linear model solvers (SDCA, AdaGrad, SVRG, SAG, etc...). - -- `py-earth `_ Multivariate - adaptive regression splines - - `gplearn `_ Genetic Programming for symbolic regression tasks. - `scikit-multilearn `_ Multi-label classification with focus on label space manipulation. -- `seglearn `_ Time series and sequence - learning using sliding window segmentation. - -- `fastFM `_ Fast factorization machine - implementation compatible with scikit-learn - **Decomposition and clustering** - `lda `_: Fast implementation of latent @@ -286,10 +237,6 @@ Note scikit-learn own modern gradient boosting estimators Linkage clustering algorithms for robust variable density clustering. As of scikit-learn version 1.3.0, there is :class:`~sklearn.cluster.HDBSCAN`. -- `spherecluster `_ Spherical - K-means and mixture of von Mises Fisher clustering routines for data on the - unit hypersphere. - **Pre-processing** - `categorical-encoding @@ -298,6 +245,10 @@ Note scikit-learn own modern gradient boosting estimators As of scikit-learn version 1.3.0, there is :class:`~sklearn.preprocessing.TargetEncoder`. +- `skrub `_ : facilitate learning on dataframes, + with sklearn compatible encoders (of categories, dates, strings) and + more. + - `imbalanced-learn `_ Various methods to under- and over-sample datasets. @@ -333,7 +284,7 @@ Other packages useful for data analysis and machine learning. - `PyMC `_ Bayesian statistical models and fitting algorithms. -- `Seaborn `_ Visualization library based on +- `Seaborn `_ A visualization library based on matplotlib. It provides a high-level interface for drawing attractive statistical graphics. - `scikit-survival `_ A library implementing @@ -349,9 +300,6 @@ Recommendation Engine packages - `lightfm `_ A Python/Cython implementation of a hybrid recommender system. -- `OpenRec `_ TensorFlow-based - neural-network inspired recommendation algorithms. - - `Surprise Lib `_ Library for explicit feedback datasets. @@ -361,7 +309,7 @@ Domain specific packages - `scikit-network `_ Machine learning on graphs. - `scikit-image `_ Image processing and computer - vision in python. + vision in Python. 
- `Natural language toolkit (nltk) `_ Natural language processing and some machine learning. diff --git a/doc/roadmap.rst b/doc/roadmap.rst index 3d6cda2d6c969..a9e3e73d01deb 100644 --- a/doc/roadmap.rst +++ b/doc/roadmap.rst @@ -13,7 +13,7 @@ Roadmap Purpose of this document ------------------------ -This document list general directions that core contributors are interested +This document lists general directions that core contributors are interested to see developed in scikit-learn. The fact that an item is listed here is in no way a promise that it will happen, as resources are limited. Rather, it is an indication that help is welcomed on this topic. @@ -69,29 +69,17 @@ the document up to date as we work on these issues. #. Improved handling of Pandas DataFrames * document current handling - * column reordering issue :issue:`7242` - * avoiding unnecessary conversion to ndarray |ss| :issue:`12147` |se| - * returning DataFrames from transformers :issue:`5523` - * getting DataFrames from dataset loaders |ss| :issue:`10733` |se|, - |ss| :issue:`13902` |se| - * Sparse currently not considered |ss| :issue:`12800` |se| #. Improved handling of categorical features * Tree-based models should be able to handle both continuous and categorical - features :issue:`12866` and |ss| :issue:`15550` |se|. - * |ss| In dataset loaders :issue:`13902` |se| - * As generic transformers to be used with ColumnTransforms (e.g. ordinal - encoding supervised by correlation with target variable) :issue:`5853`, - :issue:`11805` + features :issue:`29437`. * Handling mixtures of categorical and continuous variables #. Improved handling of missing data - * Making sure meta-estimators are lenient towards missing data, - |ss| :issue:`15319` |se| - * Non-trivial imputers |ss| :issue:`11977`, :issue:`12852` |se| - * Learners directly handling missing data |ss| :issue:`13911` |se| + * Making sure meta-estimators are lenient towards missing data by implementing + a common test. * An amputation sample generator to make parts of a dataset go missing :issue:`6284` @@ -101,16 +89,8 @@ the document up to date as we work on these issues. documentation is crowded which makes it hard for beginners to get the big picture. Some work could be done in prioritizing the information. -#. Passing around information that is not (X, y): Sample properties - - * We need to be able to pass sample weights to scorers in cross validation. - * We should have standard/generalised ways of passing sample-wise properties - around in meta-estimators. :issue:`4497` :issue:`7646` - #. Passing around information that is not (X, y): Feature properties - * Feature names or descriptions should ideally be available to fit for, e.g. - . :issue:`6425` :issue:`6424` * Per-feature handling (e.g. "is this a nominal / ordinal / English language text?") should also not need to be provided to estimator constructors, ideally, but should be available as metadata alongside X. :issue:`8480` @@ -124,28 +104,21 @@ the document up to date as we work on these issues. #. Make it easier for external users to write Scikit-learn-compatible components - * More flexible estimator checks that do not select by estimator name - |ss| :issue:`6599` |se| :issue:`6715` - * Example of how to develop an estimator or a meta-estimator, - |ss| :issue:`14582` |se| * More self-sufficient running of scikit-learn-contrib or a similar resource #. Support resampling and sample reduction * Allow subsampling of majority classes (in a pipeline?) 
:issue:`3855` - * Implement random forests with resampling :issue:`13227` #. Better interfaces for interactive development - * |ss| __repr__ and HTML visualisations of estimators - :issue:`6323` and :pr:`14180` |se|. - * Include plotting tools, not just as examples. :issue:`9173` + * Improve the HTML visualisations of estimators via the `estimator_html_repr`. + * Include more plotting tools, not just as examples. #. Improved tools for model diagnostics and basic inference - * |ss| alternative feature importances implementations, :issue:`13146` |se| + * work on a unified interface for "feature importance" * better ways to handle validation sets when fitting - * better ways to find thresholds / create decision rules :issue:`8614` #. Better tools for selecting hyperparameters with transductive estimators @@ -176,11 +149,6 @@ the document up to date as we work on these issues. learning is on smaller data than ETL, hence we can maybe adapt to very large scale while supporting only a fraction of the patterns. -#. Support for working with pre-trained models - - * Estimator "freezing". In particular, right now it's impossible to clone a - `CalibratedClassifierCV` with prefit. :issue:`8370`. :issue:`6451` - #. Backwards-compatible de/serialization of some estimators * Currently serialization (with pickle) breaks across versions. While we may @@ -202,15 +170,15 @@ the document up to date as we work on these issues. versions: * Try to load the old pickle, if it works, use the validation set - prediction snapshot to detect that the serialized model still behave + prediction snapshot to detect that the serialized model still behaves the same; - * If joblib.load / pickle.load not work, use the versioned control + * If joblib.load / pickle.load does not work, use the versioned control training script + historical training set to retrain the model and use the validation set prediction snapshot to assert that it is possible to recover the previous predictive performance: if this is not the case there is probably a bug in scikit-learn that needs to be reported. -#. Everything in Scikit-learn should probably conform to our API contract. +#. Everything in scikit-learn should probably conform to our API contract. We are still in the process of making decisions on some of these related issues. @@ -230,43 +198,3 @@ the document up to date as we work on these issues. * Document good practices to detect temporal distribution drift for deployed model and good practices for re-training on fresh data without causing catastrophic predictive performance regressions. - - -Subpackage-specific goals -------------------------- - -:mod:`sklearn.ensemble` - -* |ss| a stacking implementation, :issue:`11047` |se| - -:mod:`sklearn.cluster` - -* kmeans variants for non-Euclidean distances, if we can show these have - benefits beyond hierarchical clustering. - -:mod:`sklearn.model_selection` - -* |ss| multi-metric scoring is slow :issue:`9326` |se| -* perhaps we want to be able to get back more than multiple metrics -* the handling of random states in CV splitters is a poor design and - contradicts the validation of similar parameters in estimators, - `SLEP011 `_ -* exploit warm-starting and path algorithms so the benefits of `EstimatorCV` - objects can be accessed via `GridSearchCV` and used in Pipelines. - :issue:`1626` -* Cross-validation should be able to be replaced by OOB estimates whenever a - cross-validation iterator is used. 
-* Redundant computations in pipelines should be avoided (related to point - above) cf `dask-ml - `_ - -:mod:`sklearn.neighbors` - -* |ss| Ability to substitute a custom/approximate/precomputed nearest neighbors - implementation for ours in all/most contexts that nearest neighbors are used - for learning. :issue:`10463` |se| - -:mod:`sklearn.pipeline` - -* Performance issues with `Pipeline.memory` -* see "Everything in Scikit-learn should conform to our API contract" above diff --git a/doc/scss/api-search.scss b/doc/scss/api-search.scss new file mode 100644 index 0000000000000..51cf15f92c1cb --- /dev/null +++ b/doc/scss/api-search.scss @@ -0,0 +1,111 @@ +/** + * This is the styling for the API index page (`api/index`), in particular for the API + * search table. It involves overriding the style sheet of DataTables which does not + * fit well into the theme, especially in dark theme; see https://datatables.net/ + */ + +.dt-container { + margin-bottom: 2rem; + + // Fix the selection box for entries per page + select.dt-input { + padding: 0 !important; + margin-right: 0.4rem !important; + + > option { + color: var(--pst-color-text-base); + background-color: var(--pst-color-background); + } + } + + // Fix the search box + input.dt-input { + width: 50%; + line-height: normal; + padding: 0.1rem 0.3rem !important; + margin-left: 0.4rem !important; + } + + table.dataTable { + th { + // Avoid table header being too tall + p { + margin-bottom: 0; + } + + // Fix the ascending/descending order buttons in the header + span.dt-column-order { + &::before, + &::after { + color: var(--pst-color-text-base); + line-height: 0.7rem !important; + } + } + } + + td { + // Fix color of text warning no records found + &.dt-empty { + color: var(--pst-color-text-base) !important; + } + } + + // Unset bottom border of the last row + tr:last-child > * { + border-bottom: unset !important; + } + } + + div.dt-paging button.dt-paging-button { + padding: 0 0.5rem; + + &.disabled { + color: var(--pst-color-border) !important; + + // Overwrite the !important color assigned by DataTables because we must keep + // the color of disabled buttons consistent with and without hovering + &:hover { + color: var(--pst-color-border) !important; + } + } + + // Fix colors of paging buttons + &.current, + &:not(.disabled):not(.current):hover { + color: var(--pst-color-on-surface) !important; + border-color: var(--pst-color-surface) !important; + background: var(--pst-color-surface) !important; + } + + // Highlight the border of the current selected paging button + &.current { + border-color: var(--pst-color-text-base) !important; + } + } +} + +// Styling the object description cells in the table +div.sk-apisearch-desc { + p { + margin-bottom: 0; + } + + div.caption > p { + a, + code { + color: var(--pst-color-text-muted); + } + + code { + padding: 0; + font-size: 0.7rem; + font-weight: var(--pst-font-weight-caption); + background-color: transparent; + } + + .sd-badge { + font-size: 0.7rem; + margin-left: 0.3rem; + } + } +} diff --git a/doc/scss/api.scss b/doc/scss/api.scss new file mode 100644 index 0000000000000..d7110def4ac09 --- /dev/null +++ b/doc/scss/api.scss @@ -0,0 +1,52 @@ +/** + * This is the styling for API reference pages, currently under `modules/generated`. + * Note that it should be applied *ONLY* to API reference pages, as the selectors are + * designed based on how `autodoc` and `autosummary` generate the stuff. 
+ */ + +// Make the admonitions more compact +div.versionadded, +div.versionchanged, +div.deprecated { + margin: 1rem auto; + + > p { + margin: 0.3rem auto; + } +} + +// Make docstrings more compact +dd { + p:not(table *) { + margin-bottom: 0.5rem !important; + } + + ul { + margin-bottom: 0.5rem !important; + padding-left: 2rem !important; + } +} + +// The first method is too close the the docstring above +dl.py.method:first-of-type { + margin-top: 2rem; +} + +// https://github.com/pydata/pydata-sphinx-theme/blob/8cf45f835bfdafc5f3821014a18f3b7e0fc2d44b/src/pydata_sphinx_theme/assets/styles/content/_api.scss +dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.glossary):not(.simple) { + margin-bottom: 1.5rem; + + dd { + margin-left: 1.2rem; + } + + // "Parameters", "Returns", etc. in the docstring + dt.field-odd, + dt.field-even { + margin: 0.5rem 0; + + + dd > dl { + margin-bottom: 0.5rem; + } + } +} diff --git a/doc/scss/colors.scss b/doc/scss/colors.scss new file mode 100644 index 0000000000000..bbc6aa6c2a3d6 --- /dev/null +++ b/doc/scss/colors.scss @@ -0,0 +1,51 @@ +/** + * This is the style sheet for customized colors of scikit-learn. + * Tints and shades are generated by https://colorkit.co/color-shades-generator/ + * + * This file is compiled into styles/colors.css by sphinxcontrib.sass, see: + * https://sass-lang.com/guide/ + */ + +:root { + /* scikit-learn cyan */ + --sk-cyan-tint-9: #edf7fd; + --sk-cyan-tint-8: #daeffa; + --sk-cyan-tint-7: #c8e6f8; + --sk-cyan-tint-6: #b5def5; + --sk-cyan-tint-5: #a2d6f2; + --sk-cyan-tint-4: #8fcdef; + --sk-cyan-tint-3: #7ac5ec; + --sk-cyan-tint-2: #64bce9; + --sk-cyan-tint-1: #4bb4e5; + --sk-cyan: #29abe2; + --sk-cyan-shades-1: #2294c4; + --sk-cyan-shades-2: #1c7ea8; + --sk-cyan-shades-3: #15688c; + --sk-cyan-shades-4: #0f5471; + --sk-cyan-shades-5: #094057; + --sk-cyan-shades-6: #052d3e; + --sk-cyan-shades-7: #021b27; + --sk-cyan-shades-8: #010b12; + --sk-cyan-shades-9: #000103; + + /* scikit-learn orange */ + --sk-orange-tint-9: #fff5ec; + --sk-orange-tint-8: #ffead9; + --sk-orange-tint-7: #ffe0c5; + --sk-orange-tint-6: #ffd5b2; + --sk-orange-tint-5: #fecb9e; + --sk-orange-tint-4: #fdc08a; + --sk-orange-tint-3: #fcb575; + --sk-orange-tint-2: #fbaa5e; + --sk-orange-tint-1: #f99f44; + --sk-orange: #f7931e; + --sk-orange-shades-1: #d77f19; + --sk-orange-shades-2: #b76c13; + --sk-orange-shades-3: #99590e; + --sk-orange-shades-4: #7c4709; + --sk-orange-shades-5: #603605; + --sk-orange-shades-6: #452503; + --sk-orange-shades-7: #2c1601; + --sk-orange-shades-8: #150800; + --sk-orange-shades-9: #030100; +} diff --git a/doc/scss/custom.scss b/doc/scss/custom.scss new file mode 100644 index 0000000000000..cac81b03e7ce2 --- /dev/null +++ b/doc/scss/custom.scss @@ -0,0 +1,253 @@ +/** + * This is a general styling sheet. + * It should be used for customizations that affect multiple pages. 
+ * + * This file is compiled into styles/custom.css by sphinxcontrib.sass, see: + * https://sass-lang.com/guide/ + */ + +/* Global */ + +code.literal { + border: 0; +} + +/* Version switcher */ + +.version-switcher__menu.dropdown-menu { + // The version switcher is aligned right so we need to avoid the dropdown menu + // to be cut off by the right boundary + left: unset; + right: 0; + + a.list-group-item.sk-avail-docs-link { + display: flex; + align-items: center; + + &:after { + content: var(--pst-icon-external-link); + font: var(--fa-font-solid); + font-size: 0.75rem; + margin-left: 0.5rem; + } + } +} + +/* Primary sidebar */ + +.bd-sidebar-primary { + width: 22.5%; + min-width: 16rem; + + // The version switcher button in the sidebar is ill-styled + button.version-switcher__button { + margin-bottom: unset; + margin-left: 0.3rem; + font-size: 1rem; + } + + // The section navigation part is to close to the right boundary (originally an even + // larger negative right margin was used) + nav.bd-links { + margin-right: -0.5rem; + } +} + +/* Article content */ + +.bd-article { + h1 { + font-weight: 500; + margin-bottom: 2rem; + } + + h2 { + font-weight: 500; + margin-bottom: 1.5rem; + } + + // Avoid changing the aspect ratio of images; add some padding so that at least + // there is some space between image and background in dark mode + img { + height: unset !important; + padding: 1%; + } + + // Resize table of contents to make the top few levels of headings more visible + li.toctree-l1 { + padding-bottom: 0.5em; + + > a { + font-size: 150%; + font-weight: bold; + } + } + + li.toctree-l2, + li.toctree-l3, + li.toctree-l4 { + margin-left: 15px; + } +} + +/* Dropdowns (sphinx-design) */ + +details.sd-dropdown { + &:hover > summary.sd-summary-title { + > .sd-summary-text > a.headerlink { + visibility: visible; + } + + > .sk-toggle-all { + opacity: 1; + } + } + + > summary.sd-summary-title { + > .sd-summary-text > a.headerlink { + font-size: 1rem; + } + + // See `js/scripts/dropdown.js`: this is styling the "expand/collapse all" button + > .sk-toggle-all { + color: var(--pst-sd-dropdown-color); + margin-right: 0.5rem; + pointer-events: auto !important; + opacity: 0; + } + } +} + +/* Tabs (sphinx-design) */ + +.sd-tab-set { + --tab-caption-width: 0%; // No tab caption by default + margin-top: 1.5rem; + + &::before { + // Set `content` for tab caption + width: var(--tab-caption-width); + display: flex; + align-items: center; + font-weight: bold; + } + + .sd-tab-content { + padding: 0.5rem 0 0 0 !important; + background-color: transparent !important; + border: none !important; + + > p:first-child { + margin-top: 1rem !important; + } + } + + > label.sd-tab-label { + margin: 0 3px; + display: flex; + align-items: center; + justify-content: center; + border-radius: 5px !important; + + &.tab-6 { + width: calc((100% - var(--tab-caption-width)) / 2 - 6px) !important; + } + + &.tab-4 { + width: calc((100% - var(--tab-caption-width)) / 3 - 6px) !important; + } + } + + > input:checked + label.sd-tab-label { + transform: unset; + border: 2px solid var(--pst-color-primary); + } +} + +/* Download/launcher links and top hint (sphinx-gallery) */ + +// https://sphinx-gallery.github.io/stable/advanced.html#using-sphinx-gallery-sidebar-components +.sphx-glr-download-link-note, +.binder-badge, +.lite-badge, +.sphx-glr-download-jupyter, +.sphx-glr-download-python, +.sphx-glr-download-zip { + display: none; +} + +/* scikit-learn buttons */ + +a.btn { + &.sk-btn-orange { + background-color: var(--sk-orange-tint-1); + 
color: black !important; + + &:hover { + background-color: var(--sk-orange-tint-3); + } + } + + &.sk-btn-cyan { + background-color: var(--sk-cyan-shades-2); + color: white !important; + + &:hover { + background-color: var(--sk-cyan-shades-1); + } + } +} + +/* scikit-learn avatar grid, see build_tools/generate_authors_table.py */ + +div.sk-authors-container { + display: flex; + flex-wrap: wrap; + justify-content: center; + + > div { + width: 6rem; + margin: 0.5rem; + font-size: 0.9rem; + } +} + +/* scikit-learn text-image grid, used in testimonials and sponsors pages */ + +@mixin sk-text-image-grid($img-max-height) { + display: flex; + align-items: center; + flex-wrap: wrap; + + div.text-box, + div.image-box { + width: 50%; + + @media screen and (max-width: 500px) { + width: 100%; + } + } + + div.text-box .annotation { + font-size: 0.9rem; + font-style: italic; + color: var(--pst-color-text-muted); + } + + div.image-box { + text-align: center; + + img { + max-height: $img-max-height; + max-width: 50%; + } + } +} + +div.sk-text-image-grid-small { + @include sk-text-image-grid(60px); +} + +div.sk-text-image-grid-large { + @include sk-text-image-grid(100px); +} diff --git a/doc/scss/index.scss b/doc/scss/index.scss new file mode 100644 index 0000000000000..c3bb8e86b41c6 --- /dev/null +++ b/doc/scss/index.scss @@ -0,0 +1,176 @@ +/** + * Styling sheet for the scikit-learn landing page. This should be loaded only for the + * landing page. + * + * This file is compiled into styles/index.css by sphinxcontrib.sass, see: + * https://sass-lang.com/guide/ + */ + +/* Theme-aware colors for the landing page */ + +html { + &[data-theme="light"] { + --sk-landing-bg-1: var(--sk-cyan-shades-3); + --sk-landing-bg-2: var(--sk-cyan); + --sk-landing-bg-3: var(--sk-orange-tint-8); + --sk-landing-bg-4: var(--sk-orange-tint-3); + } + + &[data-theme="dark"] { + --sk-landing-bg-1: var(--sk-cyan-shades-5); + --sk-landing-bg-2: var(--sk-cyan-shades-2); + --sk-landing-bg-3: var(--sk-orange-tint-4); + --sk-landing-bg-4: var(--sk-orange-tint-1); + } +} + +/* General */ + +div.sk-landing-container { + max-width: 1400px; +} + +/* Top bar */ + +div.sk-landing-top-bar { + background-image: linear-gradient( + 160deg, + var(--sk-landing-bg-1) 0%, + var(--sk-landing-bg-2) 17%, + var(--sk-landing-bg-3) 59%, + var(--sk-landing-bg-4) 100% + ); + + .sk-landing-header, + .sk-landing-subheader { + color: white; + text-shadow: 0px 0px 8px var(--sk-landing-bg-1); + } + + .sk-landing-header { + font-size: 3.2rem; + margin-bottom: 0.5rem; + } + + .sk-landing-subheader { + letter-spacing: 0.17rem; + margin-top: 0; + font-weight: 500; + } + + a.sk-btn-orange { + font-size: 1.1rem; + font-weight: 500; + } + + ul.sk-landing-header-body { + margin-top: auto; + margin-bottom: auto; + font-size: 1.2rem; + font-weight: 500; + color: black; + } +} + +/* Body */ + +div.sk-landing-body { + div.card { + background-color: var(--pst-color-background); + border-color: var(--pst-color-border); + } + + .sk-px-xl-4 { + @media screen and (min-width: 1200px) { + padding-left: 1.3rem !important; + padding-right: 1.3rem !important; + } + } + + .card-body { + p { + margin-bottom: 0.8rem; + color: var(--pst-color-text-base); + } + + .sk-card-title { + font-weight: 700; + margin: 0 0 1rem 0; + } + } + + .sk-card-img-container { + display: flex; + justify-content: center; + align-items: end; + margin-bottom: 1rem; + + img { + max-width: unset; + height: 15rem; + } + } +} + +/* More info */ + +div.sk-landing-more-info { + font-size: 0.96rem; + background-color: 
var(--pst-color-surface); + + .sk-landing-call-header { + font-weight: 700; + margin-top: 0; + + html[data-theme="light"] & { + color: var(--sk-orange-shades-1); + } + + html[data-theme="dark"] & { + color: var(--sk-orange); + } + } + + ul.sk-landing-call-list > li { + margin-bottom: 0.25rem; + } + + .sk-who-uses-carousel { + min-height: 200px; + + .carousel-item img { + max-height: 100px; + max-width: 50%; + margin: 0.5rem; + } + } + + .sk-more-testimonials { + text-align: right !important; + } +} + +/* Footer */ + +div.sk-landing-footer { + a.sk-footer-funding-link { + text-decoration: none; + + p.sk-footer-funding-text { + color: var(--pst-color-link); + + &:hover { + color: var(--pst-color-secondary); + } + } + + div.sk-footer-funding-logos > img { + max-height: 40px; + max-width: 85px; + margin: 0 8px 8px 8px; + padding: 5px; + border-radius: 3px; + background-color: white; + } + } +} diff --git a/doc/sphinxext/add_toctree_functions.py b/doc/sphinxext/add_toctree_functions.py deleted file mode 100644 index 4459ab971f4c4..0000000000000 --- a/doc/sphinxext/add_toctree_functions.py +++ /dev/null @@ -1,160 +0,0 @@ -"""Inspired by https://github.com/pandas-dev/pydata-sphinx-theme - -BSD 3-Clause License - -Copyright (c) 2018, pandas -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -""" - -import docutils - - -def add_toctree_functions(app, pagename, templatename, context, doctree): - """Add functions so Jinja templates can add toctree objects. - - This converts the docutils nodes into a nested dictionary that Jinja can - use in our templating. - """ - from sphinx.environment.adapters.toctree import TocTree - - def get_nav_object(maxdepth=None, collapse=True, numbered=False, **kwargs): - """Return a list of nav links that can be accessed from Jinja. - - Parameters - ---------- - maxdepth: int - How many layers of TocTree will be returned - collapse: bool - Whether to only include sub-pages of the currently-active page, - instead of sub-pages of all top-level pages of the site. 
-        numbered: bool
-            Whether to add section number to title
-        kwargs: key/val pairs
-            Passed to the `TocTree.get_toctree_for` Sphinx method
-        """
-        # The TocTree will contain the full site TocTree including sub-pages.
-        # "collapse=True" collapses sub-pages of non-active TOC pages.
-        # maxdepth controls how many TOC levels are returned
-        toctree = TocTree(app.env).get_toctree_for(
-            pagename, app.builder, collapse=collapse, maxdepth=maxdepth, **kwargs
-        )
-        # If no toctree is defined (AKA a single-page site), skip this
-        if toctree is None:
-            return []
-
-        # toctree has this structure
-        #   <caption>
-        #   <bullet_list>
-        #     <list_item classes="toctree-l1">
-        # `list_item`s are the actual TOC links and are the only thing we want
-        toc_items = [
-            item
-            for child in toctree.children
-            for item in child
-            if isinstance(item, docutils.nodes.list_item)
-        ]
-
-        # Now convert our docutils nodes into dicts that Jinja can use
-        nav = [
-            docutils_node_to_jinja(child, only_pages=True, numbered=numbered)
-            for child in toc_items
-        ]
-
-        return nav
-
-    context["get_nav_object"] = get_nav_object
-
-
-def docutils_node_to_jinja(list_item, only_pages=False, numbered=False):
-    """Convert a docutils node to a structure that can be read by Jinja.
-
-    Parameters
-    ----------
-    list_item : docutils list_item node
-        A parent item, potentially with children, corresponding to the level
-        of a TocTree.
-    only_pages : bool
-        Only include items for full pages in the output dictionary. Exclude
-        anchor links (TOC items with a URL that starts with #)
-    numbered: bool
-        Whether to add section number to title
-
-    Returns
-    -------
-    nav : dict
-        The TocTree, converted into a dictionary with key/values that work
-        within Jinja.
-    """
-    if not list_item.children:
-        return None
-
-    # We assume this structure of a list item:
-    #
-    # <list_item>
-    #     <compact_paragraph>
-    #         <reference> <-- the thing we want
-    reference = list_item.children[0].children[0]
-    title = reference.astext()
-    url = reference.attributes["refuri"]
-    active = "current" in list_item.attributes["classes"]
-
-    secnumber = reference.attributes.get("secnumber", None)
-    if numbered and secnumber is not None:
-        secnumber = ".".join(str(n) for n in secnumber)
-        title = f"{secnumber}. {title}"
-
-    # If we've got an anchor link, skip it if we wish
-    if only_pages and "#" in url:
-        return None
-
-    # Converting the docutils attributes into jinja-friendly objects
-    nav = {}
-    nav["title"] = title
-    nav["url"] = url
-    nav["active"] = active
-
-    # Recursively convert children as well
-    # If there are sub-pages for this list_item, there should be two children:
-    # a paragraph, and a bullet_list.
-    nav["children"] = []
-    if len(list_item.children) > 1:
-        # The `.children` of the bullet_list has the nodes of the sub-pages.
-        subpage_list = list_item.children[1].children
-        for sub_page in subpage_list:
-            child_nav = docutils_node_to_jinja(
-                sub_page, only_pages=only_pages, numbered=numbered
-            )
-            if child_nav is not None:
-                nav["children"].append(child_nav)
-    return nav
-
-
-def setup(app):
-    app.connect("html-page-context", add_toctree_functions)
-
-    return {"parallel_read_safe": True, "parallel_write_safe": True}
diff --git a/doc/sphinxext/allow_nan_estimators.py b/doc/sphinxext/allow_nan_estimators.py
old mode 100755
new mode 100644
index 89d7077bce2b5..3b85ce6c87508
--- a/doc/sphinxext/allow_nan_estimators.py
+++ b/doc/sphinxext/allow_nan_estimators.py
@@ -4,8 +4,8 @@
 from docutils.parsers.rst import Directive
 
 from sklearn.utils import all_estimators
+from sklearn.utils._test_common.instance_generator import _construct_instances
 from sklearn.utils._testing import SkipTest
-from sklearn.utils.estimator_checks import _construct_instance
 
 
 class AllowNanEstimators(Directive):
@@ -19,20 +19,23 @@ def make_paragraph_for_estimator_type(estimator_type):
         lst = nodes.bullet_list()
         for name, est_class in all_estimators(type_filter=estimator_type):
             with suppress(SkipTest):
-                est = _construct_instance(est_class)
-
-                if est._get_tags().get("allow_nan"):
-                    module_name = ".".join(est_class.__module__.split(".")[:2])
-                    class_title = f"{est_class.__name__}"
-                    class_url = f"./generated/{module_name}.{class_title}.html"
-                    item = nodes.list_item()
-                    para = nodes.paragraph()
-                    para += nodes.reference(
-                        class_title, text=class_title, internal=False, refuri=class_url
-                    )
-                    exists = True
-                    item += para
-                    lst += item
+                # Here we generate the text only for one instance. This directive
+                # should not be used for meta-estimators where tags depend on the
+                # sub-estimator.
+                est = next(_construct_instances(est_class))
+
+                if est.__sklearn_tags__().input_tags.allow_nan:
+                    module_name = ".".join(est_class.__module__.split(".")[:2])
+                    class_title = f"{est_class.__name__}"
+                    class_url = f"./generated/{module_name}.{class_title}.html"
+                    item = nodes.list_item()
+                    para = nodes.paragraph()
+                    para += nodes.reference(
+                        class_title, text=class_title, internal=False, refuri=class_url
+                    )
+                    exists = True
+                    item += para
+                    lst += item
         intro += lst
         return [intro] if exists else None
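For reference, the updated tag check can be exercised outside of Sphinx. Below is a minimal sketch built only from the imports and calls in the hunk above; note that `_construct_instances` and `__sklearn_tags__` are internal/recent scikit-learn APIs, so this assumes a scikit-learn version matching this diff:

    from contextlib import suppress

    from sklearn.utils import all_estimators
    from sklearn.utils._test_common.instance_generator import _construct_instances
    from sklearn.utils._testing import SkipTest

    # Collect the names of all classifiers whose input tags declare NaN support,
    # mirroring the bullet list that the AllowNanEstimators directive renders.
    allow_nan_names = []
    for name, est_class in all_estimators(type_filter="classifier"):
        with suppress(SkipTest):
            est = next(_construct_instances(est_class))
            if est.__sklearn_tags__().input_tags.allow_nan:
                allow_nan_names.append(name)

    print(allow_nan_names)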
diff --git a/doc/sphinxext/autoshortsummary.py b/doc/sphinxext/autoshortsummary.py
new file mode 100644
index 0000000000000..8451f3133d05b
--- /dev/null
+++ b/doc/sphinxext/autoshortsummary.py
@@ -0,0 +1,53 @@
+from sphinx.ext.autodoc import ModuleLevelDocumenter
+
+
+class ShortSummaryDocumenter(ModuleLevelDocumenter):
+    """An autodocumenter that only renders the short summary of the object."""
+
+    # Defines the usage: .. autoshortsummary:: {{ object }}
+    objtype = "shortsummary"
+
+    # Disable content indentation
+    content_indent = ""
+
+    # Avoid being selected as the default documenter for some objects, because we are
+    # returning `can_document_member` as True for all objects
+    priority = -99
+
+    @classmethod
+    def can_document_member(cls, member, membername, isattr, parent):
+        """Allow documenting any object."""
+        return True
+
+    def get_object_members(self, want_all):
+        """Document no members."""
+        return (False, [])
+
+    def add_directive_header(self, sig):
+        """Override default behavior to add no directive header or options."""
+        pass
+
+    def add_content(self, more_content):
+        """Override default behavior to add only the first line of the docstring.
+
+        Modified based on the part of processing docstrings in the original
+        implementation of this method.
+
+        https://github.com/sphinx-doc/sphinx/blob/faa33a53a389f6f8bc1f6ae97d6015fa92393c4a/sphinx/ext/autodoc/__init__.py#L609-L622
+        """
+        sourcename = self.get_sourcename()
+        docstrings = self.get_doc()
+
+        if docstrings is not None:
+            if not docstrings:
+                docstrings.append([])
+            # Get the first non-empty line of the processed docstring; this could lead
+            # to unexpected results if the object does not have a short summary line.
+            short_summary = next(
+                (s for s in self.process_doc(docstrings) if s), ""
+            )
+            self.add_line(short_summary, sourcename, 0)
+
+
+def setup(app):
+    app.add_autodocumenter(ShortSummaryDocumenter)
diff --git a/doc/sphinxext/dropdown_anchors.py b/doc/sphinxext/dropdown_anchors.py
new file mode 100644
index 0000000000000..a001dfa11d403
--- /dev/null
+++ b/doc/sphinxext/dropdown_anchors.py
@@ -0,0 +1,58 @@
+import re
+
+from docutils import nodes
+from sphinx.transforms.post_transforms import SphinxPostTransform
+from sphinx_design.dropdown import dropdown_main
+
+
+class DropdownAnchorAdder(SphinxPostTransform):
+    """Insert anchor links to the sphinx-design dropdowns.
+
+    Some of the dropdowns were originally headers that had automatic anchors, so we
+    need to make sure that the old anchors still work. See the original implementation
+    (in JS): https://github.com/scikit-learn/scikit-learn/pull/27409
+
+    The anchor links are inserted at the end of the node with class "sd-summary-text"
+    which includes only the title text part of the dropdown (no icon, markers, etc).
+    """
+
+    default_priority = 9999  # Apply later than everything else
+    formats = ["html"]
+
+    def run(self):
+        """Run the post transformation."""
+        # Counter to store the duplicated summary text to add it as a suffix in the
+        # anchor ID
+        anchor_id_counters = {}
+
+        for sd_dropdown in self.document.findall(dropdown_main):
+            # Grab the summary text node
+            sd_summary_text = sd_dropdown.next_node(
+                lambda node: "sd-summary-text" in node.get("classes", [])
+            )
+
+            # Concatenate the text of relevant nodes as the title text
+            title_text = "".join(node.astext() for node in sd_summary_text.children)
+
+            # The ID uses the first line, lowercased, with spaces replaced by dashes;
+            # suffix the anchor ID with a counter if it already exists
+            anchor_id = re.sub(r"\s+", "-", title_text.strip().split("\n")[0]).lower()
+            if anchor_id in anchor_id_counters:
+                anchor_id_counters[anchor_id] += 1
+                anchor_id = f"{anchor_id}-{anchor_id_counters[anchor_id]}"
+            else:
+                anchor_id_counters[anchor_id] = 1
+            sd_dropdown["ids"].append(anchor_id)
+
+            # Create the anchor element and insert after the title text; we do this
+            # directly with raw HTML
+            anchor_html = (
+                f'<a class="headerlink" href="#{anchor_id}" '
+                'title="Link to this dropdown">#</a>'
+            )
+            anchor_node = nodes.raw("", anchor_html, format="html")
+            sd_summary_text.append(anchor_node)
+
+
+def setup(app):
+    app.add_post_transform(DropdownAnchorAdder)
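The anchor-ID scheme above (first line of the summary text, lowercased, with whitespace runs collapsed into dashes and a numeric suffix for duplicates) can be checked in isolation. A small self-contained sketch of the same logic, extracted here purely for illustration:

    import re

    def make_anchor_id(title_text, counters):
        # First line only, lowercased, whitespace runs replaced by dashes
        anchor_id = re.sub(r"\s+", "-", title_text.strip().split("\n")[0]).lower()
        # Duplicated titles get a counter suffix, as in the transform above
        if anchor_id in counters:
            counters[anchor_id] += 1
            anchor_id = f"{anchor_id}-{counters[anchor_id]}"
        else:
            counters[anchor_id] = 1
        return anchor_id

    counters = {}
    print(make_anchor_id("Mathematical details", counters))  # mathematical-details
    print(make_anchor_id("Mathematical details", counters))  # mathematical-details-2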
diff --git a/doc/sphinxext/override_pst_pagetoc.py b/doc/sphinxext/override_pst_pagetoc.py
new file mode 100644
index 0000000000000..f5697de8ef155
--- /dev/null
+++ b/doc/sphinxext/override_pst_pagetoc.py
@@ -0,0 +1,84 @@
+from functools import cache
+
+from sphinx.util.logging import getLogger
+
+logger = getLogger(__name__)
+
+
+def override_pst_pagetoc(app, pagename, templatename, context, doctree):
+    """Overrides the `generate_toc_html` function of pydata-sphinx-theme for API."""
+
+    @cache
+    def generate_api_toc_html(kind="html"):
+        """Generate the in-page toc for an API page.
+
+        This relies on the `generate_toc_html` function added by pydata-sphinx-theme
+        into the context. We save the original function into `pst_generate_toc_html`
+        and override `generate_toc_html` with this function for generated API pages.
+
+        The pagetoc of an API page would look like the following:
+
+        <ul class="visible ...">               <-- Unwrap
+         <li class="toc-h1">                   <-- Unwrap
+          <a class="..." href="#">{{obj}}</a>  <-- Decompose
+
+          <ul class="visible ...">
+           <li class="toc-h2">
+            <a class="..." href="#">...object</a>
+            <ul class="...">                   <-- Set visible if exists
+             <li class="toc-h3">
+              <a class="..." href="#">...method 1</a>  <-- Shorten
+             </li>
+             <li class="toc-h3">
+              <a class="..." href="#">...method 2</a>  <-- Shorten
+             </li>
+             ...more methods                   <-- Shorten
+            </ul>
+           </li>
+           <li class="toc-h2">
+            <a class="..." href="#">...gallery examples</a>
+           </li>
+          </ul>
+
+         </li>                                 <-- Unwrapped
+        </ul>                                  <-- Unwrapped
+        """
+        soup = context["pst_generate_toc_html"](kind="soup")
+
+        try:
+            # Unwrap the outermost level
+            soup.ul.unwrap()
+            soup.li.unwrap()
+            soup.a.decompose()
+
+            # Get all toc-h2 level entries, where the first one should be the function
+            # or class, and the second one, if it exists, should be the examples; there
+            # should be no more than two entries at this level for generated API pages
+            lis = soup.ul.select("li.toc-h2")
+            main_li = lis[0]
+            meth_list = main_li.ul
+
+            if meth_list is not None:
+                # This is a class API page, so we remove the class name from the method
+                # names to make them better fit into the secondary sidebar; also we
+                # make the toc-h3 level entries always visible to more easily navigate
+                # through the methods
+                meth_list["class"].append("visible")
+                for meth in meth_list.find_all("li", {"class": "toc-h3"}):
+                    target = meth.a.code.span
+                    target.string = target.string.split(".", 1)[1]
+
+            # This corresponds to the behavior of `generate_toc_html`
+            return str(soup) if kind == "html" else soup
+
+        except Exception as e:
+            # Upon any failure we return the original pagetoc
+            logger.warning(
+                f"Failed to generate API pagetoc for {pagename}: {e}; falling back"
+            )
+            return context["pst_generate_toc_html"](kind=kind)
+
+    # Override the pydata-sphinx-theme implementation for generated API pages
+    if pagename.startswith("modules/generated/"):
+        context["pst_generate_toc_html"] = context["generate_toc_html"]
+        context["generate_toc_html"] = generate_api_toc_html
+
+
+def setup(app):
+    # Need to be triggered after `pydata_sphinx_theme.toctree.add_toctree_functions`,
+    # and since default priority is 500 we set 900 for safety
+    app.connect("html-page-context", override_pst_pagetoc, priority=900)
diff --git a/doc/supervised_learning.rst b/doc/supervised_learning.rst
index 71fb3007c2e3c..ba24e8ee23c6f 100644
--- a/doc/supervised_learning.rst
+++ b/doc/supervised_learning.rst
@@ -1,9 +1,3 @@
-.. Places parent toc into the sidebar
-
-:parenttoc: True
-
-.. include:: includes/big_toc_css.rst
-
 .. _supervised-learning:
 
 Supervised learning
diff --git a/doc/support.rst b/doc/support.rst
index be9b32b60a9c8..eb90ff6dd3d94 100644
--- a/doc/support.rst
+++ b/doc/support.rst
@@ -12,12 +12,12 @@ There are several channels to connect with scikit-learn developers for assistance
 Mailing Lists
 =============
 
-- **Main Mailing List**: Join the primary discussion
-  platform for scikit-learn at `scikit-learn Mailing List
+- **Main Mailing List**: Join the primary discussion
+  platform for scikit-learn at `scikit-learn Mailing List
   <https://mail.python.org/mailman/listinfo/scikit-learn>`_.
 
-- **Commit Updates**: Stay informed about repository
-  updates and test failures on the `scikit-learn-commits list
+- **Commit Updates**: Stay informed about repository
+  updates and test failures on the `scikit-learn-commits list
   <https://lists.sourceforge.net/lists/listinfo/scikit-learn-commits>`_.
 
 .. _user_questions:
 
@@ -27,28 +27,28 @@ User Questions
 ==============
 
 If you have questions, this is our general workflow.
 
-- **Stack Overflow**: Some scikit-learn developers support users using the
-  `[scikit-learn] <https://stackoverflow.com/questions/tagged/scikit-learn>`_
+- **Stack Overflow**: Some scikit-learn developers support users using the
+  `[scikit-learn] <https://stackoverflow.com/questions/tagged/scikit-learn>`_
   tag.
 
-- **General Machine Learning Queries**: For broader machine learning
+- **General Machine Learning Queries**: For broader machine learning
   discussions, visit `Stack Exchange <https://stats.stackexchange.com>`_.
 
 When posting questions:
 
-- Please use a descriptive question in the title field (e.g. no "Please
-  help with scikit-learn!" as this is not a question)
+- Please use a descriptive question in the title field (e.g. no "Please
+  help with scikit-learn!" as this is not a question)
 
 - Provide detailed context, expected results, and actual observations.
 
-- Include code and data snippets (preferably minimalistic scripts,
+- Include code and data snippets (preferably minimalistic scripts,
   up to ~20 lines).
 
-- Describe your data and preprocessing steps, including sample size,
-  feature types (categorical or numerical), and the target for supervised
+- Describe your data and preprocessing steps, including sample size,
+  feature types (categorical or numerical), and the target for supervised
   learning tasks (classification type or regression).
 
-**Note**: Avoid asking user questions on the bug tracker to keep
+**Note**: Avoid asking user questions on the bug tracker to keep
 the focus on development.
 
 - `GitHub Discussions <https://github.com/scikit-learn/scikit-learn/discussions>`_
@@ -61,7 +61,7 @@ the focus on development.
   Bug reports - Please do not ask usage questions on the issue tracker.
 
 - `Discord Server <https://discord.gg/h9qyrK8Jc8>`_
-  Current pull requests - Post any specific PR-related questions on your PR,
+  Current pull requests - Post any specific PR-related questions on your PR,
   and you can share a link to your PR on this server.
 
 .. _bug_tracker:
 
@@ -83,11 +83,21 @@ Include in your report:
 
 - The ideal bug report contains a :ref:`short reproducible code snippet
   <minimal_reproducer>`, this way anyone can try to reproduce the bug easily.
-- If your snippet is longer than around 50 lines, please link to a
+- If your snippet is longer than around 50 lines, please link to a
   `gist <https://gist.github.com>`_ or a github repo.
 
 **Tip**: Gists are Git repositories; you can push data files to them using Git.
 
+Paid support
+============
+
+The following companies (listed in alphabetical order) offer support services
+related to scikit-learn and have a proven track record of employing long-term
+maintainers of scikit-learn and related open source projects:
+
+- `:probabl. <https://probabl.ai>`__
+- `Quansight <https://quansight.com>`__
+
 .. _social_media:
 
 Social Media
 ============
@@ -102,8 +112,8 @@ questions.
 
 Gitter
 ======
 
-**Note**: The scikit-learn Gitter room is no longer an active community.
-For live discussions and support, please refer to the other channels
+**Note**: The scikit-learn Gitter room is no longer an active community.
+For live discussions and support, please refer to the other channels
 mentioned in this document.
 
 .. _documentation_resources:
 
 Documentation Resources
 =======================
 
-This documentation is for |release|. Find documentation for other versions
-`here <https://scikit-learn.org/dev/versions.html>`__.
+This documentation is for |release|. Documentation for other versions can be found
+`here <https://scikit-learn.org/dev/versions.html>`__, including zip archives which
+can be downloaded for offline access.
 
-Older versions' printable PDF documentation is available `here
-<https://scikit-learn.org/dev/_downloads/scikit-learn-docs.pdf>`_.
-Building the PDF documentation is no longer supported in the website,
-but you can still generate it locally by following the
-:ref:`building documentation instructions <building_documentation>`.
+We no longer provide a PDF version of the documentation, but you can still generate
+it locally by following the :ref:`building documentation instructions
+<building_documentation>`.
+The most recent version with PDF documentation, 0.23.2 (released in August 2020),
+is quite old, but its PDF is available `here
+<https://scikit-learn.org/0.23/_downloads/scikit-learn-docs.pdf>`__.
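The "short reproducible code snippet" requested above is easiest to act on when it is fully self-contained: synthetic data, one estimator, and a statement of expected versus observed behavior. A minimal sketch of that shape (the estimator, data, and numbers here are placeholders, not a real bug report):

    # Hypothetical reproducer skeleton for a bug report.
    import numpy as np
    from sklearn.linear_model import LogisticRegression

    rng = np.random.RandomState(0)
    X = rng.normal(size=(100, 5))
    y = rng.randint(0, 2, size=100)

    clf = LogisticRegression().fit(X, y)
    # State what you expected and what you observed instead, e.g.:
    print(clf.score(X, y))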
diff --git a/doc/templates/base.rst b/doc/templates/base.rst
new file mode 100644
index 0000000000000..ee86bd8a18dbe
--- /dev/null
+++ b/doc/templates/base.rst
@@ -0,0 +1,36 @@
+{{ objname | escape | underline(line="=") }}
+
+{% if objtype == "module" -%}
+
+.. automodule:: {{ fullname }}
+
+{%- elif objtype == "function" -%}
+
+.. currentmodule:: {{ module }}
+
+.. autofunction:: {{ objname }}
+
+.. minigallery:: {{ module }}.{{ objname }}
+  :add-heading: Gallery examples
+  :heading-level: -
+
+{%- elif objtype == "class" -%}
+
+.. currentmodule:: {{ module }}
+
+.. autoclass:: {{ objname }}
+  :members:
+  :inherited-members:
+  :special-members: __call__
+
+.. minigallery:: {{ module }}.{{ objname }} {% for meth in methods %}{{ module }}.{{ objname }}.{{ meth }} {% endfor %}
+  :add-heading: Gallery examples
+  :heading-level: -
+
+{%- else -%}
+
+.. currentmodule:: {{ module }}
+
+.. auto{{ objtype }}:: {{ objname }}
+
+{%- endif -%}
diff --git a/doc/templates/class.rst b/doc/templates/class.rst
deleted file mode 100644
index 1e98be4099b73..0000000000000
--- a/doc/templates/class.rst
+++ /dev/null
@@ -1,17 +0,0 @@
-..
-    The empty line below should not be removed. It is added such that the `rst_prolog`
-    is added before the :mod: directive. Otherwise, the rendering will show as a
-    paragraph instead of a header.
-
-:mod:`{{module}}`.{{objname}}
-{{ underline }}==============
-
-.. currentmodule:: {{ module }}
-
-.. autoclass:: {{ objname }}
-
-.. include:: {{module}}.{{objname}}.examples
-
-.. raw:: html
-
-    <div class="clearfix"></div>
diff --git a/doc/templates/class_with_call.rst b/doc/templates/class_with_call.rst
deleted file mode 100644
index bc1567709c9d3..0000000000000
--- a/doc/templates/class_with_call.rst
+++ /dev/null
@@ -1,21 +0,0 @@
-..
-    The empty line below should not be removed. It is added such that the `rst_prolog`
-    is added before the :mod: directive. Otherwise, the rendering will show as a
-    paragraph instead of a header.
-
-:mod:`{{module}}`.{{objname}}
-{{ underline }}===============
-
-.. currentmodule:: {{ module }}
-
-.. autoclass:: {{ objname }}
-
-   {% block methods %}
-   .. automethod:: __call__
-   {% endblock %}
-
-.. include:: {{module}}.{{objname}}.examples
-
-.. raw:: html
-
-    <div class="clearfix"></div>
diff --git a/doc/templates/deprecated_class.rst b/doc/templates/deprecated_class.rst
deleted file mode 100644
index 5c31936f6fc36..0000000000000
--- a/doc/templates/deprecated_class.rst
+++ /dev/null
@@ -1,28 +0,0 @@
-..
-    The empty line below should not be removed. It is added such that the `rst_prolog`
-    is added before the :mod: directive. Otherwise, the rendering will show as a
-    paragraph instead of a header.
-
-:mod:`{{module}}`.{{objname}}
-{{ underline }}==============
-
-.. meta::
-   :robots: noindex
-
-.. warning::
-   **DEPRECATED**
-
-
-.. currentmodule:: {{ module }}
-
-.. autoclass:: {{ objname }}
-
-   {% block methods %}
-   .. automethod:: __init__
-   {% endblock %}
-
-.. include:: {{module}}.{{objname}}.examples
-
-.. raw:: html
-
-    <div class="clearfix"></div>
diff --git a/doc/templates/deprecated_class_with_call.rst b/doc/templates/deprecated_class_with_call.rst
deleted file mode 100644
index 072a31112be50..0000000000000
--- a/doc/templates/deprecated_class_with_call.rst
+++ /dev/null
@@ -1,29 +0,0 @@
-..
-    The empty line below should not be removed. It is added such that the `rst_prolog`
-    is added before the :mod: directive. Otherwise, the rendering will show as a
-    paragraph instead of a header.
-
-:mod:`{{module}}`.{{objname}}
-{{ underline }}===============
-
-.. meta::
-   :robots: noindex
-
-.. warning::
-   **DEPRECATED**
-
-
-.. currentmodule:: {{ module }}
-
-.. autoclass:: {{ objname }}
-
-   {% block methods %}
-   .. automethod:: __init__
-   .. automethod:: __call__
-   {% endblock %}
-
-.. include:: {{module}}.{{objname}}.examples
-
-.. raw:: html
-
-    <div class="clearfix"></div>
diff --git a/doc/templates/deprecated_class_without_init.rst b/doc/templates/deprecated_class_without_init.rst
deleted file mode 100644
index a26afbead5451..0000000000000
--- a/doc/templates/deprecated_class_without_init.rst
+++ /dev/null
@@ -1,24 +0,0 @@
-..
-    The empty line below should not be removed. It is added such that the `rst_prolog`
-    is added before the :mod: directive. Otherwise, the rendering will show as a
-    paragraph instead of a header.
-
-:mod:`{{module}}`.{{objname}}
-{{ underline }}==============
-
-.. meta::
-   :robots: noindex
-
-.. warning::
-   **DEPRECATED**
-
-
-.. currentmodule:: {{ module }}
-
-.. autoclass:: {{ objname }}
-
-.. include:: {{module}}.{{objname}}.examples
-
-.. raw:: html
-
-    <div class="clearfix"></div>
diff --git a/doc/templates/deprecated_function.rst b/doc/templates/deprecated_function.rst
deleted file mode 100644
index ead5abec27076..0000000000000
--- a/doc/templates/deprecated_function.rst
+++ /dev/null
@@ -1,24 +0,0 @@
-..
-    The empty line below should not be removed. It is added such that the `rst_prolog`
-    is added before the :mod: directive. Otherwise, the rendering will show as a
-    paragraph instead of a header.
-
-:mod:`{{module}}`.{{objname}}
-{{ underline }}====================
-
-.. meta::
-   :robots: noindex
-
-.. warning::
-   **DEPRECATED**
-
-
-.. currentmodule:: {{ module }}
-
-.. autofunction:: {{ objname }}
-
-.. include:: {{module}}.{{objname}}.examples
-
-.. raw:: html
-
-    <div class="clearfix"></div>
diff --git a/doc/templates/display_all_class_methods.rst b/doc/templates/display_all_class_methods.rst
deleted file mode 100644
index b179473cf841e..0000000000000
--- a/doc/templates/display_all_class_methods.rst
+++ /dev/null
@@ -1,19 +0,0 @@
-..
-    The empty line below should not be removed. It is added such that the `rst_prolog`
-    is added before the :mod: directive. Otherwise, the rendering will show as a
-    paragraph instead of a header.
-
-:mod:`{{module}}`.{{objname}}
-{{ underline }}==============
-
-.. currentmodule:: {{ module }}
-
-.. autoclass:: {{ objname }}
-
-.. include:: {{module}}.{{objname}}.examples
-.. include:: {{module}}.{{objname}}.from_estimator.examples
-.. include:: {{module}}.{{objname}}.from_predictions.examples
-
-.. raw:: html
-
-    <div class="clearfix"></div>
diff --git a/doc/templates/display_only_from_estimator.rst b/doc/templates/display_only_from_estimator.rst
deleted file mode 100644
index 9981910dc8be7..0000000000000
--- a/doc/templates/display_only_from_estimator.rst
+++ /dev/null
@@ -1,18 +0,0 @@
-..
-    The empty line below should not be removed. It is added such that the `rst_prolog`
-    is added before the :mod: directive. Otherwise, the rendering will show as a
-    paragraph instead of a header.
-
-:mod:`{{module}}`.{{objname}}
-{{ underline }}==============
-
-.. currentmodule:: {{ module }}
-
-.. autoclass:: {{ objname }}
-
-.. include:: {{module}}.{{objname}}.examples
-.. include:: {{module}}.{{objname}}.from_estimator.examples
-
-.. raw:: html
-
-    <div class="clearfix"></div>
diff --git a/doc/templates/function.rst b/doc/templates/function.rst
deleted file mode 100644
index 93d368ecfe6d5..0000000000000
--- a/doc/templates/function.rst
+++ /dev/null
@@ -1,17 +0,0 @@
-..
-    The empty line below should not be removed. It is added such that the `rst_prolog`
-    is added before the :mod: directive. Otherwise, the rendering will show as a
-    paragraph instead of a header.
-
-:mod:`{{module}}`.{{objname}}
-{{ underline }}====================
-
-.. currentmodule:: {{ module }}
-
-.. autofunction:: {{ objname }}
-
-.. include:: {{module}}.{{objname}}.examples
-
-.. raw:: html
-
-    <div class="clearfix"></div>
diff --git a/doc/templates/generate_deprecated.sh b/doc/templates/generate_deprecated.sh
deleted file mode 100755
index a7301fb5dc419..0000000000000
--- a/doc/templates/generate_deprecated.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/bin/bash
-for f in [^d]*; do (head -n2 < $f; echo '
-.. meta::
-   :robots: noindex
-
-.. warning::
-   **DEPRECATED**
-'; tail -n+3 $f) > deprecated_$f; done
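The consolidated `base.rst` template above relies on the `autoshortsummary` documenter added earlier in this diff. As a rough illustration of the convention it depends on (this is not the documenter's actual code path, which goes through autodoc's `process_doc`), the short summary is essentially the first non-empty line of the object's docstring:

    # Illustration only: approximate the "short summary" as the first
    # non-empty docstring line of an object.
    from sklearn.linear_model import LogisticRegression

    doc = LogisticRegression.__doc__ or ""
    short_summary = next(
        (line.strip() for line in doc.splitlines() if line.strip()), ""
    )
    print(short_summary)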
diff --git a/doc/templates/index.html b/doc/templates/index.html
index 5b3a61a5b98bb..0f0cecf7fed96 100644
--- a/doc/templates/index.html
+++ b/doc/templates/index.html
@@ -1,25 +1,27 @@
 {% extends "layout.html" %}
 {% set title = 'scikit-learn: machine learning in Python' %}
-{% if theme_link_to_live_contributing_page|tobool %}
+{% if is_devrelease|tobool %}
+  {%- set contributing_link = pathto("developers/contributing") %}
+  {%- set contributing_attrs = "" %}
+{%- else %}
   {%- set contributing_link = "https://scikit-learn.org/dev/developers/contributing.html" %}
   {%- set contributing_attrs = 'target="_blank" rel="noopener noreferrer"' %}
-{%- else %}
-  {%- set contributing_link = pathto('developers/contributing') %}
-  {%- set contributing_attrs = '' %}
 {%- endif %}
+{%- import "static/webpack-macros.html" as _webpack with context %}
 
-{% block content %}
+{% block docs_navbar %}
+{{ super() }}

[The HTML markup of the remainder of this hunk and of the following hunk
(@@ -33,240 +35,281 @@) was lost in extraction; only the text content is
recoverable. In outline, the old single "content" block is split into three
theme blocks:

- "docs_navbar" (extending the theme navbar via {{ super() }}): the hero
  banner with the "scikit-learn" header, the "Machine Learning in Python"
  subheader, and "Getting Started" / "Release Highlights for
  {{ release_highlights_version }}" links; the separate "GitHub" link of the
  old banner is dropped.

- "docs_main": six cards, one per area, each with a title, a one-line
  description, and "Applications" / "Algorithms" copy:
  * Classification: Identifying which category an object belongs to.
    Applications: Spam detection, image recognition. Algorithms: Gradient
    boosting, nearest neighbors, random forest, logistic regression, and
    more...
  * Regression: Predicting a continuous-valued attribute associated with an
    object. Applications: Drug response, stock prices. Algorithms: Gradient
    boosting, nearest neighbors, random forest, ridge, and more...
  * Clustering: Automatic grouping of similar objects into sets.
    Applications: Customer segmentation, grouping experiment outcomes.
    Algorithms: k-Means, HDBSCAN, hierarchical clustering, and more...
  * Dimensionality reduction: Reducing the number of random variables to
    consider. Applications: Visualization, increased efficiency. Algorithms:
    PCA, feature selection, non-negative matrix factorization, and more...
  * Model selection: Comparing, validating and choosing parameters and
    models. Applications: Improved accuracy via parameter tuning. Algorithms:
    Grid search, cross validation, metrics, and more...
  * Preprocessing: Feature extraction and normalization. Applications:
    Transforming input data such as text for use with machine learning
    algorithms. Algorithms: Preprocessing, feature extraction, and more...

- "footer": the "News" list, updated to lead with the on-going development of
  scikit-learn 1.7 and the 1.6.1 (January 2025), 1.6.0 (December 2024), 1.5.2
  (September 2024), 1.5.1 (July 2024), 1.5.0 (May 2024), 1.4.2 (April 2024),
  1.4.1.post1 (February 2024), and 1.4.0 (January 2024) releases, each linking
  to its changelog, plus the "All releases: What's new (Changelog)" entry
  (the old list led with scikit-learn 1.5 development and stopped at the 1.3.x
  releases); the "Community" box with "Help us, donate!" and "Cite us!" links;
  and the "Who uses scikit-learn?" carousel.]