diff --git a/.coveragerc b/.coveragerc deleted file mode 100644 index 266986a2bf..0000000000 --- a/.coveragerc +++ /dev/null @@ -1,3 +0,0 @@ -[run] -include = seaborn/* -omit = *external* diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md new file mode 100644 index 0000000000..9930b6187c --- /dev/null +++ b/.github/CONTRIBUTING.md @@ -0,0 +1,29 @@ +Contributing to seaborn +======================= + +General support +--------------- + +General support questions ("how do I do X?") are most at home on [StackOverflow](https://stackoverflow.com/), which has a larger audience of people who will see your post and may be able to offer assistance. Your chance of getting a quick answer will be higher if you include runnable code, a precise statement of what you are hoping to achieve, and a clear explanation of the problems that you have encountered. + +Reporting bugs +-------------- + +If you think you've encountered a bug in seaborn, please report it on the [Github issue tracker](https://github.com/mwaskom/seaborn/issues/new). To be useful, bug reports *must* include the following information: + +- A reproducible code example that demonstrates the problem +- The output that you are seeing (an image of a plot, or the error message) +- A clear explanation of why you think something is wrong +- The specific versions of seaborn and matplotlib that you are working with + +Bug reports are easiest to address if they can be demonstrated using one of the example datasets from the seaborn docs (i.e. with `seaborn.load_dataset`). Otherwise, it is preferable that your example generate synthetic data to reproduce the problem. If you can only demonstrate the issue with your actual dataset, you will need to share it, ideally as a csv (do not share data as a pickle file). + +If you've encountered an error, searching the specific text of the message before opening a new issue can often help you solve the problem quickly and avoid making a duplicate report. 
+ +Because matplotlib handles the actual rendering, errors or incorrect outputs may be due to a problem in matplotlib rather than one in seaborn. It can save time if you try to reproduce the issue in an example that uses only matplotlib, so that you can report it in the right place. But it is alright to skip this step if it's not obvious how to do it. + + +New features +------------ + +If you think there is a new feature that should be added to seaborn, you can open an issue to discuss it. But please be aware that current development efforts are mostly focused on standardizing the API and internals, and there may be relatively low enthusiasm for novel features that do not fit well into short- and medium-term development plans. diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000000..ac27a84869 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,8 @@ +version: 2 +updates: + # Maintain dependencies for GitHub Actions + - package-ecosystem: "github-actions" + directory: "/" + schedule: + # Check for updates to GitHub Actions every week + interval: "weekly" diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 0000000000..6260ab7699 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,115 @@ +name: CI + +on: + push: + branches: [master, v0.*] + pull_request: + branches: master + schedule: + - cron: '0 6 * * 1,4' # Each Monday and Thursday at 06:00 UTC + workflow_dispatch: + +permissions: + contents: read + +env: + NB_KERNEL: python + MPLBACKEND: Agg + SEABORN_DATA: ${{ github.workspace }}/seaborn-data + PYDEVD_DISABLE_FILE_VALIDATION: 1 + +jobs: + build-docs: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + + - name: Setup Python 3.11 + uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 + with: + python-version: "3.11" + + - name: Install seaborn + run: | + pip install --upgrade pip + pip 
install .[stats,docs] + + - name: Install pandoc + run: | + wget https://github.com/jgm/pandoc/releases/download/3.1.11/pandoc-3.1.11-1-amd64.deb + sudo dpkg -i pandoc-3.1.11-1-amd64.deb + + - name: Cache datasets + run: | + git clone https://github.com/mwaskom/seaborn-data.git + ls $SEABORN_DATA + + - name: Build docs + env: + SPHINXOPTS: -j `nproc` + run: | + cd doc + make -j `nproc` notebooks + make html + + + run-tests: + runs-on: ubuntu-latest + + strategy: + matrix: + python: ["3.8", "3.9", "3.10", "3.11", "3.12"] + install: [full] + deps: [latest] + + include: + - python: "3.8" + install: full + deps: pinned + - python: "3.11" + install: light + deps: latest + + steps: + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + + - name: Setup Python ${{ matrix.python }} + uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 + with: + python-version: ${{ matrix.python }} + allow-prereleases: true + + - name: Install seaborn + run: | + pip install --upgrade pip wheel + if [[ ${{matrix.install}} == 'full' ]]; then EXTRAS=',stats'; fi + if [[ ${{matrix.deps }} == 'pinned' ]]; then DEPS='-r ci/deps_pinned.txt'; fi + pip install .[dev$EXTRAS] $DEPS + + - name: Run tests + run: make test + + - name: Upload coverage + uses: codecov/codecov-action@eaaf4bedf32dbdc6b720b63067d99c4d77d6047d # v3.1.4 + if: ${{ success() }} + + lint: + runs-on: ubuntu-latest + strategy: + fail-fast: false + steps: + + - name: Checkout + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + + - name: Setup Python + uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 + + - name: Install tools + run: pip install mypy~=1.10.0 flake8 + + - name: Flake8 + run: make lint + + - name: Type checking + run: make typecheck diff --git a/.gitignore b/.gitignore index 6466f7a6d7..c9e7058fe9 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,13 @@ build/ .ipynb_checkpoints/ dist/ seaborn.egg-info/ +.cache/ 
.coverage cover/ -.idea +htmlcov/ +.idea/ +.vscode/ +.pytest_cache/ +.DS_Store +notes/ +notebooks/ diff --git a/.mailmap b/.mailmap deleted file mode 100644 index 236389d106..0000000000 --- a/.mailmap +++ /dev/null @@ -1,3 +0,0 @@ -Michael Waskom mwaskom -Tal Yarkoni -Daniel B. Allan diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000000..c2d04a52a4 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,20 @@ +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.3.0 + hooks: + - id: check-yaml + - id: end-of-file-fixer + - id: trailing-whitespace + exclude: \.svg$ +- repo: https://github.com/pycqa/flake8 + rev: 5.0.4 + hooks: + - id: flake8 + exclude: seaborn/(cm\.py|external/) + types: [file, python] +- repo: https://github.com/pre-commit/mirrors-mypy + rev: v0.971 + hooks: + - id: mypy + args: [--follow-imports=skip] + files: seaborn/_(core|marks|stats)/ diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index cabdeb5c87..0000000000 --- a/.travis.yml +++ /dev/null @@ -1,54 +0,0 @@ -language: python - -env: - - PYTHON=2.7 DEPS=modern - - PYTHON=2.7 DEPS=minimal - - PYTHON=3.3 DEPS=modern - - PYTHON=3.4 DEPS=modern - -install: - - conda update conda --yes - - conda create -n testenv --yes pip python=$PYTHON - - conda update conda --yes - - source activate testenv - - if [ ${PYTHON:0:1} == "2" ]; - then conda install --yes imaging; else - pip install pillow; - fi - - conda install --yes --file testing/deps_${DEPS}_${PYTHON}.txt - - conda install ipython-notebook=2 --yes - #- pip install sphinx numpydoc sphinx_bootstrap_theme runipy - #- sudo apt-get install pandoc - - pip install pep8==1.5 # Later versions get stricter... - - pip install https://github.com/dcramer/pyflakes/tarball/master - - pip install . - - cp testing/matplotlibrc . 
- -before_install: - - sudo apt-get update -yq - - sudo sh -c "echo ttf-mscorefonts-installer msttcorefonts/accepted-mscorefonts-eula select true | debconf-set-selections" - - sudo apt-get install msttcorefonts -qq - - wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh - - chmod +x miniconda.sh - - ./miniconda.sh -b - - export PATH=/home/travis/miniconda/bin:$PATH - -before_script: - - if [ ${PYTHON:0:1} == "2" ]; then - make lint; - fi - #- if [ ${TRAVIS_PYTHON_VERSION:0:1} == "2" ]; then - # cd doc; - # make notebooks html; - # cd ..; - # fi - -script: - - nosetests --with-doctest - # Notebook tests are failing on old matplotlib - # because the figures have every so slightly a - # different size. Skip until I understand why. - - if [ ${DEPS} == modern ] && [ ${PYTHON} != 3.4 ]; then - python ipnbdoctest.py examples/*.ipynb; - fi - diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 0000000000..ee4d598e11 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,16 @@ +cff-version: 1.2.0 +message: "If seaborn is integral to a scientific publication, please cite the following paper:" +preferred-citation: + type: article + authors: + - family-names: "Waskom" + given-names: "Michael Lawrence" + orcid: "https://orcid.org/0000-0002-9817-6869" + doi: "10.21105/joss.03021" + journal: "Journal of Open Source Software" + month: 4 + title: "seaborn: statistical data visualization" + issue: 60 + volume: 6 + year: 2021 + url: "https://joss.theoj.org/papers/10.21105/joss.03021" diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md deleted file mode 100644 index 6bf695a7c4..0000000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1,56 +0,0 @@ -Contributing code -================= - -To contribute code to seaborn, it's best to follow the usual github workflow: - -- Fork the [main seaborn repository](https://github.com/mwaskom/seaborn) -- Create a feature branch with `git checkout -b ` -- Add some new code -- Push to your fork with `git push origin 
` -- Open a pull-request on the main repository - -Here are some further notes on specific aspects of seaborn development that are good to know about. - -#### Getting in touch - -In general, it can't hurt to get in touch by opening an issue before you start your work. Because seaborn is relatively young, there are a lot of things that I have partially-formed thoughts on, but haven't gotten a chance to fully implement yet. I very much appreciate help, but I'll be more likely to merge in changes that fit into my plans for the package (which might only exist inside my head). So, giving me a heads up about what you have in mind will save time for everyone. - -#### Where to branch - -For any new features, or enhancements to existing features, you should branch off `master`. The main repo also has branches corresponding to each point release (e.g. `v0.2`). If you are fixing a bug, it might be better to branch from there so the fix can be included in an incremental release. This will probably get sorted out in the issue reporting the bug. - -#### Working on a Pull Request - -Since seaborn is a plotting package, it's most useful to be able to see the new feature or the consequences of changes your contribution will make. When you open the pull request, including a link to an example notebook (through [nbviewer](http://nbviewer.ipython.org/)) or at least a static screenshot is very helpful. - -#### Testing and documentation - -Currently, seaborn uses the notebooks in `examples/` for both documentation and testing. This is proving to be a somewhat problematic solution, and I am worried about many incremental changes to these notebooks producing a large and unwieldy repository. Please try to hold off committing changes to the notebooks until the feature is ready to go. In the meantime, it might be useful to discuss changes in the context of the example notebooks, but please edit them without committing and share via nbviewer from a gist/dropbox link/etc. 
- -The formal unit-test coverage of the package is quite poor, as the focus has been on using the example notebooks for testing. Going forward, this should change. Please include unit-tests that at least touch the various branches through the functions to ward off errors; in cases where it's possible to programmatically check the outputs of the functions, please do so. - -Once you're ready to update the docs, it's good to add a little narrative information about what a feature does and what kind of visualization problems it can be useful for. Then, provide an example or two showing the function in action. The existing docs should be a good guide here. - -If you're unsure where in the documentation your feature should be discussed, please feel free to ask. - -After adding your changes but before committing, please perform the following to steps: - -- Restart the notebook kernel and "run all" cells so you can be certain the notebook executes and the cell numbers are in the right order - -- Run `make hexstrip` to remove the random hex memory identifiers that are stored in the notebook, for a cleaner commit - -- Use `git diff` to make sure your changes didn't result in a cascading change to lots of figures - -Useful commands to know about for testing: - -- `make test` runs the full test suite (unit-tests and notebooks) - -- `nosetests` runs the unit-test suite in isolation - -- `python examples/ipnbdoctest.py examples/.ipynb` can be used to test a specific notebook - -- `make coverage` will run the unit-test suite and produce a coverage report - -- `make lint` will run `pep8` and `pyflakes` over the codebase. Doing so requires [this](https://github.com/dcramer/pyflakes) fork of pyflakes, which can be installed with `pip install https://github.com/dcramer/pyflakes/tarball/master` - -Functions should be documented with the [numpy](https://github.com/numpy/numpy/blob/master/doc/HOWTO_DOCUMENT.rst.txt) standard. 
Current functions usually don't have examples, but it would be more useful if they did. diff --git a/LICENSE b/LICENSE.md similarity index 92% rename from LICENSE rename to LICENSE.md index c6b4209658..86f5ad0986 100644 --- a/LICENSE +++ b/LICENSE.md @@ -1,4 +1,4 @@ -Copyright (c) 2012-2013, Michael L. Waskom +Copyright (c) 2012-2023, Michael L. Waskom All rights reserved. Redistribution and use in source and binary forms, with or without @@ -11,7 +11,7 @@ modification, are permitted provided that the following conditions are met: this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. -* Neither the name of the {organization} nor the names of its +* Neither the name of the project nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index c203dd10b3..0000000000 --- a/MANIFEST.in +++ /dev/null @@ -1,4 +0,0 @@ -include README.md -include CONTRIBUTING.md -include LICENSE -recursive-include licences * diff --git a/Makefile b/Makefile index 02282d12bf..c94c16fe25 100644 --- a/Makefile +++ b/Makefile @@ -1,24 +1,10 @@ export SHELL := /bin/bash test: - - cp testing/matplotlibrc . - nosetests --with-doctest - python ipnbdoctest.py examples/*.ipynb - rm matplotlibrc - - -coverage: - - cp testing/matplotlibrc . 
- nosetests --cover-erase --with-coverage --cover-html --cover-package seaborn - rm matplotlibrc + pytest -n auto --cov=seaborn --cov=tests --cov-config=setup.cfg tests lint: + flake8 seaborn/ tests/ - pyflakes -x W seaborn - pep8 --exclude external seaborn - -hexstrip: - - make -C examples hexstrip +typecheck: + mypy --follow-imports=skip seaborn/_core seaborn/_marks seaborn/_stats diff --git a/README.md b/README.md index fa6e020840..97603ede54 100644 --- a/README.md +++ b/README.md @@ -1,28 +1,15 @@ -Seaborn: statistical data visualization -======================================= - -
- - - - - - - - - - - +
- - - +-------------------------------------- - - - +seaborn: statistical data visualization +======================================= -
+[![PyPI Version](https://img.shields.io/pypi/v/seaborn.svg)](https://pypi.org/project/seaborn/) +[![License](https://img.shields.io/pypi/l/seaborn.svg)](https://github.com/mwaskom/seaborn/blob/master/LICENSE.md) +[![DOI](https://joss.theoj.org/papers/10.21105/joss.03021/status.svg)](https://doi.org/10.21105/joss.03021) +[![Tests](https://github.com/mwaskom/seaborn/workflows/CI/badge.svg)](https://github.com/mwaskom/seaborn/actions) +[![Code Coverage](https://codecov.io/gh/mwaskom/seaborn/branch/master/graph/badge.svg)](https://codecov.io/gh/mwaskom/seaborn) Seaborn is a Python visualization library based on matplotlib. It provides a high-level interface for drawing attractive statistical graphics. @@ -30,87 +17,54 @@ Seaborn is a Python visualization library based on matplotlib. It provides a hig Documentation ------------- -Online documentation is available [here](http://stanford.edu/~mwaskom/software/seaborn/). - -There are docs for the development version [here](http://stanford.edu/~mwaskom/software/seaborn-dev/). These should more or less correspond with the github master branch, but they're not currently built automatically and thus may fall out of sync at times. Also, note that the API docs should always stay up to date, but the tutorials may lag behind. - -Examples --------- - -The documentation has an [example gallery](http://stanford.edu/~mwaskom/software/seaborn/examples/index.html) with short scripts showing how to use different parts of the package. You can also check out the example notebooks: +Online documentation is available at [seaborn.pydata.org](https://seaborn.pydata.org). 
-- [Controlling figure aesthetics in seaborn](http://nbviewer.ipython.org/github/mwaskom/seaborn/blob/master/examples/aesthetics.ipynb) - -- [Graphical representations of linear models](http://nbviewer.ipython.org/github/mwaskom/seaborn/blob/master/examples/linear_models.ipynb) - -- [Visualizing distributions of data](http://nbviewer.ipython.org/github/mwaskom/seaborn/blob/master/examples/plotting_distributions.ipynb) - -- [Plotting statistical timeseries data](http://nbviewer.ipython.org/github/mwaskom/seaborn/blob/master/examples/timeseries_plots.ipynb) - -Citing ------- - -Seaborn can be cited using a DOI provided through Zenodo: [![DOI](https://zenodo.org/badge/doi/10.5072/zenodo.12710.png)](http://dx.doi.org/10.5072/zenodo.12710) +The docs include a [tutorial](https://seaborn.pydata.org/tutorial.html), [example gallery](https://seaborn.pydata.org/examples/index.html), [API reference](https://seaborn.pydata.org/api.html), [FAQ](https://seaborn.pydata.org/faq), and other useful information. +To build the documentation locally, please refer to [`doc/README.md`](doc/README.md). Dependencies ------------ -- Python 2.7 or 3.3+ - -### Mandatory +Seaborn supports Python 3.8+. -- [numpy](http://www.numpy.org/) +Installation requires [numpy](https://numpy.org/), [pandas](https://pandas.pydata.org/), and [matplotlib](https://matplotlib.org/). Some advanced statistical functionality requires [scipy](https://www.scipy.org/) and/or [statsmodels](https://www.statsmodels.org/). 
-- [scipy](http://www.scipy.org/) -- [matplotlib](http://matplotlib.sourceforge.net) - -- [pandas](http://pandas.pydata.org/) - -### Recommended +Installation +------------ -- [statsmodels](http://statsmodels.sourceforge.net/) +The latest stable release (and required dependencies) can be installed from PyPI: -- [patsy](http://patsy.readthedocs.org/en/latest/) + pip install seaborn +It is also possible to include optional statistical dependencies: -Installation ------------- + pip install seaborn[stats] -To install the released version, just do +Seaborn can also be installed with conda: - pip install seaborn + conda install seaborn -You may instead want to use the development version from Github, by running +Note that the main anaconda repository lags PyPI in adding new releases, but conda-forge (`-c conda-forge`) typically updates quickly. - pip install git+git://github.com/mwaskom/seaborn.git#egg=seaborn +Citing +------ +A paper describing seaborn has been published in the [Journal of Open Source Software](https://joss.theoj.org/papers/10.21105/joss.03021). The paper provides an introduction to the key features of the library, and it can be used as a citation if seaborn proves integral to a scientific publication. Testing ------- -[![Build Status](https://travis-ci.org/mwaskom/seaborn.png?branch=master)](https://travis-ci.org/mwaskom/seaborn) +Testing seaborn requires installing additional dependencies; they can be installed with the `dev` extra (e.g., `pip install .[dev]`). -To test seaborn, run `make test` in the source directory. This will run the -unit-test suite (using `nose`). It will also execute the example notebooks and -compare the outputs of each cell to the data in the stored versions. +To test the code, run `make test` in the source directory. This will exercise the unit tests (using [pytest](https://docs.pytest.org/)) and generate a coverage report. +Code style is enforced with `flake8` using the settings in the [`setup.cfg`](./setup.cfg) file. 
Run `make lint` to check. Alternately, you can use `pre-commit` to automatically run lint checks on any files you are committing: just run `pre-commit install` to set it up, and then commit as usual going forward. Development ----------- -https://github.com/mwaskom/seaborn - -Please [submit](https://github.com/mwaskom/seaborn/issues/new) any bugs you encounter to the Github issue tracker. - -License -------- - -Released under a BSD (3-clause) license - - -Celebrity Endorsements ----------------------- +Seaborn development takes place on Github: https://github.com/mwaskom/seaborn -"Those are nice plots" -Hadley Wickham +Please submit bugs that you encounter to the [issue tracker](https://github.com/mwaskom/seaborn/issues) with a reproducible example demonstrating the problem. Questions about usage are more at home on StackOverflow, where there is a [seaborn tag](https://stackoverflow.com/questions/tagged/seaborn). diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000000..e2842a8d89 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,17 @@ +# Security Policy + +If you have discovered a security vulnerability in this project, please report it +privately. **Do not disclose it as a public issue.** This gives me time to work with you +to fix the issue before public exposure, reducing the chance that the exploit will be +used before a patch is released. + +You may submit the report by filling out +[this form](https://github.com/mwaskom/seaborn/security/advisories/new). + +Please provide the following information in your report: + +- A description of the vulnerability and its impact +- How to reproduce the issue + +This project is maintained by a single maintainer on a reasonable-effort basis. As such, +I ask that you give me 90 days to work on a fix before public exposure. 
diff --git a/ci/cache_datasets.py b/ci/cache_datasets.py new file mode 100644 index 0000000000..2cf7444164 --- /dev/null +++ b/ci/cache_datasets.py @@ -0,0 +1,27 @@ +""" +Cache test datasets before running tests / building docs. + +Avoids race conditions that would arise from parallelization. +""" +import pathlib +import re + +from seaborn import load_dataset + +path = pathlib.Path(".") +py_files = path.rglob("*.py") +ipynb_files = path.rglob("*.ipynb") + +datasets = [] + +for fname in py_files: + with open(fname) as fid: + datasets += re.findall(r"load_dataset\(['\"](\w+)['\"]", fid.read()) + +for p in ipynb_files: + with p.open() as fid: + datasets += re.findall(r"load_dataset\(\\['\"](\w+)\\['\"]", fid.read()) + +for name in sorted(set(datasets)): + print(f"Caching {name}") + load_dataset(name) diff --git a/ci/check_gallery.py b/ci/check_gallery.py new file mode 100644 index 0000000000..60db2e12c6 --- /dev/null +++ b/ci/check_gallery.py @@ -0,0 +1,15 @@ +"""Execute the scripts that comprise the example gallery in the online docs.""" +from glob import glob +import matplotlib.pyplot as plt + +if __name__ == "__main__": + + fnames = sorted(glob("examples/*.py")) + + for fname in fnames: + + print(f"- {fname}") + with open(fname) as fid: + # Give each script fresh globals so no state leaks between examples or into this runner + exec(fid.read(), {"__name__": "__main__"}) + plt.close("all") diff --git a/ci/deps_pinned.txt b/ci/deps_pinned.txt new file mode 100644 index 0000000000..0b8cd9f7d7 --- /dev/null +++ b/ci/deps_pinned.txt @@ -0,0 +1,6 @@ +numpy~=1.20.0 +pandas~=1.2.0 +matplotlib~=3.4.0 +scipy~=1.7.0 +statsmodels~=0.12.0 +pillow~=10.3.0 diff --git a/ci/getmsfonts.sh b/ci/getmsfonts.sh new file mode 100644 index 0000000000..bb8feba027 --- /dev/null +++ b/ci/getmsfonts.sh @@ -0,0 +1,2 @@ +echo ttf-mscorefonts-installer msttcorefonts/accepted-mscorefonts-eula select true | debconf-set-selections +apt-get install msttcorefonts -qq diff --git a/devel_requirements_py2.txt b/devel_requirements_py2.txt deleted file mode 100644 index c86433fdce..0000000000 --- 
a/devel_requirements_py2.txt +++ /dev/null @@ -1,6 +0,0 @@ -ipython>=1.0 -sphinx>=1.2 -sphinx_bootstrap_theme -numpydoc -PIL -nose diff --git a/devel_requirements_py3.txt b/devel_requirements_py3.txt deleted file mode 100644 index 3cdd2bb287..0000000000 --- a/devel_requirements_py3.txt +++ /dev/null @@ -1,6 +0,0 @@ -ipython>=1.0 -sphinx>=1.2 -sphinx_bootstrap_theme -numpydoc -pillow -nose diff --git a/doc/.gitignore b/doc/.gitignore index ae76f09488..a1d3570a82 100644 --- a/doc/.gitignore +++ b/doc/.gitignore @@ -2,12 +2,8 @@ _build/ generated/ examples/ -example_thumbs/ -aesthetics.rst -color_palettes.rst -quantitative_linear_models.rst -categorical_linear_models.rst -plotting_distributions.rst -dataset_exploration.rst -timeseries_plots.rst -axis_grids.rst +example_thumbs/*.png +docstrings/ +tutorial/ +tutorial/_images +tutorial.rst diff --git a/doc/Makefile b/doc/Makefile index a0df55eeeb..c9a433e4e9 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -18,7 +18,9 @@ I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
help: @echo "Please use \`make ' where is one of" + @echo " clean to remove generated output" @echo " html to make standalone HTML files" + @echo " notebooks to make the Jupyter notebook-based tutorials" @echo " dirhtml to make HTML files named index.html in directories" @echo " singlehtml to make a single large HTML file" @echo " pickle to make pickle files" @@ -42,12 +44,22 @@ clean: -rm -rf $(BUILDDIR)/* -rm -rf examples/* -rm -rf example_thumbs/* - -rm -rf tutorial/*_files/ - -rm -rf tutorial/*.rst + -rm -rf generated/* + -rm -rf tutorial.rst + -$(MAKE) -C _docstrings clean + -$(MAKE) -C _tutorial clean -notebooks: +.PHONY: tutorials +tutorials: + @mkdir -p tutorial + @$(MAKE) -C _tutorial - make -C tutorial notebooks +.PHONY: docstrings +docstrings: + @mkdir -p docstrings + @$(MAKE) -C _docstrings + +notebooks: tutorials docstrings html: $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @@ -159,11 +171,3 @@ doctest: $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." - -upload: - rsync -azP $(BUILDDIR)/html/ mwaskom@cardinal.stanford.edu:WWW/software/seaborn - @echo "Uploaded to Stanford webspace" - -upload-dev: - rsync -azP $(BUILDDIR)/html/ mwaskom@cardinal.stanford.edu:WWW/software/seaborn-dev - @echo "Uploaded to Stanford webspace (development page)" diff --git a/doc/README.md b/doc/README.md new file mode 100644 index 0000000000..78cfc1ef64 --- /dev/null +++ b/doc/README.md @@ -0,0 +1,12 @@ +Building the seaborn docs +========================= + +Building the docs requires additional dependencies; they can be installed with `pip install seaborn[stats,docs]`. + +The build process involves conversion of Jupyter notebooks to `rst` files. To facilitate this, you may need to set `NB_KERNEL` environment variable to the name of a kernel on your machine (e.g. `export NB_KERNEL="python3"`). 
To get a list of available Python kernels, run `jupyter kernelspec list`. + +After you're set up, run `make notebooks html` from the `doc` directory to convert all notebooks, generate all gallery examples, and build the documentation itself. The site will live in `_build/html`. + +Run `make clean` to delete the built site and all intermediate files. Run `make -C _docstrings clean` or `make -C _tutorial clean` to remove intermediate files for the API or tutorial components. + +If your goal is to obtain an offline copy of the docs for a released version, it may be easier to clone the [website repository](https://github.com/seaborn/seaborn.github.io) or to download a zipfile corresponding to a [specific version](https://github.com/seaborn/seaborn.github.io/tags). diff --git a/doc/_docstrings/FacetGrid.ipynb b/doc/_docstrings/FacetGrid.ipynb new file mode 100644 index 0000000000..28af34c3c5 --- /dev/null +++ b/doc/_docstrings/FacetGrid.ipynb @@ -0,0 +1,302 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "sns.set_theme(style=\"ticks\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Calling the constructor requires a long-form data object. 
This initializes the grid, but doesn't plot anything on it:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tips = sns.load_dataset(\"tips\")\n", + "sns.FacetGrid(tips)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Assign column and/or row variables to add more subplots to the figure:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.FacetGrid(tips, col=\"time\", row=\"sex\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "To draw a plot on every facet, pass a function and the name of one or more columns in the dataframe to :meth:`FacetGrid.map`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.FacetGrid(tips, col=\"time\", row=\"sex\")\n", + "g.map(sns.scatterplot, \"total_bill\", \"tip\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The variable specification in :meth:`FacetGrid.map` requires a positional argument mapping, but if the function has a ``data`` parameter and accepts named variable assignments, you can also use :meth:`FacetGrid.map_dataframe`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.FacetGrid(tips, col=\"time\", row=\"sex\")\n", + "g.map_dataframe(sns.histplot, x=\"total_bill\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Notice how the bins have different widths in each facet. A separate plot is drawn on each facet, so if the plotting function derives any parameters from the data, they may not be shared across facets. You can pass additional keyword arguments to synchronize them. 
But when possible, using a figure-level function like :func:`displot` will take care of this bookkeeping for you:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.FacetGrid(tips, col=\"time\", row=\"sex\")\n", + "g.map_dataframe(sns.histplot, x=\"total_bill\", binwidth=2, binrange=(0, 60))" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The :class:`FacetGrid` constructor accepts a ``hue`` parameter. Setting this will condition the data on another variable and make multiple plots in different colors. Where possible, label information is tracked so that a single legend can be drawn:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.FacetGrid(tips, col=\"time\", hue=\"sex\")\n", + "g.map_dataframe(sns.scatterplot, x=\"total_bill\", y=\"tip\")\n", + "g.add_legend()" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "When ``hue`` is set on the :class:`FacetGrid`, however, a separate plot is drawn for each level of the variable. If the plotting function understands ``hue``, it is better to let it handle that logic. But it is important to ensure that each facet will use the same hue mapping. In the sample ``tips`` data, the ``sex`` column has a categorical datatype, which ensures this. 
Otherwise, you may want to use the ``hue_order`` or similar parameter:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.FacetGrid(tips, col=\"time\")\n", + "g.map_dataframe(sns.scatterplot, x=\"total_bill\", y=\"tip\", hue=\"sex\")\n", + "g.add_legend()" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The size and shape of the plot are specified at the level of each subplot using the ``height`` and ``aspect`` parameters:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.FacetGrid(tips, col=\"day\", height=3.5, aspect=.65)\n", + "g.map(sns.histplot, \"total_bill\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "If the variable assigned to ``col`` has many levels, it is possible to \"wrap\" it so that it spans multiple rows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.FacetGrid(tips, col=\"size\", height=2.5, col_wrap=3)\n", + "g.map(sns.histplot, \"total_bill\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "To add horizontal or vertical reference lines on every facet, use :meth:`FacetGrid.refline`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.FacetGrid(tips, col=\"time\", margin_titles=True)\n", + "g.map_dataframe(sns.scatterplot, x=\"total_bill\", y=\"tip\")\n", + "g.refline(y=tips[\"tip\"].median())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can pass custom functions to plot with, or to annotate each facet. 
Your custom function must use the matplotlib state-machine interface to plot on the \"current\" axes, and it should catch additional keyword arguments:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "def annotate(data, **kws):\n", + " n = len(data)\n", + " ax = plt.gca()\n", + " ax.text(.1, .6, f\"N = {n}\", transform=ax.transAxes)\n", + "\n", + "g = sns.FacetGrid(tips, col=\"time\")\n", + "g.map_dataframe(sns.scatterplot, x=\"total_bill\", y=\"tip\")\n", + "g.map_dataframe(annotate)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The :class:`FacetGrid` object has some other useful parameters and methods for tweaking the plot:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.FacetGrid(tips, col=\"sex\", row=\"time\", margin_titles=True)\n", + "g.map_dataframe(sns.scatterplot, x=\"total_bill\", y=\"tip\")\n", + "g.set_axis_labels(\"Total bill ($)\", \"Tip ($)\")\n", + "g.set_titles(col_template=\"{col_name} patrons\", row_template=\"{row_name}\")\n", + "g.set(xlim=(0, 60), ylim=(0, 12), xticks=[10, 30, 50], yticks=[2, 6, 10])\n", + "g.tight_layout()\n", + "g.savefig(\"facet_plot.png\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import os\n", + "if os.path.exists(\"facet_plot.png\"):\n", + " os.remove(\"facet_plot.png\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "You also have access to the underlying matplotlib objects for additional tweaking:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.FacetGrid(tips, col=\"sex\", row=\"time\", margin_titles=True, despine=False)\n", + "g.map_dataframe(sns.scatterplot, x=\"total_bill\", y=\"tip\")\n", + 
"g.figure.subplots_adjust(wspace=0, hspace=0)\n", + "for (row_val, col_val), ax in g.axes_dict.items():\n", + " if row_val == \"Lunch\" and col_val == \"Female\":\n", + " ax.set_facecolor(\".95\")\n", + " else:\n", + " ax.set_facecolor((0, 0, 0, 0))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/doc/_docstrings/JointGrid.ipynb b/doc/_docstrings/JointGrid.ipynb new file mode 100644 index 0000000000..272bf3c3e7 --- /dev/null +++ b/doc/_docstrings/JointGrid.ipynb @@ -0,0 +1,244 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "sns.set_theme()" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Calling the constructor initializes the figure, but it does not plot anything:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "penguins = sns.load_dataset(\"penguins\")\n", + "sns.JointGrid(data=penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The simplest plotting method, :meth:`JointGrid.plot`, accepts a pair of functions (one for the joint axes and one for both marginal axes):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.JointGrid(data=penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\")\n", + "g.plot(sns.scatterplot, 
sns.histplot)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The :meth:`JointGrid.plot` function also accepts additional keyword arguments, but it passes them to both functions:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.JointGrid(data=penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\")\n", + "g.plot(sns.scatterplot, sns.histplot, alpha=.7, edgecolor=\".2\", linewidth=.5)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "If you need to pass different keyword arguments to each function, you'll have to invoke :meth:`JointGrid.plot_joint` and :meth:`JointGrid.plot_marginals`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.JointGrid(data=penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\")\n", + "g.plot_joint(sns.scatterplot, s=100, alpha=.5)\n", + "g.plot_marginals(sns.histplot, kde=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "You can also set up the grid without assigning any data:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.JointGrid()" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "You can then plot by accessing the ``ax_joint``, ``ax_marg_x``, and ``ax_marg_y`` attributes, which are :class:`matplotlib.axes.Axes` objects:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.JointGrid()\n", + "x, y = penguins[\"bill_length_mm\"], penguins[\"bill_depth_mm\"]\n", + "sns.scatterplot(x=x, y=y, ec=\"b\", fc=\"none\", s=100, linewidth=1.5, ax=g.ax_joint)\n", + "sns.histplot(x=x, fill=False, linewidth=2, ax=g.ax_marg_x)\n", + "sns.kdeplot(y=y, linewidth=2, ax=g.ax_marg_y)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The plotting methods can use 
any seaborn functions that accept ``x`` and ``y`` variables:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.JointGrid(data=penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\")\n", + "g.plot(sns.regplot, sns.boxplot)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "If the functions accept a ``hue`` variable, you can use it by assigning ``hue`` when you call the constructor:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.JointGrid(data=penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\", hue=\"species\")\n", + "g.plot(sns.scatterplot, sns.histplot)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Horizontal and/or vertical reference lines can be added to the joint and/or marginal axes using :meth:`JointGrid.refline`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.JointGrid(data=penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\")\n", + "g.plot(sns.scatterplot, sns.histplot)\n", + "g.refline(x=45, y=16)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The figure will always be square (unless you resize it at the matplotlib layer), but its overall size and layout are configurable. The size is controlled by the ``height`` parameter. 
The relative ratio between the joint and marginal axes is controlled by ``ratio``, and the amount of space between the plots is controlled by ``space``:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.JointGrid(height=4, ratio=2, space=.05)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "By default, the ticks on the density axis of the marginal plots are turned off, but this is configurable:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.JointGrid(marginal_ticks=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Limits on the two data axes (which are shared across plots) can also be defined when setting up the figure:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.JointGrid(xlim=(-2, 5), ylim=(0, 10))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/doc/_docstrings/Makefile b/doc/_docstrings/Makefile new file mode 100644 index 0000000000..11657fef0e --- /dev/null +++ b/doc/_docstrings/Makefile @@ -0,0 +1,14 @@ +rst_files := $(patsubst %.ipynb,../docstrings/%.rst,$(wildcard *.ipynb)) +export MPLBACKEND := module://matplotlib_inline.backend_inline + +docstrings: ${rst_files} + +../docstrings/%.rst: %.ipynb + ../tools/nb_to_doc.py $*.ipynb ../docstrings + @cp -r ../docstrings/$*_files ../generated/ + @if [ -f ../generated/seaborn.$*.rst ]; then \ + touch ../generated/seaborn.$*.rst; \ + fi + +clean: + rm -rf ../docstrings 
diff --git a/doc/_docstrings/PairGrid.ipynb b/doc/_docstrings/PairGrid.ipynb new file mode 100644 index 0000000000..1a9c897c0d --- /dev/null +++ b/doc/_docstrings/PairGrid.ipynb @@ -0,0 +1,271 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn as sns; sns.set_theme()\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Calling the constructor sets up a blank grid of subplots, with one row and one column for each numeric variable in the dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "penguins = sns.load_dataset(\"penguins\")\n", + "g = sns.PairGrid(penguins)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Passing a bivariate function to :meth:`PairGrid.map` will draw a bivariate plot on every axes:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.PairGrid(penguins)\n", + "g.map(sns.scatterplot)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Passing separate functions to :meth:`PairGrid.map_diag` and :meth:`PairGrid.map_offdiag` will show each variable's marginal distribution on the diagonal:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.PairGrid(penguins)\n", + "g.map_diag(sns.histplot)\n", + "g.map_offdiag(sns.scatterplot)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "It's also possible to use different functions on the upper and lower triangles of the plot (which are otherwise redundant):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.PairGrid(penguins, diag_sharey=False)\n", + "g.map_upper(sns.scatterplot)\n", 
+ "g.map_lower(sns.kdeplot)\n", + "g.map_diag(sns.kdeplot)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Or to avoid the redundancy altogether:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.PairGrid(penguins, diag_sharey=False, corner=True)\n", + "g.map_lower(sns.scatterplot)\n", + "g.map_diag(sns.kdeplot)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The :class:`PairGrid` constructor accepts a ``hue`` variable. This variable is passed directly to functions that understand it:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.PairGrid(penguins, hue=\"species\")\n", + "g.map_diag(sns.histplot)\n", + "g.map_offdiag(sns.scatterplot)\n", + "g.add_legend()" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "But you can also pass matplotlib functions, in which case a groupby is performed internally and a separate plot is drawn for each level:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.PairGrid(penguins, hue=\"species\")\n", + "g.map_diag(plt.hist)\n", + "g.map_offdiag(plt.scatter)\n", + "g.add_legend()" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Additional semantic variables can be assigned by passing data vectors directly while mapping the function:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.PairGrid(penguins, hue=\"species\")\n", + "g.map_diag(sns.histplot)\n", + "g.map_offdiag(sns.scatterplot, size=penguins[\"sex\"])\n", + "g.add_legend(title=\"\", adjust_subtitles=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "When using seaborn functions that can implement a numeric hue mapping, you will want to disable mapping of the variable on the 
diagonal axes. Note that the ``hue`` variable is excluded from the list of variables shown by default:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.PairGrid(penguins, hue=\"body_mass_g\")\n", + "g.map_diag(sns.histplot, hue=None, color=\".3\")\n", + "g.map_offdiag(sns.scatterplot)\n", + "g.add_legend()" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The ``vars`` parameter can be used to control exactly which variables are used:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "variables = [\"body_mass_g\", \"bill_length_mm\", \"flipper_length_mm\"]\n", + "g = sns.PairGrid(penguins, hue=\"body_mass_g\", vars=variables)\n", + "g.map_diag(sns.histplot, hue=None, color=\".3\")\n", + "g.map_offdiag(sns.scatterplot)\n", + "g.add_legend()" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The plot need not be square: separate variables can be used to define the rows and columns:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "x_vars = [\"body_mass_g\", \"bill_length_mm\", \"bill_depth_mm\", \"flipper_length_mm\"]\n", + "y_vars = [\"body_mass_g\"]\n", + "g = sns.PairGrid(penguins, hue=\"species\", x_vars=x_vars, y_vars=y_vars)\n", + "g.map_diag(sns.histplot, color=\".3\")\n", + "g.map_offdiag(sns.scatterplot)\n", + "g.add_legend()" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "It can be useful to explore different approaches to resolving multiple distributions on the diagonal axes:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.PairGrid(penguins, hue=\"species\")\n", + "g.map_diag(sns.histplot, multiple=\"stack\", element=\"step\")\n", + "g.map_offdiag(sns.scatterplot)\n", + "g.add_legend()" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/doc/_docstrings/axes_style.ipynb b/doc/_docstrings/axes_style.ipynb new file mode 100644 index 0000000000..7ba9aa599a --- /dev/null +++ b/doc/_docstrings/axes_style.ipynb @@ -0,0 +1,102 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "dated-mother", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn as sns" + ] + }, + { + "cell_type": "markdown", + "id": "prospective-sellers", + "metadata": {}, + "source": [ + "Calling with no arguments will return the current defaults for the style parameters:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "recognized-rehabilitation", + "metadata": { + "tags": [ + "show-output" + ] + }, + "outputs": [], + "source": [ + "sns.axes_style()" + ] + }, + { + "cell_type": "markdown", + "id": "furnished-irrigation", + "metadata": {}, + "source": [ + "Calling with the name of a predefined style will show those parameter values:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "coordinate-reward", + "metadata": { + "tags": [ + "show-output" + ] + }, + "outputs": [], + "source": [ + "sns.axes_style(\"darkgrid\")" + ] + }, + { + "cell_type": "markdown", + "id": "mediterranean-picking", + "metadata": {}, + "source": [ + "Use the function as a context manager to temporarily change the style of your plots:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "missing-essence", + "metadata": {}, + 
"outputs": [], + "source": [ + "with sns.axes_style(\"whitegrid\"):\n", + " sns.barplot(x=[1, 2, 3], y=[2, 5, 3])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/barplot.ipynb b/doc/_docstrings/barplot.ipynb new file mode 100644 index 0000000000..bb1e6d193c --- /dev/null +++ b/doc/_docstrings/barplot.ipynb @@ -0,0 +1,282 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "6a6d582b-08c2-4fed-be56-afa1b986943a", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "sns.set_theme(style=\"whitegrid\")\n", + "penguins = sns.load_dataset(\"penguins\")\n", + "flights = sns.load_dataset(\"flights\")" + ] + }, + { + "cell_type": "raw", + "id": "b53b65b8-5670-4905-aa39-36db04f4b813", + "metadata": {}, + "source": [ + "With long data, assign `x` and `y` to group by a categorical variable and plot aggregated values, with confidence intervals:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f5c3ece-6295-4933-8a87-e80cd604c089", + "metadata": {}, + "outputs": [], + "source": [ + "sns.barplot(penguins, x=\"island\", y=\"body_mass_g\")" + ] + }, + { + "cell_type": "raw", + "id": "ed061d6f-bd3b-4189-bbc7-aed998be05cb", + "metadata": {}, + "source": [ + "Prior to v0.13.0, each bar would have a different color. 
To replicate this behavior, assign the grouping variable to `hue` as well:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3ded2e23-c610-450b-bcd2-1d2ba54db566", + "metadata": {}, + "outputs": [], + "source": [ + "sns.barplot(penguins, x=\"body_mass_g\", y=\"island\", hue=\"island\", legend=False)" + ] + }, + { + "cell_type": "raw", + "id": "e00fa127-4dd4-4565-9897-51317adfea3c", + "metadata": {}, + "source": [ + "When plotting a \"wide-form\" dataframe, each column will be aggregated and represented as a bar:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ae7e0f4e-471e-4dee-8913-5e7b67e0a381", + "metadata": {}, + "outputs": [], + "source": [ + "flights_wide = flights.pivot(index=\"year\", columns=\"month\", values=\"passengers\")\n", + "sns.barplot(flights_wide)" + ] + }, + { + "cell_type": "raw", + "id": "6020404c-15c6-4c00-9ffd-6c12ba624e52", + "metadata": {}, + "source": [ + "Passing only a series (or dict) will plot each of its values, using the index (or keys) to label the bars:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77b2c3eb-c3e4-4d44-929a-27a456da4b88", + "metadata": {}, + "outputs": [], + "source": [ + "sns.barplot(flights_wide[\"Jun\"])" + ] + }, + { + "cell_type": "raw", + "id": "b0c3b101-7649-4014-9ab2-10ff206d39d7", + "metadata": {}, + "source": [ + "With long-form data, you can add a second layer of grouping with `hue`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac1a28d1-b3bd-4158-86d0-3defc12f8566", + "metadata": {}, + "outputs": [], + "source": [ + "sns.barplot(penguins, x=\"island\", y=\"body_mass_g\", hue=\"sex\")" + ] + }, + { + "cell_type": "raw", + "id": "069ce509-ee0d-42c8-b053-1b4b6d764449", + "metadata": {}, + "source": [ + "Use the error bars to show the standard deviation rather than a confidence interval:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10445b78-a74a-4f14-a28b-a9164e592ae4", + 
"metadata": {}, + "outputs": [], + "source": [ + "sns.barplot(penguins, x=\"island\", y=\"body_mass_g\", errorbar=\"sd\")" + ] + }, + { + "cell_type": "raw", + "id": "6dc3d564-4d26-4753-a2a0-6194b10452bc", + "metadata": {}, + "source": [ + "Use a different aggregation function and disable the error bars:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "448ba05e-c533-459d-84b6-0fca80e6e3ce", + "metadata": {}, + "outputs": [], + "source": [ + "sns.barplot(flights, x=\"year\", y=\"passengers\", estimator=\"sum\", errorbar=None)" + ] + }, + { + "cell_type": "raw", + "id": "7746220d-b6b4-4ee5-886c-5867db35d4e3", + "metadata": {}, + "source": [ + "Add text labels with each bar's value:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e343485c-636e-4b96-b20d-59a7f7155be8", + "metadata": {}, + "outputs": [], + "source": [ + "ax = sns.barplot(flights, x=\"year\", y=\"passengers\", estimator=\"sum\", errorbar=None)\n", + "ax.bar_label(ax.containers[0], fontsize=10);" + ] + }, + { + "cell_type": "raw", + "id": "457702c2-9fa6-4021-a19b-f44b39aa0a19", + "metadata": {}, + "source": [ + "Preserve the original scaling of the grouping variable and add annotations in numeric coordinates:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "08b60118-5830-4fd7-8a66-431c065d57cb", + "metadata": {}, + "outputs": [], + "source": [ + "ax = sns.barplot(\n", + " flights, x=\"year\", y=\"passengers\",\n", + " native_scale=True,\n", + " estimator=\"sum\", errorbar=None,\n", + ")\n", + "ax.plot(1955, 3600, \"*\", markersize=10, color=\"r\")" + ] + }, + { + "cell_type": "raw", + "id": "206be839-f33b-4ffe-8101-bd98bc5942b8", + "metadata": {}, + "source": [ + "Use `orient` to resolve ambiguity about which variable should group when both are numeric:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3aff3c69-3c24-40ad-af12-a507e33f5d3f", + "metadata": {}, + "outputs": [], + "source": [ + 
"sns.barplot(flights, x=\"passengers\", y=\"year\", orient=\"y\")" + ] + }, + { + "cell_type": "raw", + "id": "90277a3b-1f86-4884-97ad-e5d65df408ef", + "metadata": {}, + "source": [ + "Customize the appearance of the plot using :class:`matplotlib.patches.Rectangle` and :class:`matplotlib.lines.Line2D` keyword arguments:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d6f9ac1c-a77d-4ee3-bc5e-fec2071b33df", + "metadata": {}, + "outputs": [], + "source": [ + "sns.barplot(\n", + " penguins, x=\"body_mass_g\", y=\"island\",\n", + " errorbar=(\"pi\", 50), capsize=.4,\n", + " err_kws={\"color\": \".5\", \"linewidth\": 2.5},\n", + " linewidth=2.5, edgecolor=\".5\", facecolor=(0, 0, 0, 0),\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "08ef562f-13a3-4da5-a9cf-46deaa543890", + "metadata": {}, + "source": [ + "Use :func:`catplot` to draw faceted bars, which is recommended over working directly with :class:`FacetGrid`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4d23777f-8a69-4c68-ab35-3e6740c61bcf", + "metadata": {}, + "outputs": [], + "source": [ + "sns.catplot(\n", + " penguins, kind=\"bar\",\n", + " x=\"sex\", y=\"body_mass_g\", col=\"species\",\n", + " height=4, aspect=.5,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0b6a62b9-eef7-4c85-a1c2-85a58231e6c6", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/blend_palette.ipynb b/doc/_docstrings/blend_palette.ipynb new file mode 100644 index 0000000000..302f93e96b --- /dev/null +++ 
b/doc/_docstrings/blend_palette.ipynb @@ -0,0 +1,103 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "8f97280e-cec8-42b2-a968-4fd4364594f8", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "sns.set_theme()\n", + "sns.palettes._patch_colormap_display()" + ] + }, + { + "cell_type": "raw", + "id": "972edede-df1a-4010-9674-00b864d020e2", + "metadata": {}, + "source": [ + "Pass a list of two colors to interpolate between them:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e6ae2547-1042-4ac0-84ea-6f37a0229871", + "metadata": {}, + "outputs": [], + "source": [ + "sns.blend_palette([\"b\", \"r\"])" + ] + }, + { + "cell_type": "raw", + "id": "1d983eac-2dd5-4746-b27f-4dfa19b5e091", + "metadata": {}, + "source": [ + "The color list can be arbitrarily long, and any color format can be used:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "846b78fd-30ce-4507-93f4-4274122c1987", + "metadata": {}, + "outputs": [], + "source": [ + "sns.blend_palette([\"#45a872\", \".8\", \"xkcd:golden\"])" + ] + }, + { + "cell_type": "raw", + "id": "318fef32-1f83-44d9-9ff9-21fa0231b7c6", + "metadata": {}, + "source": [ + "Return a continuous colormap instead of a discrete palette:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f0a05bc3-c60b-47a1-b276-d2e28a4a8226", + "metadata": {}, + "outputs": [], + "source": [ + "sns.blend_palette([\"#bdc\", \"#7b9\", \"#47a\"], as_cmap=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0473a402-0ec2-4877-81d2-ed6c57aefc77", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + 
"nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/boxenplot.ipynb b/doc/_docstrings/boxenplot.ipynb new file mode 100644 index 0000000000..71a8ad9dc1 --- /dev/null +++ b/doc/_docstrings/boxenplot.ipynb @@ -0,0 +1,313 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "882d215b-88d8-4b5e-ae7a-0e3f6bb53bad", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "sns.set_theme(style=\"whitegrid\")\n", + "diamonds = sns.load_dataset(\"diamonds\")" + ] + }, + { + "cell_type": "raw", + "id": "9b8b892e-a96f-46e8-9c5e-8749783608d8", + "metadata": { + "editable": true, + "raw_mimetype": "", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "Draw a single horizontal plot, assigning the data directly to the coordinate variable:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "391e1162-b438-4486-9a08-60686ee8e96a", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "sns.boxenplot(x=diamonds[\"price\"])" + ] + }, + { + "cell_type": "raw", + "id": "b0c5a469-c709-4333-a8bc-b2cb34f366aa", + "metadata": { + "editable": true, + "raw_mimetype": "", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "Group by a categorical variable, referencing columns in a dataframe:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e30fec18-f127-40a3-bfaf-f71324dd60ec", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "sns.boxenplot(data=diamonds, x=\"price\", y=\"clarity\")" + ] + }, + { + "cell_type": "raw", + "id": "70fe999a-bea5-4b0a-a1a3-474b6696d1be", + "metadata": { + "editable": true, +
"raw_mimetype": "", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "Group by another variable, representing it by the color of the boxes. By default, each boxen plot will be \"dodged\" so that they don't overlap; you can also add a small gap between them:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eed3239c-57b7-4d76-9fdc-be99257047fd", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "large_diamond = diamonds[\"carat\"].gt(1).rename(\"large_diamond\")\n", + "sns.boxenplot(data=diamonds, x=\"price\", y=\"clarity\", hue=large_diamond, gap=.2)" + ] + }, + { + "cell_type": "raw", + "id": "36030c1c-047b-4f7b-b366-91188b41680e", + "metadata": { + "editable": true, + "raw_mimetype": "", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "The default rule for choosing each box width represents the percentile covered by the box. Alternatively, you can reduce each box width by a linear factor:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d0c1aa43-5e8a-486c-bd6d-3c29d6d23138", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "sns.boxenplot(data=diamonds, x=\"price\", y=\"clarity\", width_method=\"linear\")" + ] + }, + { + "cell_type": "raw", + "id": "062a9fc2-9cbe-4e40-af8c-3fd35f785cd5", + "metadata": { + "editable": true, + "raw_mimetype": "", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "The `width` parameter itself, on the other hand, determines the width of the largest box:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4100a460-fe27-42b7-bbaf-4430a1c1359f", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "sns.boxenplot(data=diamonds, x=\"price\", y=\"clarity\", width=.5)" + 
] + }, + { + "cell_type": "raw", + "id": "407874a8-1202-4bcc-9f65-59e1fed29e07", + "metadata": { + "editable": true, + "raw_mimetype": "", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "There are several different approaches for choosing the number of boxes to draw, including a rule based on the confidence level of the percentile estimate:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1aead6a3-6f12-47d3-b472-a39c61867963", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "sns.boxenplot(data=diamonds, x=\"price\", y=\"clarity\", k_depth=\"trustworthy\", trust_alpha=0.01)" + ] + }, + { + "cell_type": "raw", + "id": "71212196-d60e-4682-8dcb-0289956be152", + "metadata": { + "editable": true, + "raw_mimetype": "", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "The `linecolor` and `linewidth` parameters control the outlines of the boxes, while the `line_kws` parameter controls the line representing the median and the `flier_kws` parameter controls the appearance of the outliers:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dd103426-a99f-476b-ae29-a11d52958cdb", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "sns.boxenplot(\n", + " data=diamonds, x=\"price\", y=\"clarity\",\n", + " linewidth=.5, linecolor=\".7\",\n", + " line_kws=dict(linewidth=1.5, color=\"#cde\"),\n", + " flier_kws=dict(facecolor=\".7\", linewidth=.5),\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "16f1c534-3316-4752-ae12-f65dee9275cb", + "metadata": { + "editable": true, + "raw_mimetype": "", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "It is also possible to draw unfilled boxes. 
With unfilled boxes, all elements will be drawn as line art and follow `hue`, when used:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ab6aef09-5bbe-4c01-b6ba-05446982d775", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "sns.boxenplot(data=diamonds, x=\"price\", y=\"clarity\", hue=\"clarity\", fill=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e059b944-ea59-408d-87bb-4ce65074dab5", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/boxplot.ipynb b/doc/_docstrings/boxplot.ipynb new file mode 100644 index 0000000000..050642685a --- /dev/null +++ b/doc/_docstrings/boxplot.ipynb @@ -0,0 +1,218 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "7edcf92f-6c11-4dc4-b684-118b3235d067", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "sns.set_theme(style=\"whitegrid\")\n", + "titanic = sns.load_dataset(\"titanic\")" + ] + }, + { + "cell_type": "raw", + "id": "4ca96805-333b-4186-9ad7-dcef4a9aacf5", + "metadata": {}, + "source": [ + "Draw a single horizontal boxplot, assigning the data directly to the coordinate variable:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "80532f2c-0f34-456c-9d5c-673682385461", + "metadata": {}, + "outputs": [], + "source": [ + "sns.boxplot(x=titanic[\"age\"])" + ] + }, + { + 
"cell_type": "raw", + "id": "d9e33318-9595-4132-bfbd-8d88905fea79", + "metadata": {}, + "source": [ + "Group by a categorical variable, referencing columns in a dataframe:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f1e0a6a4-151d-42d7-a098-ec9b91f20906", + "metadata": {}, + "outputs": [], + "source": [ + "sns.boxplot(data=titanic, x=\"age\", y=\"class\")" + ] + }, + { + "cell_type": "raw", + "id": "d1e0d9e7-2d9b-49e3-8bb3-d97f2de7e733", + "metadata": {}, + "source": [ + "Draw a vertical boxplot with nested grouping by two variables:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b8f74dc4-2b59-423a-90a7-dbf900c89251", + "metadata": {}, + "outputs": [], + "source": [ + "sns.boxplot(data=titanic, x=\"class\", y=\"age\", hue=\"alive\")" + ] + }, + { + "cell_type": "raw", + "id": "59aaff3f-2bba-44d1-9901-2dd680bad3ad", + "metadata": {}, + "source": [ + "Draw the boxes as line art and add a small gap between them:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6af681be-c49e-4794-8a92-90c58ef330f9", + "metadata": {}, + "outputs": [], + "source": [ + "sns.boxplot(data=titanic, x=\"class\", y=\"age\", hue=\"alive\", fill=False, gap=.1)" + ] + }, + { + "cell_type": "raw", + "id": "db4ef9cb-0f0d-458b-a06d-c537c2b4d733", + "metadata": {}, + "source": [ + "Cover the full range of the data with the whiskers:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "89aab45a-bc58-44e9-94ac-6a9aa0b20f5e", + "metadata": {}, + "outputs": [], + "source": [ + "sns.boxplot(data=titanic, x=\"age\", y=\"deck\", whis=(0, 100))" + ] + }, + { + "cell_type": "raw", + "id": "3844cc78-19a5-46e3-babd-77d6d7affcf0", + "metadata": {}, + "source": [ + "Draw narrower boxes:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "399825eb-698a-4464-8a04-505b6bf7edc7", + "metadata": {}, + "outputs": [], + "source": [ + "sns.boxplot(data=titanic, x=\"age\", y=\"deck\", width=.5)" + ] + }, + 
{ + "cell_type": "raw", + "id": "eaf35104-022d-4a20-9b60-f8b24acc7471", + "metadata": {}, + "source": [ + "Modify the color and width of all the line artists:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6e9dcaa3-b497-480e-b134-d31e01a7d4c5", + "metadata": {}, + "outputs": [], + "source": [ + "sns.boxplot(data=titanic, x=\"age\", y=\"deck\", color=\".8\", linecolor=\"#137\", linewidth=.75)" + ] + }, + { + "cell_type": "markdown", + "id": "8a188c80-d69f-4a07-9b0d-ca467d2be680", + "metadata": {}, + "source": [ + "Group by a numeric variable and preserve its native scaling:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9d73c63f-58a8-4659-96fd-964493ba3a50", + "metadata": {}, + "outputs": [], + "source": [ + "ax = sns.boxplot(x=titanic[\"age\"].round(-1), y=titanic[\"fare\"], native_scale=True)\n", + "ax.axvline(25, color=\".3\", dashes=(2, 2))" + ] + }, + { + "cell_type": "raw", + "id": "28536179-8400-462d-bf3e-3d9f353fe03b", + "metadata": {}, + "source": [ + "Customize the plot using parameters of the underlying matplotlib function:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "66c81b6e-e7fb-46c5-aa7b-f001241569b0", + "metadata": {}, + "outputs": [], + "source": [ + "sns.boxplot(\n", + " data=titanic, x=\"age\", y=\"class\",\n", + " notch=True, showcaps=False,\n", + " flierprops={\"marker\": \"x\"},\n", + " boxprops={\"facecolor\": (.3, .5, .7, .5)},\n", + " medianprops={\"color\": \"r\", \"linewidth\": 2},\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5d2bb11b-0f4a-4efe-b18b-be34ebf24e49", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + 
"pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/catplot.ipynb b/doc/_docstrings/catplot.ipynb new file mode 100644 index 0000000000..fcfff16beb --- /dev/null +++ b/doc/_docstrings/catplot.ipynb @@ -0,0 +1,190 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "a8aa6a6a-f6c0-4a6b-9460-2056e58a2e13", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "sns.set_theme(style=\"whitegrid\")" + ] + }, + { + "cell_type": "raw", + "id": "1aef2740-ae6e-4a1b-a588-3ad978e2614d", + "metadata": {}, + "source": [ + "By default, the visual representation will be a jittered strip plot:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "75a49e26-4318-4963-897c-dc0081aebfb3", + "metadata": {}, + "outputs": [], + "source": [ + "df = sns.load_dataset(\"titanic\")\n", + "sns.catplot(data=df, x=\"age\", y=\"class\")" + ] + }, + { + "cell_type": "markdown", + "id": "db1b8f6d-5264-4200-b81a-b0ee64040a1f", + "metadata": {}, + "source": [ + "Use `kind` to select a different representation:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "75ecd034-8536-4fe4-8852-a3975dba64dc", + "metadata": {}, + "outputs": [], + "source": [ + "sns.catplot(data=df, x=\"age\", y=\"class\", kind=\"box\")" + ] + }, + { + "cell_type": "markdown", + "id": "8aee79a9-b8b3-4129-b6d7-e9e32ae1e634", + "metadata": {}, + "source": [ + "One advantage is that the legend will be automatically placed outside the plot:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3798aac6-1ff6-4e36-ad83-4742fcb04159", + "metadata": {}, + "outputs": [], + "source": [ + "sns.catplot(data=df, x=\"age\", y=\"class\", hue=\"sex\", kind=\"boxen\")" + ] + }, + { + "cell_type": "markdown", + "id": "8a3777e1-90b6-4f4d-9e14-247b6dfd64fe", + "metadata": {}, + "source": [ + "Additional keyword arguments get passed 
through to the underlying seaborn function:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "afcff2fe-db11-4602-af79-68e4a0380f88", + "metadata": {}, + "outputs": [], + "source": [ + "sns.catplot(\n", + " data=df, x=\"age\", y=\"class\", hue=\"sex\",\n", + " kind=\"violin\", bw_adjust=.5, cut=0, split=True,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "a75bf46f-a3d0-4a5d-abcd-b9e85def65b0", + "metadata": {}, + "source": [ + "Assigning a variable to `col` or `row` will automatically create subplots. Control figure size with the `height` and `aspect` parameters:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "835afcf2-ecc9-4edb-9ec8-24484c5b08fb", + "metadata": {}, + "outputs": [], + "source": [ + "sns.catplot(\n", + " data=df, x=\"class\", y=\"survived\", col=\"sex\",\n", + " kind=\"bar\", height=4, aspect=.6,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "ecf323fe-1e86-47ff-aa50-e8c297cfa125", + "metadata": {}, + "source": [ + "For single-subplot figures, it is easy to layer different representations:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dc5b0fc0-359c-4219-b04e-171d8c7c8051", + "metadata": {}, + "outputs": [], + "source": [ + "sns.catplot(data=df, x=\"age\", y=\"class\", kind=\"violin\", color=\".9\", inner=None)\n", + "sns.swarmplot(data=df, x=\"age\", y=\"class\", size=3)" + ] + }, + { + "cell_type": "raw", + "id": "26e06ba4-0457-4597-b699-cb0fe8b2be32", + "metadata": {}, + "source": [ + "Use methods on the returned :class:`FacetGrid` to tweak the presentation:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a43f1914-d868-4060-82df-b3d25553d595", + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.catplot(\n", + " data=df, x=\"who\", y=\"survived\", col=\"class\",\n", + " kind=\"bar\", height=4, aspect=.6,\n", + ")\n", + "g.set_axis_labels(\"\", \"Survival Rate\")\n", + "g.set_xticklabels([\"Men\", \"Women\", 
\"Children\"])\n", + "g.set_titles(\"{col_name} {col_var}\")\n", + "g.set(ylim=(0, 1))\n", + "g.despine(left=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a529c18c-45bc-4efb-8ae0-c14518349162", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/clustermap.ipynb b/doc/_docstrings/clustermap.ipynb new file mode 100644 index 0000000000..487cec7646 --- /dev/null +++ b/doc/_docstrings/clustermap.ipynb @@ -0,0 +1,184 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "ffc1e1d9-fa74-4121-aa87-e1a8665e4c2b", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "sns.set_theme()" + ] + }, + { + "cell_type": "raw", + "id": "41b4f602-32af-44f8-bf1a-0f1695c9abbb", + "metadata": {}, + "source": [ + "Plot a heatmap with row and column clustering:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c715bd8f-cf5d-4caa-9244-336b3d0248a8", + "metadata": {}, + "outputs": [], + "source": [ + "iris = sns.load_dataset(\"iris\")\n", + "species = iris.pop(\"species\")\n", + "sns.clustermap(iris)" + ] + }, + { + "cell_type": "raw", + "id": "1cc3134c-579a-442a-97d8-a878651ce90a", + "metadata": {}, + "source": [ + "Change the size and layout of the figure:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fd33cf4b-9589-4b9a-a246-0b95bad28c51", + "metadata": {}, + "outputs": [], + "source": [ + "sns.clustermap(\n", + " iris,\n", + " figsize=(7, 5),\n", + " row_cluster=False,\n", + " 
dendrogram_ratio=(.1, .2),\n", + " cbar_pos=(0, .2, .03, .4)\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "c5d3408d-f5d6-4045-9d61-15573a981587", + "metadata": {}, + "source": [ + "Add colored labels to identify observations:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79d3fe52-6146-4f33-a39a-1d4a47243ea5", + "metadata": {}, + "outputs": [], + "source": [ + "lut = dict(zip(species.unique(), \"rbg\"))\n", + "row_colors = species.map(lut)\n", + "sns.clustermap(iris, row_colors=row_colors)" + ] + }, + { + "cell_type": "raw", + "id": "f2f944e2-36cd-4653-86b4-6d2affec13d6", + "metadata": {}, + "source": [ + "Use a different colormap and adjust the limits of the color range:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6137c7ad-db92-47b8-9d00-3228c4e1f7df", + "metadata": {}, + "outputs": [], + "source": [ + "sns.clustermap(iris, cmap=\"mako\", vmin=0, vmax=10)" + ] + }, + { + "cell_type": "raw", + "id": "93f96d1c-9d04-464f-93c9-4319caa8504a", + "metadata": {}, + "source": [ + "Use different clustering parameters:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f9e76bde-a222-4eca-971f-54f56ad53281", + "metadata": {}, + "outputs": [], + "source": [ + "sns.clustermap(iris, metric=\"correlation\", method=\"single\")" + ] + }, + { + "cell_type": "raw", + "id": "ea6ed3fd-188d-4244-adac-ec0169c02205", + "metadata": {}, + "source": [ + "Standardize the data within the columns:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e5f744c4-b959-4ed1-b2cf-6046c9214568", + "metadata": {}, + "outputs": [], + "source": [ + "sns.clustermap(iris, standard_scale=1)" + ] + }, + { + "cell_type": "raw", + "id": "7ca72242-4eb0-4f8e-b0c0-d1ef7166b738", + "metadata": {}, + "source": [ + "Normalize the data within rows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "33815c4c-9bae-4226-bd11-3dfdb7ecab2b", + "metadata": {}, + "outputs": [], + "source": [ + 
"sns.clustermap(iris, z_score=0, cmap=\"vlag\", center=0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f37d57a-b049-4665-9c24-4d5fbbca00ba", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/color_palette.ipynb b/doc/_docstrings/color_palette.ipynb new file mode 100644 index 0000000000..b896c7b743 --- /dev/null +++ b/doc/_docstrings/color_palette.ipynb @@ -0,0 +1,277 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "sns.set_theme()\n", + "sns.palettes._patch_colormap_display()" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Calling with no arguments returns all colors from the current default\n", + "color cycle:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.color_palette()" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Other variants on the seaborn categorical color palette can be referenced by name:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.color_palette(\"pastel\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Return a specified number of evenly spaced hues in the \"HUSL\" system:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.color_palette(\"husl\", 9)" + ] + }, + { + 
"cell_type": "raw", + "metadata": {}, + "source": [ + "Return all unique colors in a categorical Color Brewer palette:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.color_palette(\"Set2\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Return a diverging Color Brewer palette as a continuous colormap:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.color_palette(\"Spectral\", as_cmap=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Return one of the perceptually-uniform palettes included in seaborn as a discrete palette:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.color_palette(\"flare\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Return one of the perceptually-uniform palettes included in seaborn as a continuous colormap:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.color_palette(\"flare\", as_cmap=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Return a customized cubehelix color palette:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.color_palette(\"ch:s=.25,rot=-.25\", as_cmap=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Return a light sequential gradient:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.color_palette(\"light:#5A9\", as_cmap=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Return a reversed dark sequential gradient:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + 
"sns.color_palette(\"dark:#5A9_r\", as_cmap=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Return a blend gradient between two endpoints:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.color_palette(\"blend:#7AB,#EDA\", as_cmap=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Use as a context manager to change the default qualitative color palette:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "x, y = list(range(10)), [0] * 10\n", + "hue = list(map(str, x))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with sns.color_palette(\"Set3\"):\n", + " sns.relplot(x=x, y=y, hue=hue, s=500, legend=False, height=1.3, aspect=4)\n", + "\n", + "sns.relplot(x=x, y=y, hue=hue, s=500, legend=False, height=1.3, aspect=4)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "See the underlying color values as hex codes:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "show-output" + ] + }, + "outputs": [], + "source": [ + "print(sns.color_palette(\"pastel6\").as_hex())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/doc/_docstrings/countplot.ipynb b/doc/_docstrings/countplot.ipynb new file mode 100644 index 0000000000..5eabcf05ac --- 
/dev/null +++ b/doc/_docstrings/countplot.ipynb @@ -0,0 +1,95 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "2fdf0f63-d515-4cb8-b3e0-62cac7852b12", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "sns.set_theme(style=\"whitegrid\")\n", + "titanic = sns.load_dataset(\"titanic\")" + ] + }, + { + "cell_type": "raw", + "id": "af16d745-734a-4f11-9f8f-fa54deadfb12", + "metadata": {}, + "source": [ + "Show the count of each value for a single categorical variable:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6e9d0485-870d-4841-9c84-6e0bacbde7db", + "metadata": {}, + "outputs": [], + "source": [ + "sns.countplot(titanic, x=\"class\")" + ] + }, + { + "cell_type": "raw", + "id": "173f47c4-d5fb-4fc0-bdbd-ec228419d451", + "metadata": {}, + "source": [ + "Group by a second variable:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "26f73c00-a2b3-45c3-b3cd-2babe0a81894", + "metadata": {}, + "outputs": [], + "source": [ + "sns.countplot(titanic, x=\"class\", hue=\"survived\")" + ] + }, + { + "cell_type": "raw", + "id": "377bfb01-64a2-4f07-b06b-fb1a4f7c3b12", + "metadata": {}, + "source": [ + "Normalize the counts to show percentages:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7267aefc-f2bc-4a64-956a-bb25013ca9ec", + "metadata": {}, + "outputs": [], + "source": [ + "sns.countplot(titanic, x=\"class\", hue=\"survived\", stat=\"percent\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/cubehelix_palette.ipynb 
b/doc/_docstrings/cubehelix_palette.ipynb new file mode 100644 index 0000000000..a996b05864 --- /dev/null +++ b/doc/_docstrings/cubehelix_palette.ipynb @@ -0,0 +1,229 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "60aebc68-2c7c-4af5-a159-8421e1f94ba6", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "sns.set_theme()\n", + "sns.palettes._patch_colormap_display()" + ] + }, + { + "cell_type": "raw", + "id": "242b3d42-1f10-4da2-9ef9-af06f7fbd724", + "metadata": {}, + "source": [ + "Return a discrete palette with default parameters:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6526accb-9930-4e39-9f58-1ca2941c1c9d", + "metadata": {}, + "outputs": [], + "source": [ + "sns.cubehelix_palette()" + ] + }, + { + "cell_type": "raw", + "id": "887a40f0-d949-41fa-9a43-0ee246c9a077", + "metadata": {}, + "source": [ + "Increase the number of colors:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "02833290-b1ee-46df-a2a0-8268fba94628", + "metadata": {}, + "outputs": [], + "source": [ + "sns.cubehelix_palette(8)" + ] + }, + { + "cell_type": "raw", + "id": "a9eb86c7-f92e-4422-ae62-a2ef136e7e35", + "metadata": {}, + "source": [ + "Return a continuous colormap rather than a discrete palette:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a460efc2-cf0a-46bf-a12f-12870afce8a5", + "metadata": {}, + "outputs": [], + "source": [ + "sns.cubehelix_palette(as_cmap=True)" + ] + }, + { + "cell_type": "raw", + "id": "5b84aa6c-ad79-45b1-a7d2-44b7ecba5f7d", + "metadata": {}, + "source": [ + "Change the starting point of the helix:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "70ee079a-e760-4d43-8447-648fd236ab15", + "metadata": {}, + "outputs": [], + "source": [ + "sns.cubehelix_palette(start=2)" + ] + }, + { + "cell_type": "raw", + "id": "5e21fa22-9ac3-4354-8694-967f2447b286", + "metadata": {}, + 
"source": [ + "Change the amount of rotation in the helix:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ddb1b8c7-8933-4317-827f-4f10d2b4cecc", + "metadata": {}, + "outputs": [], + "source": [ + "sns.cubehelix_palette(rot=.2)" + ] + }, + { + "cell_type": "raw", + "id": "fa91aff7-54e7-4754-a13c-b629dfc33e8f", + "metadata": {}, + "source": [ + "Rotate in the reverse direction:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "548a3942-48ae-40d2-abb7-acc2ffd71601", + "metadata": {}, + "outputs": [], + "source": [ + "sns.cubehelix_palette(rot=-.2)" + ] + }, + { + "cell_type": "raw", + "id": "e7188a1b-183f-4b04-93a0-975c27fe408e", + "metadata": {}, + "source": [ + "Apply a nonlinearity to the luminance ramp:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9ced54ff-a396-451e-b17f-2366b56f920b", + "metadata": {}, + "outputs": [], + "source": [ + "sns.cubehelix_palette(gamma=.5)" + ] + }, + { + "cell_type": "raw", + "id": "bc82ce48-2df3-464e-b70e-a1d73d0432c6", + "metadata": {}, + "source": [ + "Increase the saturation of the colors:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a38b91a8-3fdc-4293-a3ea-71b4006cd2a1", + "metadata": {}, + "outputs": [], + "source": [ + "sns.cubehelix_palette(hue=1)" + ] + }, + { + "cell_type": "raw", + "id": "f8d23ba1-013a-489f-94c4-f2080bfdae87", + "metadata": {}, + "source": [ + "Change the luminance at the start and end points:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a4f05a16-18f0-4c14-99a4-57a0734aad02", + "metadata": {}, + "outputs": [], + "source": [ + "sns.cubehelix_palette(dark=.25, light=.75)" + ] + }, + { + "cell_type": "raw", + "id": "0bfcc5d9-05ba-4715-94ac-8d430d9416c2", + "metadata": {}, + "source": [ + "Reverse the direction of the luminance ramp:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "74563491-5448-42c3-86c5-f5d55ce6924c", + "metadata": {}, + "outputs": [], + 
"source": [ + "sns.cubehelix_palette(reverse=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "94a83211-8b8e-4e60-8365-9600e71ddc5d", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/dark_palette.ipynb b/doc/_docstrings/dark_palette.ipynb new file mode 100644 index 0000000000..143ce93f4e --- /dev/null +++ b/doc/_docstrings/dark_palette.ipynb @@ -0,0 +1,139 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "5cd1cbb8-ba1a-460b-8e3a-bc285867f1d1", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "sns.set_theme()\n", + "sns.palettes._patch_colormap_display()" + ] + }, + { + "cell_type": "raw", + "id": "b157eb25-015f-4dd6-9785-83ba19cf4f94", + "metadata": {}, + "source": [ + "Define a sequential ramp from a dark gray to a specified color:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5b655d28-9855-4528-8b8e-a6c50288fd1b", + "metadata": {}, + "outputs": [], + "source": [ + "sns.dark_palette(\"seagreen\")" + ] + }, + { + "cell_type": "raw", + "id": "50053b26-112a-4378-8ef0-9be0fb565ec7", + "metadata": {}, + "source": [ + "Specify the color with a hex code:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "74ae0d17-f65b-4bcf-ae66-d97d46964d5c", + "metadata": {}, + "outputs": [], + "source": [ + "sns.dark_palette(\"#79C\")" + ] + }, + { + "cell_type": "raw", + "id": "eea376a2-fdf5-40e4-a187-3a28af529072", + "metadata": {}, + "source": [ + "Specify the color from the 
husl system:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "66e451ee-869a-41ea-8dc5-4240b11e7be5", + "metadata": {}, + "outputs": [], + "source": [ + "sns.dark_palette((20, 60, 50), input=\"husl\")" + ] + }, + { + "cell_type": "raw", + "id": "e4f44dcd-cf49-4920-ac05-b4db67870363", + "metadata": {}, + "source": [ + "Increase the number of colors:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "75985f07-de92-4d8b-89d5-caf445b9375e", + "metadata": {}, + "outputs": [], + "source": [ + "sns.dark_palette(\"xkcd:golden\", 8)" + ] + }, + { + "cell_type": "raw", + "id": "34687ae8-fd6d-427a-a639-208f19e61122", + "metadata": {}, + "source": [ + "Return a continuous colormap rather than a discrete palette:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2c342db4-7f97-40f5-934e-9a82201890d1", + "metadata": {}, + "outputs": [], + "source": [ + "sns.dark_palette(\"#b285bc\", as_cmap=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e7ebe64b-25fa-4c52-9ebe-fdcbba0ee51e", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/displot.ipynb b/doc/_docstrings/displot.ipynb new file mode 100644 index 0000000000..9a4ae10cae --- /dev/null +++ b/doc/_docstrings/displot.ipynb @@ -0,0 +1,239 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn as sns; sns.set_theme(style=\"ticks\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + 
"source": [ + "The default plot kind is a histogram:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "penguins = sns.load_dataset(\"penguins\")\n", + "sns.displot(data=penguins, x=\"flipper_length_mm\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Use the ``kind`` parameter to select a different representation:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(data=penguins, x=\"flipper_length_mm\", kind=\"kde\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There are three main plot kinds; in addition to histograms and kernel density estimates (KDEs), you can also draw empirical cumulative distribution functions (ECDFs):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(data=penguins, x=\"flipper_length_mm\", kind=\"ecdf\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "While in histogram mode, it is also possible to add a KDE curve:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(data=penguins, x=\"flipper_length_mm\", kde=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To draw a bivariate plot, assign both ``x`` and ``y``:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(data=penguins, x=\"flipper_length_mm\", y=\"bill_length_mm\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Currently, bivariate plots are available only for histograms and KDEs:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(data=penguins, x=\"flipper_length_mm\", y=\"bill_length_mm\", 
kind=\"kde\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For each kind of plot, you can also show individual observations with a marginal \"rug\":" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.displot(data=penguins, x=\"flipper_length_mm\", y=\"bill_length_mm\", kind=\"kde\", rug=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Each kind of plot can be drawn separately for subsets of data using ``hue`` mapping:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(data=penguins, x=\"flipper_length_mm\", hue=\"species\", kind=\"kde\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Additional keyword arguments are passed to the appropriate underlying plotting function, allowing for further customization:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(data=penguins, x=\"flipper_length_mm\", hue=\"species\", multiple=\"stack\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The figure is constructed using a :class:`FacetGrid`, meaning that you can also show subsets on distinct subplots, or \"facets\":" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(data=penguins, x=\"flipper_length_mm\", hue=\"species\", col=\"sex\", kind=\"kde\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Because the figure is drawn with a :class:`FacetGrid`, you control its size and shape with the ``height`` and ``aspect`` parameters:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(\n", + " data=penguins, y=\"flipper_length_mm\", hue=\"sex\", col=\"species\",\n", + " 
kind=\"ecdf\", height=4, aspect=.7,\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The function returns the :class:`FacetGrid` object with the plot, and you can use the methods on this object to customize it further:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.displot(\n", + " data=penguins, y=\"flipper_length_mm\", hue=\"sex\", col=\"species\",\n", + " kind=\"kde\", height=4, aspect=.7,\n", + ")\n", + "g.set_axis_labels(\"Density (a.u.)\", \"Flipper length (mm)\")\n", + "g.set_titles(\"{col_name} penguins\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/doc/_docstrings/diverging_palette.ipynb b/doc/_docstrings/diverging_palette.ipynb new file mode 100644 index 0000000000..ea2ad798bf --- /dev/null +++ b/doc/_docstrings/diverging_palette.ipynb @@ -0,0 +1,183 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "01295cb6-cc7a-4c6d-94cf-9b0e6cde9fa7", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "sns.set_theme()\n", + "sns.palettes._patch_colormap_display()" + ] + }, + { + "cell_type": "raw", + "id": "84880848-0805-4c41-999a-50808b397275", + "metadata": {}, + "source": [ + "Generate diverging ramps from blue to red through white:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643b3e07-8365-46e3-b033-af7a2fdcd158", + "metadata": {}, + "outputs": [], + "source": [ + "sns.diverging_palette(240, 20)" + ] + }, + { + "cell_type": "raw", + "id": 
"5ae53941-d9d9-4b5a-8abc-173911ebee74", + "metadata": {}, + "source": [ + "Change the center color to be dark:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "41f03771-8fb2-46f6-93c5-5a0e28be625c", + "metadata": {}, + "outputs": [], + "source": [ + "sns.diverging_palette(240, 20, center=\"dark\")" + ] + }, + { + "cell_type": "raw", + "id": "0aeb2402-2cbe-4546-a354-f1f501f762ae", + "metadata": {}, + "source": [ + "Return a continuous colormap rather than a discrete palette:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "64d335a5-f8b2-433f-a83f-5aeff7db583a", + "metadata": {}, + "outputs": [], + "source": [ + "sns.diverging_palette(240, 20, as_cmap=True)" + ] + }, + { + "cell_type": "raw", + "id": "77223a07-8492-4056-a0f7-14e133e3ce2c", + "metadata": {}, + "source": [ + "Increase the amount of separation around the center value:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82472c1e-4b16-40eb-be1d-480bbd2aa702", + "metadata": {}, + "outputs": [], + "source": [ + "sns.diverging_palette(240, 20, sep=30, as_cmap=True)" + ] + }, + { + "cell_type": "raw", + "id": "966e8594-b458-414c-a7b0-3e804ce407bf", + "metadata": {}, + "source": [ + "Use a magenta-to-green palette instead:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a03f8ede-b424-4e06-beb6-cf63c94bcd9e", + "metadata": {}, + "outputs": [], + "source": [ + "sns.diverging_palette(280, 150)" + ] + }, + { + "cell_type": "raw", + "id": "b3b17689-58e2-4065-9d52-1cf5ebcd4e89", + "metadata": {}, + "source": [ + "Decrease the saturation of the endpoints:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "02aaa009-f257-4fc7-a2de-40fbb1464490", + "metadata": {}, + "outputs": [], + "source": [ + "sns.diverging_palette(280, 150, s=50)" + ] + }, + { + "cell_type": "raw", + "id": "db75ca48-ba72-4ca2-8480-bc72c20a70cc", + "metadata": {}, + "source": [ + "Decrease the lightness of the endpoints:" + ] + }, + { 
+ "cell_type": "code", + "execution_count": null, + "id": "89e3bcb1-a17c-4465-830f-46043cb6c322", + "metadata": {}, + "outputs": [], + "source": [ + "sns.diverging_palette(280, 150, l=35)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4e42452a-a485-43e7-bbc3-338db58e4637", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e19f523f-c2f7-489a-ba00-326810e31a67", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/ecdfplot.ipynb b/doc/_docstrings/ecdfplot.ipynb new file mode 100644 index 0000000000..71ab3cc9bb --- /dev/null +++ b/doc/_docstrings/ecdfplot.ipynb @@ -0,0 +1,142 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Plot a univariate distribution along the x axis:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn as sns; sns.set_theme()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "penguins = sns.load_dataset(\"penguins\")\n", + "sns.ecdfplot(data=penguins, x=\"flipper_length_mm\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Flip the plot by assigning the data variable to the y axis:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.ecdfplot(data=penguins, y=\"flipper_length_mm\")" + ] + }, + { + "cell_type": "markdown", 
+ "metadata": {}, + "source": [ + "If neither `x` nor `y` is assigned, the dataset is treated as wide-form, and an ECDF is drawn for each numeric column:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.ecdfplot(data=penguins.filter(like=\"bill_\", axis=\"columns\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also draw multiple ECDFs from a long-form dataset with hue mapping:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.ecdfplot(data=penguins, x=\"bill_length_mm\", hue=\"species\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The default distribution statistic is normalized to show a proportion, but you can show absolute counts or percents instead:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.ecdfplot(data=penguins, x=\"bill_length_mm\", hue=\"species\", stat=\"count\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It's also possible to plot the empirical complementary CDF (1 - CDF):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.ecdfplot(data=penguins, x=\"bill_length_mm\", hue=\"species\", complementary=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git
a/doc/_docstrings/heatmap.ipynb b/doc/_docstrings/heatmap.ipynb new file mode 100644 index 0000000000..3ddac77d54 --- /dev/null +++ b/doc/_docstrings/heatmap.ipynb @@ -0,0 +1,205 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "987b9549-532e-4091-a6cf-007d1b23e825", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "sns.set_theme()" + ] + }, + { + "cell_type": "raw", + "id": "2c78ca60-e232-44f6-956b-b86b472b1c28", + "metadata": {}, + "source": [ + "Pass a :class:`DataFrame` to plot with indices as row/column labels:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fad17798-c2e3-4334-abf0-0d46153971fa", + "metadata": {}, + "outputs": [], + "source": [ + "glue = sns.load_dataset(\"glue\").pivot(index=\"Model\", columns=\"Task\", values=\"Score\")\n", + "sns.heatmap(glue)" + ] + }, + { + "cell_type": "raw", + "id": "f3255c5f-2477-4d13-b4c2-7e56380e9cc2", + "metadata": {}, + "source": [ + "Use `annot` to represent the cell values with text:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3c9f3c73-c8bc-426e-bc67-dec8f807082e", + "metadata": {}, + "outputs": [], + "source": [ + "sns.heatmap(glue, annot=True)" + ] + }, + { + "cell_type": "raw", + "id": "bc412da8-866a-49b7-8496-01fbf06dd908", + "metadata": {}, + "source": [ + "Control the annotations with a formatting string:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac952d0d-9187-4dff-a560-88430076851a", + "metadata": {}, + "outputs": [], + "source": [ + "sns.heatmap(glue, annot=True, fmt=\".1f\")" + ] + }, + { + "cell_type": "raw", + "id": "5eb12725-e9ee-4df0-9708-243d7e0a77b5", + "metadata": {}, + "source": [ + "Use a separate dataframe for the annotations:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1189a37f-9f74-455a-a09a-c22e056d8ba7", + "metadata": {}, + "outputs": [], + "source": [ + "sns.heatmap(glue, 
annot=glue.rank(axis=\"columns\"))" + ] + }, + { + "cell_type": "raw", + "id": "253dfb7f-aa12-4716-adc2-3a38b003b2c3", + "metadata": {}, + "source": [ + "Add lines between cells:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5cac673e-9b86-490b-9e67-ec0cf865bede", + "metadata": {}, + "outputs": [], + "source": [ + "sns.heatmap(glue, annot=True, linewidth=.5)" + ] + }, + { + "cell_type": "raw", + "id": "b7d3659c-f996-4af3-a612-430d97799c72", + "metadata": {}, + "source": [ + "Select a different colormap by name:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "86806d72-e784-430e-8320-48f2c91115bb", + "metadata": {}, + "outputs": [], + "source": [ + "sns.heatmap(glue, cmap=\"crest\")" + ] + }, + { + "cell_type": "raw", + "id": "8336fd53-3841-458f-b26c-411efff54d45", + "metadata": {}, + "source": [ + "Or pass a colormap object:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9944ff33-991f-4138-a951-e3015c0326f1", + "metadata": {}, + "outputs": [], + "source": [ + "sns.heatmap(glue, cmap=sns.cubehelix_palette(as_cmap=True))" + ] + }, + { + "cell_type": "raw", + "id": "52cc4dba-b86a-4da8-9cbd-3f8aa06b43b4", + "metadata": {}, + "source": [ + "Set the colormap norm (data values corresponding to minimum and maximum points):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b4ddb41e-c075-41a5-8afe-422ad6d105bf", + "metadata": {}, + "outputs": [], + "source": [ + "sns.heatmap(glue, vmin=50, vmax=100)" + ] + }, + { + "cell_type": "raw", + "id": "6e828517-a532-49b1-be11-eda47c50cc37", + "metadata": {}, + "source": [ + "Use methods on the :class:`matplotlib.axes.Axes` object to tweak the plot:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1aab26fc-2de4-4d4f-ad08-487809573deb", + "metadata": {}, + "outputs": [], + "source": [ + "ax = sns.heatmap(glue, annot=True)\n", + "ax.set(xlabel=\"\", ylabel=\"\")\n", + "ax.xaxis.tick_top()" + ] + } + ], + "metadata": 
{ + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/histplot.ipynb b/doc/_docstrings/histplot.ipynb new file mode 100644 index 0000000000..b448f7a65a --- /dev/null +++ b/doc/_docstrings/histplot.ipynb @@ -0,0 +1,483 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "sns.set_theme(style=\"white\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Assign a variable to ``x`` to plot a univariate distribution along the x axis:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "penguins = sns.load_dataset(\"penguins\")\n", + "sns.histplot(data=penguins, x=\"flipper_length_mm\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Flip the plot by assigning the data variable to the y axis:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.histplot(data=penguins, y=\"flipper_length_mm\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Check how well the histogram represents the data by specifying a different bin width:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.histplot(data=penguins, x=\"flipper_length_mm\", binwidth=3)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also define the total number of bins to use:" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.histplot(data=penguins, x=\"flipper_length_mm\", bins=30)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Add a kernel density estimate to smooth the histogram, providing complementary information about the shape of the distribution:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.histplot(data=penguins, x=\"flipper_length_mm\", kde=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If neither `x` nor `y` is assigned, the dataset is treated as wide-form, and a histogram is drawn for each numeric column:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.histplot(data=penguins)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can otherwise draw multiple histograms from a long-form dataset with hue mapping:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.histplot(data=penguins, x=\"flipper_length_mm\", hue=\"species\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The default approach to plotting multiple distributions is to \"layer\" them, but you can also \"stack\" them:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.histplot(data=penguins, x=\"flipper_length_mm\", hue=\"species\", multiple=\"stack\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Overlapping bars can be hard to visually resolve. 
A different approach would be to draw a step function:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.histplot(penguins, x=\"flipper_length_mm\", hue=\"species\", element=\"step\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can move even farther away from bars by drawing a polygon with vertices in the center of each bin. This may make it easier to see the shape of the distribution, but use with caution: it will be less obvious to your audience that they are looking at a histogram:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.histplot(penguins, x=\"flipper_length_mm\", hue=\"species\", element=\"poly\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To compare the distribution of subsets that differ substantially in size, use independent density normalization:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.histplot(\n", + " penguins, x=\"bill_length_mm\", hue=\"island\", element=\"step\",\n", + " stat=\"density\", common_norm=False,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It's also possible to normalize so that each bar's height shows a probability, proportion, or percent, which make more sense for discrete variables:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tips = sns.load_dataset(\"tips\")\n", + "sns.histplot(data=tips, x=\"size\", stat=\"percent\", discrete=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can even draw a histogram over categorical variables (although this is an experimental feature):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + 
"sns.histplot(data=tips, x=\"day\", shrink=.8)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "When using a ``hue`` semantic with discrete data, it can make sense to \"dodge\" the levels:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.histplot(data=tips, x=\"day\", hue=\"sex\", multiple=\"dodge\", shrink=.8)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Real-world data is often skewed. For heavily skewed distributions, it's better to define the bins in log space. Compare:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "planets = sns.load_dataset(\"planets\")\n", + "sns.histplot(data=planets, x=\"distance\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To the log-scale version:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.histplot(data=planets, x=\"distance\", log_scale=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There are also a number of options for how the histogram appears. 
You can show unfilled bars:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.histplot(data=planets, x=\"distance\", log_scale=True, fill=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Or an unfilled step function:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.histplot(data=planets, x=\"distance\", log_scale=True, element=\"step\", fill=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Step functions, especially when unfilled, make it easy to compare cumulative histograms:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.histplot(\n", + " data=planets, x=\"distance\", hue=\"method\",\n", + " hue_order=[\"Radial Velocity\", \"Transit\"],\n", + " log_scale=True, element=\"step\", fill=False,\n", + " cumulative=True, stat=\"density\", common_norm=False,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "When both ``x`` and ``y`` are assigned, a bivariate histogram is computed and shown as a heatmap:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.histplot(penguins, x=\"bill_depth_mm\", y=\"body_mass_g\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It's possible to assign a ``hue`` variable too, although this will not work well if data from the different levels have substantial overlap:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.histplot(penguins, x=\"bill_depth_mm\", y=\"body_mass_g\", hue=\"species\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Multiple color maps can make sense when one of the variables is discrete:" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.histplot(\n", + " penguins, x=\"bill_depth_mm\", y=\"species\", hue=\"species\", legend=False\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The bivariate histogram accepts all of the same options for computation as its univariate counterpart, using tuples to parametrize ``x`` and ``y`` independently:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.histplot(\n", + " planets, x=\"year\", y=\"distance\",\n", + " bins=30, discrete=(True, False), log_scale=(False, True),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The default behavior makes cells with no observations transparent, although this can be disabled: " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.histplot(\n", + " planets, x=\"year\", y=\"distance\",\n", + " bins=30, discrete=(True, False), log_scale=(False, True),\n", + " thresh=None,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It's also possible to set the threshold and colormap saturation point in terms of the proportion of cumulative counts:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.histplot(\n", + " planets, x=\"year\", y=\"distance\",\n", + " bins=30, discrete=(True, False), log_scale=(False, True),\n", + " pthresh=.05, pmax=.9,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To annotate the colormap, add a colorbar:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.histplot(\n", + " planets, x=\"year\", y=\"distance\",\n", + " bins=30, discrete=(True, False), log_scale=(False, True),\n", + " cbar=True, 
cbar_kws=dict(shrink=.75),\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/doc/_docstrings/hls_palette.ipynb b/doc/_docstrings/hls_palette.ipynb new file mode 100644 index 0000000000..49a7db979f --- /dev/null +++ b/doc/_docstrings/hls_palette.ipynb @@ -0,0 +1,157 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "158cd1cf-6b30-4054-b32f-a166fcb883be", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "sns.set_theme()\n", + "sns.palettes._patch_colormap_display()" + ] + }, + { + "cell_type": "raw", + "id": "c81b86cb-fb4e-418b-8d2f-6cd10601ac5a", + "metadata": {}, + "source": [ + "By default, return 6 colors with identical lightness and saturation and evenly-sampled hues:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6c3eaeaf-88eb-4012-96ea-41b328fa98b9", + "metadata": {}, + "outputs": [], + "source": [ + "sns.hls_palette()" + ] + }, + { + "cell_type": "raw", + "id": "f7624b0b-2311-45de-b6a5-fc07132ce455", + "metadata": {}, + "source": [ + "Increase the number of colors:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "555c29d1-6972-4a19-ad32-957fb7545634", + "metadata": {}, + "outputs": [], + "source": [ + "sns.hls_palette(8)" + ] + }, + { + "cell_type": "raw", + "id": "24713fa6-e485-4358-9ffc-d40bd9543caa", + "metadata": {}, + "source": [ + "Decrease the lightness:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b6f80b4c-f7b4-4deb-a119-cdf6cfe1f7b5", + "metadata": {}, + "outputs": [], + "source": [ + 
"sns.hls_palette(l=.3)" + ] + }, + { + "cell_type": "raw", + "id": "e521b514-5572-43e8-95ae-a20cc30169b8", + "metadata": {}, + "source": [ + "Decrease the saturation:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f88bd038-0c9c-48b1-92b0-d272a9c199f4", + "metadata": {}, + "outputs": [], + "source": [ + "sns.hls_palette(s=.3)" + ] + }, + { + "cell_type": "raw", + "id": "92a2212c-2177-4c82-8a5e-9dd788e9f87c", + "metadata": {}, + "source": [ + "Change the start-point for hue sampling:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f8da8fbc-551c-4896-b1b8-04203e740d78", + "metadata": {}, + "outputs": [], + "source": [ + "sns.hls_palette(h=.5)" + ] + }, + { + "cell_type": "raw", + "id": "87780608-1f5a-409f-b31f-6a31a599f122", + "metadata": {}, + "source": [ + "Return a continuous colormap. Notice the perceptual discontinuities, especially around yellow, cyan, and magenta: " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4c622b3b-70d7-4139-8389-f3d0d4addd66", + "metadata": {}, + "outputs": [], + "source": [ + "sns.hls_palette(as_cmap=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3a83c1de-88c5-4327-abd2-19e8f3642052", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/husl_palette.ipynb b/doc/_docstrings/husl_palette.ipynb new file mode 100644 index 0000000000..8b48b55898 --- /dev/null +++ b/doc/_docstrings/husl_palette.ipynb @@ -0,0 +1,157 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": 
"a6794650-f28f-40eb-95a7-3f0e5c4b332d", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "sns.set_theme()\n", + "sns.palettes._patch_colormap_display()" + ] + }, + { + "cell_type": "raw", + "id": "fab2f86e-45d4-4982-ade7-0a5ea6d762d1", + "metadata": {}, + "source": [ + "By default, return 6 colors with identical lightness and saturation and evenly-sampled hues:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b220950e-0ca2-4101-b56a-14eebe8ee8d0", + "metadata": {}, + "outputs": [], + "source": [ + "sns.husl_palette()" + ] + }, + { + "cell_type": "raw", + "id": "c5e4a2e3-e6b8-42bf-be19-348ff7ae2798", + "metadata": {}, + "source": [ + "Increase the number of colors:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7d0af740-cfca-49fb-a472-1daa4ccb3f3a", + "metadata": {}, + "outputs": [], + "source": [ + "sns.husl_palette(8)" + ] + }, + { + "cell_type": "raw", + "id": "1a7189f2-2a26-446a-90e7-cf41dcac4f25", + "metadata": {}, + "source": [ + "Decrease the lightness:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "43af79c7-f497-41e5-874a-83eed99500f3", + "metadata": {}, + "outputs": [], + "source": [ + "sns.husl_palette(l=.4)" + ] + }, + { + "cell_type": "raw", + "id": "6d4099b7-5115-4365-b120-33a345581f5d", + "metadata": {}, + "source": [ + "Decrease the saturation:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "52c1afc7-d982-4199-b218-222aa94563c5", + "metadata": {}, + "outputs": [], + "source": [ + "sns.husl_palette(s=.4)" + ] + }, + { + "cell_type": "raw", + "id": "d26131ac-0d11-48c5-88b1-4e5cf9383000", + "metadata": {}, + "source": [ + "Change the start-point for hue sampling:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d72f06a0-13e0-47f7-bc70-4c5935eaa130", + "metadata": {}, + "outputs": [], + "source": [ + "sns.husl_palette(h=.5)" + ] + }, + { + "cell_type": "raw", + "id": 
"7e6c3c19-41d3-4315-b03e-909d201d0e76", + "metadata": {}, + "source": [ + "Return a continuous colormap:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "49c18838-0589-496f-9a61-635195c07f61", + "metadata": {}, + "outputs": [], + "source": [ + "sns.husl_palette(as_cmap=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c710a557-8e84-44cb-ab4c-baabcc4fd328", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/jointplot.ipynb b/doc/_docstrings/jointplot.ipynb new file mode 100644 index 0000000000..b0b9d8f3ed --- /dev/null +++ b/doc/_docstrings/jointplot.ipynb @@ -0,0 +1,194 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "sns.set_theme(style=\"white\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "In the simplest invocation, assign ``x`` and ``y`` to create a scatterplot (using :func:`scatterplot`) with marginal histograms (using :func:`histplot`):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "penguins = sns.load_dataset(\"penguins\")\n", + "sns.jointplot(data=penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Assigning a ``hue`` variable will add conditional colors to the scatterplot and draw separate density curves (using :func:`kdeplot`) on the marginal axes:" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.jointplot(data=penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\", hue=\"species\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Several different approaches to plotting are available through the ``kind`` parameter. Setting ``kind=\"kde\"`` will draw both bivariate and univariate KDEs:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.jointplot(data=penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\", hue=\"species\", kind=\"kde\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Set ``kind=\"reg\"`` to add a linear regression fit (using :func:`regplot`) and univariate KDE curves:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.jointplot(data=penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\", kind=\"reg\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "There are also two options for bin-based visualization of the joint distribution. 
The first, with ``kind=\"hist\"``, uses :func:`histplot` on all of the axes:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.jointplot(data=penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\", kind=\"hist\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Alternatively, setting ``kind=\"hex\"`` will use :meth:`matplotlib.axes.Axes.hexbin` to compute a bivariate histogram using hexagonal bins:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.jointplot(data=penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\", kind=\"hex\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Additional keyword arguments can be passed down to the underlying plots:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.jointplot(\n", + " data=penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\",\n", + " marker=\"+\", s=100, marginal_kws=dict(bins=25, fill=False),\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Use :class:`JointGrid` parameters to control the size and layout of the figure:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.jointplot(data=penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\", height=5, ratio=2, marginal_ticks=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "To add more layers onto the plot, use the methods on the :class:`JointGrid` object that :func:`jointplot` returns:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.jointplot(data=penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\")\n", + "g.plot_joint(sns.kdeplot, color=\"r\", zorder=0, levels=6)\n", + "g.plot_marginals(sns.rugplot, color=\"r\", height=-.15, 
clip_on=False)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/doc/_docstrings/kdeplot.ipynb b/doc/_docstrings/kdeplot.ipynb new file mode 100644 index 0000000000..f301c56359 --- /dev/null +++ b/doc/_docstrings/kdeplot.ipynb @@ -0,0 +1,349 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn as sns; sns.set_theme()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Plot a univariate distribution along the x axis:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tips = sns.load_dataset(\"tips\")\n", + "sns.kdeplot(data=tips, x=\"total_bill\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Flip the plot by assigning the data variable to the y axis:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.kdeplot(data=tips, y=\"total_bill\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Plot distributions for each column of a wide-form dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "iris = sns.load_dataset(\"iris\")\n", + "sns.kdeplot(data=iris)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Use less smoothing:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + 
"outputs": [], + "source": [ + "sns.kdeplot(data=tips, x=\"total_bill\", bw_adjust=.2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Use more smoothing, but don't smooth past the extreme data points:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ax= sns.kdeplot(data=tips, x=\"total_bill\", bw_adjust=5, cut=0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Plot conditional distributions with hue mapping of a second variable:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.kdeplot(data=tips, x=\"total_bill\", hue=\"time\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\"Stack\" the conditional distributions:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.kdeplot(data=tips, x=\"total_bill\", hue=\"time\", multiple=\"stack\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Normalize the stacked distribution at each value in the grid:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.kdeplot(data=tips, x=\"total_bill\", hue=\"time\", multiple=\"fill\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Estimate the cumulative distribution function(s), normalizing each subset:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.kdeplot(\n", + " data=tips, x=\"total_bill\", hue=\"time\",\n", + " cumulative=True, common_norm=False, common_grid=True,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Estimate distribution from aggregated data, using weights:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], 
+ "source": [ + "tips_agg = (tips\n", + " .groupby(\"size\")\n", + " .agg(total_bill=(\"total_bill\", \"mean\"), n=(\"total_bill\", \"count\"))\n", + ")\n", + "sns.kdeplot(data=tips_agg, x=\"total_bill\", weights=\"n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Map the data variable with log scaling:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "diamonds = sns.load_dataset(\"diamonds\")\n", + "sns.kdeplot(data=diamonds, x=\"price\", log_scale=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Use numeric hue mapping:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.kdeplot(data=tips, x=\"total_bill\", hue=\"size\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Modify the appearance of the plot:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.kdeplot(\n", + " data=tips, x=\"total_bill\", hue=\"size\",\n", + " fill=True, common_norm=False, palette=\"crest\",\n", + " alpha=.5, linewidth=0,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Plot a bivariate distribution:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "geyser = sns.load_dataset(\"geyser\")\n", + "sns.kdeplot(data=geyser, x=\"waiting\", y=\"duration\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Map a third variable with a hue semantic to show conditional distributions:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.kdeplot(data=geyser, x=\"waiting\", y=\"duration\", hue=\"kind\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Show filled contours:" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.kdeplot(\n", + " data=geyser, x=\"waiting\", y=\"duration\", hue=\"kind\", fill=True,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Show fewer contour levels, covering less of the distribution:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.kdeplot(\n", + " data=geyser, x=\"waiting\", y=\"duration\", hue=\"kind\",\n", + " levels=5, thresh=.2,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Fill the axes extent with a smooth distribution, using a different colormap:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.kdeplot(\n", + " data=geyser, x=\"waiting\", y=\"duration\",\n", + " fill=True, thresh=0, levels=100, cmap=\"mako\",\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/doc/_docstrings/light_palette.ipynb b/doc/_docstrings/light_palette.ipynb new file mode 100644 index 0000000000..15564b63e3 --- /dev/null +++ b/doc/_docstrings/light_palette.ipynb @@ -0,0 +1,139 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "5cd1cbb8-ba1a-460b-8e3a-bc285867f1d1", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "sns.set_theme()\n", + "sns.palettes._patch_colormap_display()" + ] + }, + { + "cell_type": "raw", + "id": "b157eb25-015f-4dd6-9785-83ba19cf4f94", + "metadata": {}, + 
"source": [ + "Define a sequential ramp from a light gray to a specified color:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "851a4742-6276-4383-b17e-480beb896877", + "metadata": {}, + "outputs": [], + "source": [ + "sns.light_palette(\"seagreen\")" + ] + }, + { + "cell_type": "raw", + "id": "50053b26-112a-4378-8ef0-9be0fb565ec7", + "metadata": {}, + "source": [ + "Specify the color with a hex code:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "74ae0d17-f65b-4bcf-ae66-d97d46964d5c", + "metadata": {}, + "outputs": [], + "source": [ + "sns.light_palette(\"#79C\")" + ] + }, + { + "cell_type": "raw", + "id": "eea376a2-fdf5-40e4-a187-3a28af529072", + "metadata": {}, + "source": [ + "Specify the color from the husl system:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "66e451ee-869a-41ea-8dc5-4240b11e7be5", + "metadata": {}, + "outputs": [], + "source": [ + "sns.light_palette((20, 60, 50), input=\"husl\")" + ] + }, + { + "cell_type": "raw", + "id": "e4f44dcd-cf49-4920-ac05-b4db67870363", + "metadata": {}, + "source": [ + "Increase the number of colors:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "75985f07-de92-4d8b-89d5-caf445b9375e", + "metadata": {}, + "outputs": [], + "source": [ + "sns.light_palette(\"xkcd:copper\", 8)" + ] + }, + { + "cell_type": "raw", + "id": "34687ae8-fd6d-427a-a639-208f19e61122", + "metadata": {}, + "source": [ + "Return a continuous colormap rather than a discrete palette:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2c342db4-7f97-40f5-934e-9a82201890d1", + "metadata": {}, + "outputs": [], + "source": [ + "sns.light_palette(\"#a275ac\", as_cmap=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e7ebe64b-25fa-4c52-9ebe-fdcbba0ee51e", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": 
"py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/lineplot.ipynb b/doc/_docstrings/lineplot.ipynb new file mode 100644 index 0000000000..4bd9e8a57c --- /dev/null +++ b/doc/_docstrings/lineplot.ipynb @@ -0,0 +1,453 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "import matplotlib as mpl\n", + "import matplotlib.pyplot as plt\n", + "sns.set_theme()" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The ``flights`` dataset has 10 years of monthly airline passenger data:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "flights = sns.load_dataset(\"flights\")\n", + "flights.head()" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "To draw a line plot using long-form data, assign the ``x`` and ``y`` variables:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "may_flights = flights.query(\"month == 'May'\")\n", + "sns.lineplot(data=may_flights, x=\"year\", y=\"passengers\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Pivot the dataframe to a wide-form representation:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "flights_wide = flights.pivot(index=\"year\", columns=\"month\", values=\"passengers\")\n", + "flights_wide.head()" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "To plot a single vector, pass it to 
``data``. If the vector is a :class:`pandas.Series`, it will be plotted against its index:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.lineplot(data=flights_wide[\"May\"])" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Passing the entire wide-form dataset to ``data`` plots a separate line for each column:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.lineplot(data=flights_wide)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Passing the entire dataset in long-form mode will aggregate over repeated values (each year) to show the mean and 95% confidence interval:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.lineplot(data=flights, x=\"year\", y=\"passengers\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Assign a grouping semantic (``hue``, ``size``, or ``style``) to plot separate lines:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.lineplot(data=flights, x=\"year\", y=\"passengers\", hue=\"month\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The same column can be assigned to multiple semantic variables, which can increase the accessibility of the plot:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.lineplot(data=flights, x=\"year\", y=\"passengers\", hue=\"month\", style=\"month\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Use the `orient` parameter to aggregate and sort along the vertical dimension of the plot:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.lineplot(data=flights, x=\"passengers\",
y=\"year\", orient=\"y\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Each semantic variable can also represent a different column. For that, we'll need a more complex dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fmri = sns.load_dataset(\"fmri\")\n", + "fmri.head()" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Repeated observations are aggregated even when semantic grouping is used:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.lineplot(data=fmri, x=\"timepoint\", y=\"signal\", hue=\"event\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Assign both ``hue`` and ``style`` to represent two different grouping variables:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.lineplot(data=fmri, x=\"timepoint\", y=\"signal\", hue=\"region\", style=\"event\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "When assigning a ``style`` variable, markers can be used instead of (or along with) dashes to distinguish the groups:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.lineplot(\n", + " data=fmri,\n", + " x=\"timepoint\", y=\"signal\", hue=\"event\", style=\"event\",\n", + " markers=True, dashes=False\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Show error bars instead of error bands and extend them to two standard error widths:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.lineplot(\n", + " data=fmri, x=\"timepoint\", y=\"signal\", hue=\"event\", err_style=\"bars\", errorbar=(\"se\", 2),\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Assigning the 
``units`` variable will plot multiple lines without applying a semantic mapping:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.lineplot(\n", + " data=fmri.query(\"region == 'frontal'\"),\n", + " x=\"timepoint\", y=\"signal\", hue=\"event\", units=\"subject\",\n", + " estimator=None, lw=1,\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Load another dataset with a numeric grouping variable:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dots = sns.load_dataset(\"dots\").query(\"align == 'dots'\")\n", + "dots.head()" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Assigning a numeric variable to ``hue`` maps it differently, using a different default palette and a quantitative color mapping:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.lineplot(\n", + " data=dots, x=\"time\", y=\"firing_rate\", hue=\"coherence\", style=\"choice\",\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Control the color mapping by setting the ``palette`` and passing a :class:`matplotlib.colors.Normalize` object:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.lineplot(\n", + " data=dots.query(\"coherence > 0\"),\n", + " x=\"time\", y=\"firing_rate\", hue=\"coherence\", style=\"choice\",\n", + " palette=\"flare\", hue_norm=mpl.colors.LogNorm(),\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Or pass specific colors, either as a Python list or dictionary:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "palette = sns.color_palette(\"mako_r\", 6)\n", + "sns.lineplot(\n", + " data=dots, x=\"time\", y=\"firing_rate\",\n", + " 
hue=\"coherence\", style=\"choice\",\n", + " palette=palette\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Assign the ``size`` semantic to map the width of the lines with a numeric variable:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.lineplot(\n", + " data=dots, x=\"time\", y=\"firing_rate\",\n", + " size=\"coherence\", hue=\"choice\",\n", + " legend=\"full\"\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Pass a tuple, ``sizes=(smallest, largest)``, to control the range of linewidths used to map the ``size`` semantic:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.lineplot(\n", + " data=dots, x=\"time\", y=\"firing_rate\",\n", + " size=\"coherence\", hue=\"choice\",\n", + " sizes=(.25, 2.5)\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "By default, the observations are sorted by ``x``. Disable this to plot a line with the order that observations appear in the dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "x, y = np.random.normal(size=(2, 5000)).cumsum(axis=1)\n", + "sns.lineplot(x=x, y=y, sort=False, lw=1)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Use :func:`relplot` to combine :func:`lineplot` and :class:`FacetGrid`. This allows grouping within additional categorical variables.
Using :func:`relplot` is safer than using :class:`FacetGrid` directly, as it ensures synchronization of the semantic mappings across facets:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.relplot(\n", + " data=fmri, x=\"timepoint\", y=\"signal\",\n", + " col=\"region\", hue=\"event\", style=\"event\",\n", + " kind=\"line\"\n", + ")" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "8bdfc9d9da1e36addfcfc8a3409187c45d33387af0f87d0d91e99e8d6403f1c3" + }, + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/doc/_docstrings/lmplot.ipynb b/doc/_docstrings/lmplot.ipynb new file mode 100644 index 0000000000..4a5b4119b8 --- /dev/null +++ b/doc/_docstrings/lmplot.ipynb @@ -0,0 +1,157 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "034a9a5b-91ff-4ccc-932d-0f314e2cd6d2", + "metadata": {}, + "source": [ + "See the :func:`regplot` docs for demonstrations of various options for specifying the regression model, which are also accepted here." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "76c91243-3bd8-49a1-b8c8-b7272f09a3f1", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "sns.set_theme(style=\"ticks\")\n", + "penguins = sns.load_dataset(\"penguins\")" + ] + }, + { + "cell_type": "raw", + "id": "0ba9f55d-17ea-4084-a74f-852d51771380", + "metadata": {}, + "source": [ + "Plot a regression fit over a scatter plot:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2f789265-93c0-4867-b666-798713e4e7e5", + "metadata": {}, + "outputs": [], + "source": [ + "sns.lmplot(data=penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\")" + ] + }, + { + "cell_type": "raw", + "id": "7e4b0ad4-446c-4109-9393-961f76132e34", + "metadata": {}, + "source": [ + "Condition the regression fit on another variable and represent it using color:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "61347189-34e5-42ea-b77b-4acdef843326", + "metadata": {}, + "outputs": [], + "source": [ + "sns.lmplot(data=penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\", hue=\"species\")" + ] + }, + { + "cell_type": "raw", + "id": "c9b6d059-49dc-46a7-869b-86baa3a7ed65", + "metadata": {}, + "source": [ + "Condition the regression fit on another variable and split across subplots:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d8ec2955-ccc9-493c-b9ec-c78648ce9f53", + "metadata": {}, + "outputs": [], + "source": [ + "sns.lmplot(\n", + " data=penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\",\n", + " hue=\"species\", col=\"sex\", height=4,\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "de01dee1-b2ce-445c-8d0d-d054ca0dfedb", + "metadata": {}, + "source": [ + "Condition across two variables using both columns and rows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6f1264aa-829c-416a-805a-b989e5f11a17", + "metadata": {}, + "outputs": [], + "source": [ + "sns.lmplot(\n", + " 
data=penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\",\n", + " col=\"species\", row=\"sex\", height=3,\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "b3888f04-b22f-4205-8acc-24ce5b59568e", + "metadata": {}, + "source": [ + "Allow axis limits to vary across subplots:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "67ed5af1-d228-4b81-b4f8-21937c513a10", + "metadata": {}, + "outputs": [], + "source": [ + "sns.lmplot(\n", + " data=penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\",\n", + " col=\"species\", row=\"sex\", height=3,\n", + " facet_kws=dict(sharex=False, sharey=False),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "46e9cf18-c847-4c40-8e38-6c20cdde2be5", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/move_legend.ipynb b/doc/_docstrings/move_legend.ipynb new file mode 100644 index 0000000000..f16fcf502b --- /dev/null +++ b/doc/_docstrings/move_legend.ipynb @@ -0,0 +1,156 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "8ec46ad8-bc4c-4ee0-9626-271088c702f9", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "sns.set_theme()\n", + "penguins = sns.load_dataset(\"penguins\")" + ] + }, + { + "cell_type": "raw", + "id": "008bdd98-88cb-4a81-9f50-9b0e5a357305", + "metadata": {}, + "source": [ + "For axes-level functions, pass the :class:`matplotlib.axes.Axes` object and provide a new location." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b82e58f9-b15d-4554-bee5-de6a689344a6", + "metadata": {}, + "outputs": [], + "source": [ + "ax = sns.histplot(penguins, x=\"bill_length_mm\", hue=\"species\")\n", + "sns.move_legend(ax, \"center right\")" + ] + }, + { + "cell_type": "raw", + "id": "4f2a7f5d-ab39-46c7-87f4-532e607adf0b", + "metadata": {}, + "source": [ + "Use the `bbox_to_anchor` parameter for more fine-grained control, including moving the legend outside of the axes:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ed610a98-447a-4459-8342-48abc80330f0", + "metadata": {}, + "outputs": [], + "source": [ + "ax = sns.histplot(penguins, x=\"bill_length_mm\", hue=\"species\")\n", + "sns.move_legend(ax, \"upper left\", bbox_to_anchor=(1, 1))" + ] + }, + { + "cell_type": "raw", + "id": "9d2fd766-a806-45d9-949d-1572991cf512", + "metadata": {}, + "source": [ + "Pass additional :meth:`matplotlib.axes.Axes.legend` parameters to update other properties:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5ad4342c-c46e-49e9-98a2-6c88c6fb4c54", + "metadata": {}, + "outputs": [], + "source": [ + "ax = sns.histplot(penguins, x=\"bill_length_mm\", hue=\"species\")\n", + "sns.move_legend(\n", + " ax, \"lower center\",\n", + " bbox_to_anchor=(.5, 1), ncol=3, title=None, frameon=False,\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "0d573092-46fd-4a95-b7ed-7e6833823adc", + "metadata": {}, + "source": [ + "It's also possible to move the legend created by a figure-level function. 
But when fine-tuning the position, you must bear in mind that the figure will have extra blank space on the right:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b258a9b8-69e5-4d4a-94cb-5b6baddc402b", + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.displot(\n", + " penguins,\n", + " x=\"bill_length_mm\", hue=\"species\",\n", + " col=\"island\", col_wrap=2, height=3,\n", + ")\n", + "sns.move_legend(g, \"upper left\", bbox_to_anchor=(.55, .45))" + ] + }, + { + "cell_type": "raw", + "id": "c9dc54e2-2c66-412f-ab2a-4f2bc2cb5782", + "metadata": {}, + "source": [ + "One way to avoid this would be to set `legend_out=False` on the :class:`FacetGrid`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "06cff408-4cdf-47af-8def-176f3e70ec5a", + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.displot(\n", + " penguins,\n", + " x=\"bill_length_mm\", hue=\"species\",\n", + " col=\"island\", col_wrap=2, height=3,\n", + " facet_kws=dict(legend_out=False),\n", + ")\n", + "sns.move_legend(g, \"upper left\", bbox_to_anchor=(.55, .45), frameon=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b170f20d-22a9-4f7d-917a-d09e10b1f08c", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/mpl_palette.ipynb b/doc/_docstrings/mpl_palette.ipynb new file mode 100644 index 0000000000..c65d4292f8 --- /dev/null +++ b/doc/_docstrings/mpl_palette.ipynb @@ -0,0 +1,139 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": 
"1d0d41d3-463c-4c6f-aa65-38131bdf3ddb", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "sns.set_theme()\n", + "sns.palettes._patch_colormap_display()" + ] + }, + { + "cell_type": "markdown", + "id": "d2a0ae1e-a01e-49b3-a677-2b05a195990a", + "metadata": {}, + "source": [ + "Return discrete samples from a continuous matplotlib colormap:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2b6a4ce9-6e4e-4b59-ada8-14ef8aef21d7", + "metadata": {}, + "outputs": [], + "source": [ + "sns.mpl_palette(\"viridis\")" + ] + }, + { + "cell_type": "raw", + "id": "0ccc47b1-c969-46e2-93bb-b9eb5a2e2141", + "metadata": {}, + "source": [ + "Return the continuous colormap instead; note how the extreme values are more intense:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a8a1bc5d-1d62-45c6-a53b-9fadb58f11c0", + "metadata": {}, + "outputs": [], + "source": [ + "sns.mpl_palette(\"viridis\", as_cmap=True)" + ] + }, + { + "cell_type": "raw", + "id": "ff0d1a3b-8641-40c0-bb4b-c22b83ec9432", + "metadata": {}, + "source": [ + "Return more colors:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8faef1d8-a1eb-4060-be10-377342c9bd1d", + "metadata": {}, + "outputs": [], + "source": [ + "sns.mpl_palette(\"viridis\", 8)" + ] + }, + { + "cell_type": "raw", + "id": "612bf052-e888-411d-a2ea-6a742a78bc63", + "metadata": {}, + "source": [ + "Return values from a qualitative colormap:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "74db95a8-4898-4f6c-a57d-c751af1dc7bf", + "metadata": {}, + "outputs": [], + "source": [ + "sns.mpl_palette(\"Set2\")" + ] + }, + { + "cell_type": "raw", + "id": "918494bf-1b8e-4b00-8950-1bd73032dee1", + "metadata": {}, + "source": [ + "Notice how the palette will only contain distinct colors and can be shorter than requested:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"d97efa25-9050-4e28-b758-da6f43c9f963", + "metadata": {}, + "outputs": [], + "source": [ + "sns.mpl_palette(\"Set2\", 10)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f64ad118-e213-43cc-a714-98ed13cc3824", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/objects.Agg.ipynb b/doc/_docstrings/objects.Agg.ipynb new file mode 100644 index 0000000000..5e640f324a --- /dev/null +++ b/doc/_docstrings/objects.Agg.ipynb @@ -0,0 +1,140 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "0d053943-66c9-410d-ad65-ce91f1c1ff48", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn.objects as so\n", + "from seaborn import load_dataset\n", + "diamonds = load_dataset(\"diamonds\")" + ] + }, + { + "cell_type": "raw", + "id": "51b029af-b83b-4ae0-a6ff-f48bf9692518", + "metadata": {}, + "source": [ + "The default behavior is to aggregate by taking a mean over each group:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "28451b4e-9f4e-4604-b2b9-6138c4f51436", + "metadata": {}, + "outputs": [], + "source": [ + "p = so.Plot(diamonds, \"clarity\", \"carat\")\n", + "p.add(so.Bar(), so.Agg())" + ] + }, + { + "cell_type": "raw", + "id": "53859a3b-051c-423d-97ef-b03f647268b7", + "metadata": {}, + "source": [ + "Other aggregation functions can be selected by name if they are pandas methods:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5beaac3a-b9f7-4acc-81c7-480599e3675e", + "metadata": {}, + "outputs": [], 
+ "source": [ + "p.add(so.Bar(), so.Agg(\"median\"))" + ] + }, + { + "cell_type": "raw", + "id": "2d318ee3-56c1-4fd4-99a5-fa87db770f67", + "metadata": {}, + "source": [ + "It's also possible to pass an arbitrary aggregation function:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bd11e289-7274-464a-b781-06fb756cf8de", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Bar(), so.Agg(lambda x: x.quantile(.75) - x.quantile(.25)))" + ] + }, + { + "cell_type": "raw", + "id": "555394c1-25f8-4932-94d1-f67a8a9fa1c6", + "metadata": {}, + "source": [ + "When other mapping variables are assigned, they'll be used to define aggregation groups. With some marks, it may be helpful to use additional transforms, such as :class:`Dodge`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5755cdeb-1d1a-4434-9cc5-91024735eb4e", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Bar(), so.Agg(), so.Dodge(), color=\"cut\")" + ] + }, + { + "cell_type": "raw", + "id": "07eb1150-db57-4a58-b830-8a7aba9f46ec", + "metadata": {}, + "source": [ + "The variable that gets aggregated depends on the orientation of the layer, which is usually inferred from the coordinate variable types (but may also be specified with the `orient` parameter in :meth:`Plot.add`):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1bdcc970-1b6c-4a3d-b0bc-6c7a625163ff", + "metadata": {}, + "outputs": [], + "source": [ + "so.Plot(diamonds, \"carat\", \"clarity\").add(so.Bar(), so.Agg())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ad8006ff-5472-4345-9537-a5680c519f4f", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + 
"nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/objects.Area.ipynb b/doc/_docstrings/objects.Area.ipynb new file mode 100644 index 0000000000..86867b50be --- /dev/null +++ b/doc/_docstrings/objects.Area.ipynb @@ -0,0 +1,161 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "2923956c-f141-4ecb-ab08-e819099f0fa9", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn.objects as so\n", + "from seaborn import load_dataset\n", + "healthexp = (\n", + " load_dataset(\"healthexp\")\n", + " .pivot(index=\"Year\", columns=\"Country\", values=\"Spending_USD\")\n", + " .interpolate()\n", + " .stack()\n", + " .rename(\"Spending_USD\")\n", + " .reset_index()\n", + " .sort_values(\"Country\")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6d3bc7fe-0b0b-49eb-8f8b-ddd8c7441044", + "metadata": {}, + "outputs": [], + "source": [ + "p = so.Plot(healthexp, \"Year\", \"Spending_USD\").facet(\"Country\", wrap=3)\n", + "p.add(so.Area())" + ] + }, + { + "cell_type": "raw", + "id": "3a47b7f1-31ef-4218-a1ea-c289f3c64ab5", + "metadata": {}, + "source": [ + "The `color` property sets both the edge and fill color:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1697359a-bf26-49d0-891b-49c207cab82d", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Area(), color=\"Country\")" + ] + }, + { + "cell_type": "raw", + "id": "9bfaed37-7153-45d9-89e5-b348c7c14401", + "metadata": {}, + "source": [ + "It's also possible to map only the `edgecolor`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39e5c9e5-793e-450c-a5d2-e09d5ad1f854", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Area(color=\".5\", edgewidth=2), edgecolor=\"Country\")" + ] + }, + { + "cell_type": "raw", + "id": 
"0b1a5297-9e96-472d-b284-919048e41358", + "metadata": {}, + "source": [ + "The mark is drawn as a polygon, but it can be combined with :class:`Line` to draw a shaded region by setting `edgewidth=0`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "42b65535-acf6-4634-84bd-6e35305e3018", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Area(edgewidth=0)).add(so.Line())" + ] + }, + { + "cell_type": "raw", + "id": "59761f97-eadb-4047-9e6b-09339545fe57", + "metadata": {}, + "source": [ + "The layer's orientation defines the axis that the mark fills from:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a1c30f88-6287-486d-ae4b-fc272bc8e6ab", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Area(), x=\"Spending_USD\", y=\"Year\", orient=\"y\")" + ] + }, + { + "cell_type": "raw", + "id": "f1b893c5-6847-4e5b-9fc2-4190ddd75099", + "metadata": {}, + "source": [ + "This mark can be stacked to show part-whole relationships:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "66a79e6e-3e7f-4f54-9394-f8b003a0e228", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(healthexp, \"Year\", \"Spending_USD\", color=\"Country\")\n", + " .add(so.Area(alpha=.7), so.Stack())\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "69f4e423-94f4-4003-b337-12162d1040c2", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/objects.Band.ipynb b/doc/_docstrings/objects.Band.ipynb new file mode 100644 index 
0000000000..896f96a199 --- /dev/null +++ b/doc/_docstrings/objects.Band.ipynb @@ -0,0 +1,143 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "2923956c-f141-4ecb-ab08-e819099f0fa9", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn.objects as so\n", + "from seaborn import load_dataset\n", + "fmri = load_dataset(\"fmri\").query(\"region == 'parietal'\")\n", + "seaice = (\n", + " load_dataset(\"seaice\")\n", + " .assign(\n", + " Day=lambda x: x[\"Date\"].dt.day_of_year,\n", + " Year=lambda x: x[\"Date\"].dt.year,\n", + " )\n", + " .query(\"Year >= 1980\")\n", + " .astype({\"Year\": str})\n", + " .pivot(index=\"Day\", columns=\"Year\", values=\"Extent\")\n", + " .filter([\"1980\", \"2019\"])\n", + " .dropna()\n", + " .reset_index()\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "e840e876-fbd6-4bfd-868c-a9d7af7913fa", + "metadata": {}, + "source": [ + "The mark fills between pairs of data points to show an interval on the value axis:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "518cf20d-bb0b-433a-9b25-f1ed8d432149", + "metadata": {}, + "outputs": [], + "source": [ + "p = so.Plot(seaice, x=\"Day\", ymin=\"1980\", ymax=\"2019\")\n", + "p.add(so.Band())" + ] + }, + { + "cell_type": "raw", + "id": "fa50b778-13f9-4368-a967-68365fd51117", + "metadata": {}, + "source": [ + "By default it draws a faint ribbon with no edges, but edges can be added:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a05176c4-0615-49ca-a2df-48ced8b5a8a8", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Band(alpha=.5, edgewidth=2))" + ] + }, + { + "cell_type": "raw", + "id": "776d192a-f35f-4253-be7f-01e4b2466dad", + "metadata": {}, + "source": [ + "The defaults are optimized for the main expected usecase, where the mark is combined with a line to show an errorbar interval:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"69f4e423-94f4-4003-b337-12162d1040c2", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(fmri, x=\"timepoint\", y=\"signal\", color=\"event\")\n", + " .add(so.Band(), so.Est())\n", + " .add(so.Line(), so.Agg())\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "9f0c82bf-3457-4ac5-ba48-8930bac03d75", + "metadata": {}, + "source": [ + "When min/max values are not explicitly assigned or added in a transform, the band will cover the full extent of the data:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "309f578e-da3d-4dc5-b6ac-a354321334c8", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(fmri, x=\"timepoint\", y=\"signal\", color=\"event\")\n", + " .add(so.Line(linewidth=.5), group=\"subject\")\n", + " .add(so.Band())\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4330a3cd-63fe-470a-8e83-09e9606643b5", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/objects.Bar.ipynb b/doc/_docstrings/objects.Bar.ipynb new file mode 100644 index 0000000000..8d746252aa --- /dev/null +++ b/doc/_docstrings/objects.Bar.ipynb @@ -0,0 +1,186 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "2923956c-f141-4ecb-ab08-e819099f0fa9", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn.objects as so\n", + "from seaborn import load_dataset\n", + "penguins = load_dataset(\"penguins\")\n", + "flights = load_dataset(\"flights\").query(\"year == 1960\")" + ] + }, + { + "cell_type": 
"raw", + "id": "4e817cdd-09a3-4cf6-8602-e9665607bfe1", + "metadata": {}, + "source": [ + "The mark draws discrete bars from a baseline to provided values:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a4e5ba1-50ce-4060-8eb7-f17fee9080c0", + "metadata": {}, + "outputs": [], + "source": [ + "so.Plot(flights[\"month\"], flights[\"passengers\"]).add(so.Bar())" + ] + }, + { + "cell_type": "raw", + "id": "252cf7b2-7fc8-4085-8174-0126743d8a08", + "metadata": {}, + "source": [ + "The bars are oriented depending on the x/y variable types and the `orient` parameter:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "81dbbc81-178a-46dd-9acf-2c57d2a7e315", + "metadata": {}, + "outputs": [], + "source": [ + "so.Plot(flights[\"passengers\"], flights[\"month\"]).add(so.Bar())" + ] + }, + { + "cell_type": "markdown", + "id": "6fddeceb-25b9-4fc1-bae0-4cc4cb612674", + "metadata": {}, + "source": [ + "A common usecase will be drawing histograms on a variable with a nominal scale:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "08604543-c681-4cd3-943e-b57c0f863b2e", + "metadata": {}, + "outputs": [], + "source": [ + "so.Plot(penguins, x=\"species\").add(so.Bar(), so.Hist())" + ] + }, + { + "cell_type": "markdown", + "id": "8b9af978-fdb0-46aa-9cf9-d3e49e38b344", + "metadata": {}, + "source": [ + "When mapping additional variables, the bars will overlap by default:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "297f7fef-7c31-40dd-ac68-e0ce7f131528", + "metadata": {}, + "outputs": [], + "source": [ + "so.Plot(penguins, x=\"species\", color=\"sex\").add(so.Bar(), so.Hist())" + ] + }, + { + "cell_type": "raw", + "id": "cd9b7b4a-3150-42b5-b1a8-1c5950ca8703", + "metadata": {}, + "source": [ + "Apply a move transform, such as a :class:`Dodge` or :class:`Stack` to resolve them:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a13c7594-737c-4215-b2a2-e59fc2d033c3", + 
"metadata": {}, + "outputs": [], + "source": [ + "so.Plot(penguins, x=\"species\", color=\"sex\").add(so.Bar(), so.Hist(), so.Dodge())" + ] + }, + { + "cell_type": "raw", + "id": "f5f44a6b-610a-4523-a7c2-39c804a60520", + "metadata": {}, + "source": [ + "A number of properties can be mapped or set:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e5cbf5a9-effb-4550-bdaf-c266dc69d3f0", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(\n", + " penguins, x=\"species\",\n", + " color=\"sex\", alpha=\"sex\", edgestyle=\"sex\",\n", + " )\n", + " .add(so.Bar(edgewidth=2), so.Hist(), so.Dodge(\"fill\"))\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "539144d9-75bc-4eb0-8fed-ca57b516b6d3", + "metadata": {}, + "source": [ + "Combine with :class:`Range` to plot an estimate with errorbars:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "89233c4a-38e7-4807-b3b4-3b4540ffcf56", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(penguins, \"body_mass_g\", \"species\", color=\"sex\")\n", + " .add(so.Bar(alpha=.5), so.Agg(), so.Dodge())\n", + " .add(so.Range(), so.Est(errorbar=\"sd\"), so.Dodge())\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4f6a97a0-2d92-4fd5-ad98-b4299bda1b6b", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/objects.Bars.ipynb b/doc/_docstrings/objects.Bars.ipynb new file mode 100644 index 0000000000..b6609731e2 --- /dev/null +++ b/doc/_docstrings/objects.Bars.ipynb @@ -0,0 +1,165 @@ +{ + 
"cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "2923956c-f141-4ecb-ab08-e819099f0fa9", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn.objects as so\n", + "from seaborn import load_dataset\n", + "diamonds = load_dataset(\"diamonds\")" + ] + }, + { + "cell_type": "raw", + "id": "5cf83822-ceb1-4ce5-8364-069466f7aa40", + "metadata": {}, + "source": [ + "This mark draws bars between a baseline and a value. In contrast to :class:`Bar`, the bars have a full width and thin edges by default; this makes this mark a better choice for a continuous histogram:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e9b99eaf-695f-41ae-9bd1-bfe406dedb63", + "metadata": {}, + "outputs": [], + "source": [ + "p = so.Plot(diamonds, \"price\").scale(x=\"log\")\n", + "p.add(so.Bars(), so.Hist())" + ] + }, + { + "cell_type": "raw", + "id": "bc4c0f25-3f7a-4a2c-a032-151da47f5ea3", + "metadata": {}, + "source": [ + "When mapping the color or other properties, bars will overlap by default; this is usually confusing:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7989211b-7a29-4763-bb97-4ea19cdef081", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Bars(), so.Hist(), color=\"cut\")" + ] + }, + { + "cell_type": "raw", + "id": "f16a3b5d-1ac1-4d9d-9bc6-d4cea7f83a17", + "metadata": {}, + "source": [ + "Using a move transform, such as :class:`Stack` or :class:`Dodge`, will resolve the overlap (although faceting might often be a better approach):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8933f5f7-1423-4741-b7be-6239ea8b2fee", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Bars(), so.Hist(), so.Stack(), color=\"cut\")" + ] + }, + { + "cell_type": "raw", + "id": "74075e80-0361-4388-a459-cbfa6418df6c", + "metadata": {}, + "source": [ + "A number of different properties can be set or mapped:" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "04fada68-a61b-451c-b3bd-9aaab16b5f29", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Bars(edgewidth=0), so.Hist(), so.Stack(), alpha=\"clarity\")" + ] + }, + { + "cell_type": "raw", + "id": "a14d7d36-9d8b-4024-8653-002e9da946d7", + "metadata": {}, + "source": [ + "It is possible to draw unfilled bars, but you must override the default edge color:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "21642f8c-99c7-4f61-b3f5-bc1dacc638c3", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Bars(fill=False, edgecolor=\"C0\", edgewidth=1.5), so.Hist())" + ] + }, + { + "cell_type": "raw", + "id": "dce5b6cc-0808-48ec-b4d6-0c0c2e5178d2", + "metadata": {}, + "source": [ + "It is also possible to narrow the bars, which may be useful for dealing with overlap in some cases:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "166693bf-420c-4ec3-8da2-abc22724952b", + "metadata": {}, + "outputs": [], + "source": [ + "hist = so.Hist(binwidth=.075, binrange=(2, 5))\n", + "(\n", + " p.add(so.Bars(), hist)\n", + " .add(\n", + " so.Bars(color=\".9\", width=.5), hist,\n", + " data=diamonds.query(\"cut == 'Ideal'\")\n", + " )\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b40b02c4-fb2c-4300-93e4-24ea28bc6ef8", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/objects.Count.ipynb b/doc/_docstrings/objects.Count.ipynb new file mode 100644 index 0000000000..ee7af016e9 --- /dev/null +++ 
b/doc/_docstrings/objects.Count.ipynb @@ -0,0 +1,121 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "89113d6b-70b9-4ebe-9910-10a80eab246e", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn.objects as so\n", + "from seaborn import load_dataset\n", + "tips = load_dataset(\"tips\")" + ] + }, + { + "cell_type": "raw", + "id": "daf6ff78-df24-4541-ba72-73fb9eddb50d", + "metadata": {}, + "source": [ + "The transform counts distinct observations of the orientation variable and defines a new variable on the opposite axis:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "390f2fd3-0596-40e3-b262-163b3a90d055", + "metadata": {}, + "outputs": [], + "source": [ + "so.Plot(tips, x=\"day\").add(so.Bar(), so.Count())" + ] + }, + { + "cell_type": "raw", + "id": "813fb4a5-db68-4b51-b236-5b5628ebba47", + "metadata": {}, + "source": [ + "When additional mapping variables are defined, they are also used to define groups:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "76a4ae70-e914-4f54-b979-ce1b79374fc3", + "metadata": {}, + "outputs": [], + "source": [ + "so.Plot(tips, x=\"day\", color=\"sex\").add(so.Bar(), so.Count(), so.Dodge())" + ] + }, + { + "cell_type": "raw", + "id": "2973dee1-5aee-4768-846d-22d220faf170", + "metadata": {}, + "source": [ + "Unlike :class:`Hist`, numeric data are not binned before counting:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6f94c5f0-680e-4d8a-a1c9-70876980dd1c", + "metadata": {}, + "outputs": [], + "source": [ + "so.Plot(tips, x=\"size\").add(so.Bar(), so.Count())" + ] + }, + { + "cell_type": "raw", + "id": "11acd5e6-f477-4eb1-b1d7-72f4582bca45", + "metadata": {}, + "source": [ + "When the `y` variable is defined, the counts are assigned to the `x` variable:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "924e0e35-210f-4f65-83b4-4aebe41ad264", + "metadata": {}, + "outputs": 
[], + "source": [ + "so.Plot(tips, y=\"size\").add(so.Bar(), so.Count())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0229fa39-b6dc-48da-9a25-31e25ed34ebc", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/objects.Dash.ipynb b/doc/_docstrings/objects.Dash.ipynb new file mode 100644 index 0000000000..845fbc5216 --- /dev/null +++ b/doc/_docstrings/objects.Dash.ipynb @@ -0,0 +1,168 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "3227e585-7166-44e7-b0c2-8570e098102d", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn.objects as so\n", + "from seaborn import load_dataset\n", + "penguins = load_dataset(\"penguins\")" + ] + }, + { + "cell_type": "raw", + "id": "1b424322-eaa4-45c7-8007-a671ef2afbde", + "metadata": {}, + "source": [ + "A line segment is drawn for each datapoint, centered on the value along the orientation axis:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fc835356-2dc2-4583-a9f9-c1fe0a6cc9ea", + "metadata": {}, + "outputs": [], + "source": [ + "p = so.Plot(penguins, \"species\", \"body_mass_g\", color=\"sex\")\n", + "p.add(so.Dash())" + ] + }, + { + "cell_type": "raw", + "id": "ad9b94de-f19f-4e60-8275-686e749da39c", + "metadata": {}, + "source": [ + "A number of properties can be mapped or set directly:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6070a665-ab19-43a6-9eba-e206193d9422", + "metadata": {}, + "outputs": [], + "source": [ + 
"p.add(so.Dash(alpha=.5), linewidth=\"flipper_length_mm\")" + ] + }, + { + "cell_type": "raw", + "id": "2c4a8291-0a84-4e70-a992-756850933791", + "metadata": {}, + "source": [ + "The mark has a `width` property, which is relative to the spacing between orientation values:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "315327da-421e-46c8-8a1b-8b87355d0439", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Dash(width=.5))" + ] + }, + { + "cell_type": "raw", + "id": "224bf51a-b8d8-4d8e-b0ab-b63ec6788584", + "metadata": {}, + "source": [ + "When dodged, the width will automatically adapt:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "227e889c-7ce7-49fc-b985-f7746393930e", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Dash(), so.Dodge())" + ] + }, + { + "cell_type": "raw", + "id": "aa807f57-5d37-4faa-8fd2-1e5378115f9f", + "metadata": {}, + "source": [ + "This mark works well to show aggregate values when paired with a strip plot:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5141e0b8-ea1a-4178-adde-21b4bc2e705f", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " p\n", + " .add(so.Dash(), so.Agg(), so.Dodge())\n", + " .add(so.Dots(), so.Dodge(), so.Jitter())\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "f2abd4b7-5afb-4661-95f3-b51bfa101273", + "metadata": {}, + "source": [ + "When both coordinate variables are numeric, you can control the orientation explicitly:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f6d7e236-327f-460f-b12e-46d7444ac348", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(\n", + " penguins[\"body_mass_g\"],\n", + " penguins[\"flipper_length_mm\"].round(-1),\n", + " )\n", + " .add(so.Dash(), orient=\"y\")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6811d776-93e5-49ce-88a6-14786a67841d", + "metadata": {}, + "outputs": [], + "source": [] + } + 
], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/objects.Dodge.ipynb b/doc/_docstrings/objects.Dodge.ipynb new file mode 100644 index 0000000000..1b3c0e1d07 --- /dev/null +++ b/doc/_docstrings/objects.Dodge.ipynb @@ -0,0 +1,198 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "4d44a940-db84-4e16-bc83-e67d08d6d56a", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn.objects as so\n", + "from seaborn import load_dataset\n", + "tips = load_dataset(\"tips\").astype({\"time\": str})" + ] + }, + { + "cell_type": "raw", + "id": "ce99e1a1-c213-478f-a5bc-d19e2c4d70db", + "metadata": {}, + "source": [ + "This transform modifies both the width and position (along the orientation axis) of marks that would otherwise overlap:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f6a84062-2c2b-4a45-91cb-77f29462104d", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(tips, \"day\", color=\"time\")\n", + " .add(so.Bar(), so.Count(), so.Dodge())\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "55d3a9a8-c973-4e91-9f3a-bc137df15f48", + "metadata": {}, + "source": [ + "By default, empty space may appear when variables are not fully crossed:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "08ae1c65-5ad9-47a3-a8f3-d901bd4821f2", + "metadata": {}, + "outputs": [], + "source": [ + "p = so.Plot(tips, \"day\", color=\"time\")\n", + "p.add(so.Bar(), so.Count(), so.Dodge())" + ] + }, + { + "cell_type": "raw", + "id": "2125f07d-4210-4d49-8761-bcfa3f9c67f5", + 
"metadata": {}, + "source": [ + "The `empty` parameter handles this case; use it to fill out the space:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c2314343-de73-45d7-9595-acf5f7d62e93", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Bar(), so.Count(), so.Dodge(empty=\"fill\"))" + ] + }, + { + "cell_type": "raw", + "id": "08f4382c-842e-4777-a452-1d88251da6e7", + "metadata": {}, + "source": [ + "Or center the marks while using a consistent width:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1e0745e4-be11-4703-bf9c-4b13cbb76e91", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Bar(), so.Count(), so.Dodge(empty=\"drop\"))" + ] + }, + { + "cell_type": "raw", + "id": "7d29ec53-caef-4cff-9828-dc242adb5c49", + "metadata": {}, + "source": [ + "Use `gap` to add a bit of spacing between dodged marks:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "342aca16-c67b-4bc4-9101-fec6c398aa0f", + "metadata": {}, + "outputs": [], + "source": [ + "p = so.Plot(tips, \"day\", \"total_bill\", color=\"sex\")\n", + "p.add(so.Bar(), so.Agg(\"sum\"), so.Dodge(gap=.1))" + ] + }, + { + "cell_type": "raw", + "id": "68b52dcb-c5e7-4186-b61f-e96fac5f4d40", + "metadata": {}, + "source": [ + "When multiple semantic variables are used, each distinct group will be dodged:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "497f3e3b-39bc-4381-85bb-be5bb5c60b1f", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Dot(), so.Dodge(), fill=\"smoker\")" + ] + }, + { + "cell_type": "raw", + "id": "795835d2-904f-4343-89c2-b91be9c1c504", + "metadata": {}, + "source": [ + "Use `by` to dodge only a subset of variables:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "da01f6c0-c425-409c-a010-5cb52a794dc9", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Dot(), so.Dodge(by=[\"color\"]), fill=\"smoker\")" + ] + }, + { + "cell_type": "raw", + 
"id": "77de77da-2fad-4374-9d14-90520e448c90", + "metadata": {}, + "source": [ + "When combining with other transforms (such as :class:`Jitter` or :class:`Stack`), be mindful of the order that they are applied in:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "29ccabd6-6bd5-4563-a337-f8f8d25f7dad", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Dot(), so.Dodge(), so.Jitter())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a73fe9a5-c717-41fd-874e-be72334ea6d4", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/objects.Dot.ipynb b/doc/_docstrings/objects.Dot.ipynb new file mode 100644 index 0000000000..2a60745320 --- /dev/null +++ b/doc/_docstrings/objects.Dot.ipynb @@ -0,0 +1,190 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "2923956c-f141-4ecb-ab08-e819099f0fa9", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn.objects as so\n", + "from seaborn import load_dataset\n", + "tips = load_dataset(\"tips\")\n", + "glue = load_dataset(\"glue\")" + ] + }, + { + "cell_type": "raw", + "id": "f8e7b343-0301-49b3-8d42-862266d322bb", + "metadata": {}, + "source": [ + "This mark draws relatively large, filled dots by default:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f92e97d0-b6a5-41ec-8507-dc64e60cb6e0", + "metadata": {}, + "outputs": [], + "source": [ + "p1 = so.Plot(tips, \"total_bill\", \"tip\")\n", + "p1.add(so.Dot())" + ] + }, + { + "cell_type": "raw", + "id": 
"625abe2a-7b0b-42a7-bfbc-dc2bfaf14897", + "metadata": {}, + "source": [ + "While :class:`Dots` is a better choice for dense scatter plots, adding a thin edge can help to resolve individual points:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a3c7c22d-c7ce-40a9-941b-a8bc30db1e54", + "metadata": {}, + "outputs": [], + "source": [ + "p1.add(so.Dot(edgecolor=\"w\"))" + ] + }, + { + "cell_type": "markdown", + "id": "398a43e1-4d45-42ea-bc87-41a8602540a4", + "metadata": {}, + "source": [ + "Dodging and jittering can also help to reduce overplotting, when appropriate:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1b15e393-35cf-457f-8180-d92d05e2675a", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(tips, \"total_bill\", \"day\", color=\"sex\")\n", + " .add(so.Dot(), so.Dodge(), so.Jitter(.2))\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "12453ada-40e6-4aad-9f32-ba41fd7b27ca", + "metadata": {}, + "source": [ + "The larger dot size makes this mark well suited to representing values along a nominal scale:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bd2edac0-ee6b-4cc9-8201-641b589630b8", + "metadata": {}, + "outputs": [], + "source": [ + "p2 = so.Plot(glue, \"Score\", \"Model\").facet(\"Task\", wrap=4).limit(x=(-5, 105))\n", + "p2.add(so.Dot())" + ] + }, + { + "cell_type": "raw", + "id": "ddd86209-d5cd-4f7a-9274-c578bc6a9f07", + "metadata": {}, + "source": [ + "A number of properties can be set or mapped:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d00cdc35-4b9c-4f32-a047-8e036e565c4f", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " p2\n", + " .add(so.Dot(pointsize=6), color=\"Year\", marker=\"Encoder\")\n", + " .scale(marker=[\"o\", \"s\"], color=\"flare\")\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "061e22f4-8505-425d-8c80-8ac82c6a3125", + "metadata": {}, + "source": [ + "Note that the edge properties are 
parameterized differently for filled and unfilled markers; use `stroke` and `color` rather than `edgewidth` and `edgecolor` if the marker is unfilled:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "964b00be-1c29-4664-838d-0daeead9154a", + "metadata": {}, + "outputs": [], + "source": [ + "p2.add(so.Dot(stroke=1.5), fill=\"Encoder\", color=\"Encoder\")" + ] + }, + { + "cell_type": "raw", + "id": "fb5e1383-1460-4389-a67b-09ec7965af90", + "metadata": {}, + "source": [ + "Combine with :class:`Range` to show error bars:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b2618c22-bc7f-4ddd-9824-346e8d9b2b51", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(tips, x=\"total_bill\", y=\"day\")\n", + " .add(so.Dot(pointsize=3), so.Shift(y=.2), so.Jitter(.2))\n", + " .add(so.Dot(), so.Agg())\n", + " .add(so.Range(), so.Est(errorbar=(\"se\", 2)))\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e5dc04fd-dba4-4b86-99a1-31ba00c7650d", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/objects.Dots.ipynb b/doc/_docstrings/objects.Dots.ipynb new file mode 100644 index 0000000000..2576b899b2 --- /dev/null +++ b/doc/_docstrings/objects.Dots.ipynb @@ -0,0 +1,146 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "2923956c-f141-4ecb-ab08-e819099f0fa9", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn.objects as so\n", + "from seaborn import load_dataset\n", + "mpg = 
load_dataset(\"mpg\")" + ] + }, + { + "cell_type": "raw", + "id": "f8e7b343-0301-49b3-8d42-862266d322bb", + "metadata": {}, + "source": [ + "This mark draws relatively small, partially-transparent dots:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d668d7f6-555b-4b5d-876e-35e259076d2a", + "metadata": {}, + "outputs": [], + "source": [ + "p1 = so.Plot(mpg, \"horsepower\", \"mpg\")\n", + "p1.add(so.Dots())" + ] + }, + { + "cell_type": "raw", + "id": "a2cf4669-9c91-4adc-9e3a-3b0660e7898e", + "metadata": {}, + "source": [ + "Fixing or mapping the `color` property changes both the stroke (edge) and fill:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bba2b1c5-22fd-4f44-af8d-defb31dfbe9d", + "metadata": {}, + "outputs": [], + "source": [ + "p1.add(so.Dots(), color=\"origin\")" + ] + }, + { + "cell_type": "raw", + "id": "bf967d57-22cf-4bce-b718-aae6936719e6", + "metadata": {}, + "source": [ + "These properties can be independently parametrized (although the resulting plot may not always be clear):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c45261a9-fb88-4eb5-b633-060debda261b", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " p1.add(so.Dots(fillalpha=.5), color=\"origin\", fillcolor=\"weight\")\n", + " .scale(fillcolor=\"binary\")\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "b20dcaee-8e09-4a76-8eff-5289ef43ea8c", + "metadata": {}, + "source": [ + "Filled and unfilled markers will happily mix:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a1a9bdda-abb7-4850-a936-ceed518b9b17", + "metadata": {}, + "outputs": [], + "source": [ + "p1.add(so.Dots(stroke=1), marker=\"origin\").scale(marker=[\"o\", \"x\", (6, 2, 1)])" + ] + }, + { + "cell_type": "raw", + "id": "1d932f10-e8f8-4114-9362-3da82c7b5ac0", + "metadata": {}, + "source": [ + "The partial opacity also helps to see local density when using jitter:" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "692e1611-4804-4979-b616-041e9fa9cdd9", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(mpg, \"horsepower\", \"origin\")\n", + " .add(so.Dots(), so.Jitter(.25))\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "acd5788f-e62b-497c-a109-f0bc02b8cae9", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/objects.Est.ipynb b/doc/_docstrings/objects.Est.ipynb new file mode 100644 index 0000000000..94aacfa902 --- /dev/null +++ b/doc/_docstrings/objects.Est.ipynb @@ -0,0 +1,160 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "57ececfa-0ae0-4acb-b85d-7c6a6ca8d3db", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn.objects as so\n", + "from seaborn import load_dataset\n", + "diamonds = load_dataset(\"diamonds\")" + ] + }, + { + "cell_type": "raw", + "id": "03c64256-8daf-4b32-87bd-b425e27a7823", + "metadata": {}, + "source": [ + "The default behavior is to compute the mean and 95% confidence interval (using bootstrapping):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "46017dc7-7c3c-4dcf-9232-2e3ac490d980", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "p = so.Plot(diamonds, \"clarity\", \"carat\")\n", + "p.add(so.Range(), so.Est())" + ] + }, + { + "cell_type": "raw", + "id": "1bf04e8d-998e-4a47-9375-ddcde76e3914", + "metadata": {}, + "source": [ + "Other estimators may be selected by name if they are pandas methods:" + ] + }, + { 
+ "cell_type": "code", + "execution_count": null, + "id": "ea394c55-8fa6-4fb0-8665-42c03ef3576e", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Range(), so.Est(\"median\"))" + ] + }, + { + "cell_type": "raw", + "id": "9c5f3c91-fecb-4e75-b045-b30870154083", + "metadata": {}, + "source": [ + "There are several options for computing the error bar interval, such as (scaled) standard errors:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9c350af5-d549-4cce-b3f2-e9bef33aef36", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Range(), so.Est(errorbar=\"se\"))" + ] + }, + { + "cell_type": "raw", + "id": "8c8d321b-5e73-418c-8c71-4b91cf187e57", + "metadata": {}, + "source": [ + "The error bars can also represent the spread of the distribution around the estimate using (scaled) standard deviations:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fd2cd9dc-e4c9-4ba1-ac79-38806cf1e009", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Range(), so.Est(errorbar=\"sd\"))" + ] + }, + { + "cell_type": "raw", + "id": "6dba074b-881c-40df-b42e-458e4a26e23d", + "metadata": {}, + "source": [ + "Because confidence intervals are computed using bootstrapping, there will be small amounts of randomness. 
Reduce the random variability by increasing the number of bootstrap iterations (although this will be slower), or eliminate it by seeding the random number generator:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d6b450e1-8b1f-411f-aa01-bbb46ab3b6ec", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Range(), so.Est(seed=0))" + ] + }, + { + "cell_type": "markdown", + "id": "df807ef8-b5fb-4eac-b539-1bd4e797ddc2", + "metadata": {}, + "source": [ + "To compute a weighted estimate (and confidence interval), assign a `weight` variable in the layer where you use the stat:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5e4a0594-e1ee-4f72-971e-3763dd626e8b", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Range(), so.Est(), weight=\"price\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0d0c34d7-fb76-44cf-9079-3ec7f45741d0", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/objects.Hist.ipynb b/doc/_docstrings/objects.Hist.ipynb new file mode 100644 index 0000000000..93ed02ea21 --- /dev/null +++ b/doc/_docstrings/objects.Hist.ipynb @@ -0,0 +1,231 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "59690096-a0ad-4ff3-b82c-0258d724035a", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn.objects as so\n", + "from seaborn import load_dataset\n", + "penguins = load_dataset(\"penguins\")" + ] + }, + { + "cell_type": "raw", + "id":
"c345a35c-bac8-4163-ba40-e7c208df1033", + "metadata": {}, + "source": [ + "For discrete or categorical variables, this stat is commonly combined with a :class:`Bar` mark:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6a96ac9b-1240-496d-9385-840205945208", + "metadata": {}, + "outputs": [], + "source": [ + "so.Plot(penguins, \"island\").add(so.Bar(), so.Hist())" + ] + }, + { + "cell_type": "raw", + "id": "1e5ff9d5-c6a9-4adc-a9be-0f155b1575be", + "metadata": {}, + "source": [ + "When used to estimate a univariate distribution, it is better to use the :class:`Bars` mark:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7f3e3144-752a-4d71-9528-85eb1ed0a9a4", + "metadata": {}, + "outputs": [], + "source": [ + "p = so.Plot(penguins, \"flipper_length_mm\")\n", + "p.add(so.Bars(), so.Hist())" + ] + }, + { + "cell_type": "raw", + "id": "008b9ffe-da74-4406-9756-4f70e333f33b", + "metadata": {}, + "source": [ + "The granularity of the bins will influence whether the underlying distribution is accurately represented. Adjust it by setting the total number:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "27d221d5-add5-40a8-85d2-05102384dad1", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Bars(), so.Hist(bins=20))" + ] + }, + { + "cell_type": "raw", + "id": "fffebb54-0299-45c5-b7fb-6fcad6427239", + "metadata": {}, + "source": [ + "Alternatively, specify the *width* of the bins:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d036ca65-7dcf-45ac-a2d1-caafb9f922a7", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Bars(), so.Hist(binwidth=5))" + ] + }, + { + "cell_type": "raw", + "id": "bc1e4bd3-2a16-42bd-9c13-a660dd381f66", + "metadata": {}, + "source": [ + "By default, the transform returns the count of observations in each bin. The counts can be normalized, e.g. 
to show a proportion:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dbf23712-2231-4226-8265-0e2a5299c4bb", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Bars(), so.Hist(stat=\"proportion\"))" + ] + }, + { + "cell_type": "raw", + "id": "6c6fb23e-78c5-4630-a958-62cb4dee4ec8", + "metadata": {}, + "source": [ + "When additional variables define groups, the default behavior is to normalize across all groups:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac3fe4ef-56e3-4ec7-b580-596d2a3d924b", + "metadata": {}, + "outputs": [], + "source": [ + "p = p.facet(\"island\")\n", + "p.add(so.Bars(), so.Hist(stat=\"proportion\"))" + ] + }, + { + "cell_type": "raw", + "id": "f7afc403-26cc-4325-a28a-913c2291aa35", + "metadata": {}, + "source": [ + "Pass `common_norm=False` to normalize each distribution independently:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b2029324-069f-4261-a178-1efad2fd0e88", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Bars(), so.Hist(stat=\"proportion\", common_norm=False))" + ] + }, + { + "cell_type": "raw", + "id": "0f83401a-e456-4a14-af69-f1483c6c03c4", + "metadata": {}, + "source": [ + "Or, with more than one grouping variable, specify a subset to normalize within:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5c092262-8a8f-4a3e-8cae-9e0f23dd94ba", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Bars(), so.Hist(stat=\"proportion\", common_norm=[\"col\"]), color=\"sex\")" + ] + }, + { + "cell_type": "raw", + "id": "86532133-bf33-4674-9614-86ae3408aa51", + "metadata": {}, + "source": [ + "When distributions overlap it may be easier to discern their shapes with an :class:`Area` mark:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "00b18ad8-52d4-460a-a012-d87c66b3e71e", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Area(), so.Hist(), color=\"sex\")" + ] + }, + { +
"cell_type": "raw", + "id": "2b34d435-abbf-41aa-b219-91883d7d29f3", + "metadata": {}, + "source": [ + "Or add a :class:`Stack` move to represent a part-whole relationship:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3a7a0c05-d774-4f99-950f-5dc9865027c4", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Bars(), so.Hist(), so.Stack(), color=\"sex\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e247e74b-2c09-40f0-8f45-9fa5f8264d78", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/objects.Jitter.ipynb b/doc/_docstrings/objects.Jitter.ipynb new file mode 100644 index 0000000000..ede8ce43c5 --- /dev/null +++ b/doc/_docstrings/objects.Jitter.ipynb @@ -0,0 +1,178 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "f2e5a85d-c710-492b-a4fc-09b45ae26471", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn.objects as so\n", + "from seaborn import load_dataset\n", + "penguins = load_dataset(\"penguins\")" + ] + }, + { + "cell_type": "raw", + "id": "14b5927c-42f1-4934-adee-3d380b8b3228", + "metadata": {}, + "source": [ + "When used without any arguments, a small amount of jitter will be applied along the orientation axis:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bc1b4941-bbe6-4afc-b51a-0ac67cbe417d", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(penguins, \"species\", \"body_mass_g\")\n", + " .add(so.Dots(), so.Jitter())\n", + ")" + ] + }, + {
+ "cell_type": "raw", + "id": "1101690e-6c19-4219-aa4e-180798454df1", + "metadata": {}, + "source": [ + "The `width` parameter controls the amount of jitter relative to the spacing between the marks:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4251b9d-8b11-4c2c-905c-2f3b523dee70", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(penguins, \"species\", \"body_mass_g\")\n", + " .add(so.Dots(), so.Jitter(.5))\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "38aa639a-356e-4674-970b-53d55379b2b7", + "metadata": {}, + "source": [ + "The `width` parameter always applies to the orientation axis, so the direction of jitter will adapt along with the orientation:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1cfe1c07-7e81-45a0-a989-240503046133", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(penguins, \"body_mass_g\", \"species\")\n", + " .add(so.Dots(), so.Jitter(.5))\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "0f5de4cc-3383-4503-8b59-9c48230a12a5", + "metadata": {}, + "source": [ + "Because the `width` jitter is relative, it can be used when the orientation axis is numeric without further tweaking:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c94c41e8-29c4-4439-a5d1-0b8ffb244890", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(penguins[\"body_mass_g\"].round(-3), penguins[\"flipper_length_mm\"])\n", + " .add(so.Dots(), so.Jitter())\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "dd982dfa-fd9f-4edc-8190-18f0e101ae1a", + "metadata": {}, + "source": [ + "In contrast to `width`, the `x` and `y` parameters always refer to specific axes and control the jitter in data units:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0f2e5ca-68ad-4439-a4ee-f32f65682e95", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(penguins[\"body_mass_g\"].round(-3), 
penguins[\"flipper_length_mm\"])\n", + " .add(so.Dots(), so.Jitter(x=100))\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "a90ba526-8043-42ed-8f57-36445c163c0d", + "metadata": {}, + "source": [ + "Both `x` and `y` can be used in a single transform:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6c07ed1d-ac77-4b30-90a8-e1b8760f9fad", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(\n", + " penguins[\"body_mass_g\"].round(-3),\n", + " penguins[\"flipper_length_mm\"].round(-1),\n", + " )\n", + " .add(so.Dots(), so.Jitter(x=200, y=5))\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb04c7a2-93f0-44cf-aacf-0eb436d0f14b", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/objects.KDE.ipynb b/doc/_docstrings/objects.KDE.ipynb new file mode 100644 index 0000000000..863a5a16ad --- /dev/null +++ b/doc/_docstrings/objects.KDE.ipynb @@ -0,0 +1,270 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "dcc1ae12-bba4-4de9-af8d-543b3d65b42b", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn.objects as so\n", + "from seaborn import load_dataset\n", + "penguins = load_dataset(\"penguins\")" + ] + }, + { + "cell_type": "raw", + "id": "1042b991-1471-43bd-934c-43caae3cb2fa", + "metadata": {}, + "source": [ + "This stat transforms observations into a smooth function representing the estimated density:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id":
"2406e2aa-7f0f-4a51-af59-4cef827d28d8", + "metadata": {}, + "outputs": [], + "source": [ + "p = so.Plot(penguins, x=\"flipper_length_mm\")\n", + "p.add(so.Area(), so.KDE())" + ] + }, + { + "cell_type": "raw", + "id": "44515f21-683b-420f-967b-4c7568c907d7", + "metadata": {}, + "source": [ + "Adjust the smoothing bandwidth to see more or fewer details:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d4e6ba5b-4dd2-4210-8cf0-de057dc71e2a", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Area(), so.KDE(bw_adjust=0.25))" + ] + }, + { + "cell_type": "raw", + "id": "fd665fe1-a5e4-4742-adc9-e40615d57d08", + "metadata": {}, + "source": [ + "The curve will extend beyond observed values in the dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4cda1cb8-f663-4f94-aa24-6f1727a41031", + "metadata": {}, + "outputs": [], + "source": [ + "p2 = p.add(so.Bars(alpha=.3), so.Hist(\"density\"))\n", + "p2.add(so.Line(), so.KDE())" + ] + }, + { + "cell_type": "raw", + "id": "75235825-d522-4562-aacc-9b7413eabf5d", + "metadata": {}, + "source": [ + "Control the range of the density curve relative to the observations using `cut`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a7a9275e-9889-437d-bdc5-18653d2c92ef", + "metadata": {}, + "outputs": [], + "source": [ + "p2.add(so.Line(), so.KDE(cut=0))" + ] + }, + { + "cell_type": "raw", + "id": "6a885eeb-81ba-47c6-8402-1bef40544fd1", + "metadata": {}, + "source": [ + "When observations are assigned to the `y` variable, the density will be shown for `x`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "38b3a0fb-54ff-493a-bd64-f83a12365723", + "metadata": {}, + "outputs": [], + "source": [ + "so.Plot(penguins, y=\"flipper_length_mm\").add(so.Area(), so.KDE())" + ] + }, + { + "cell_type": "raw", + "id": "59996340-168e-479f-a0c6-c7e1fcab0fb0", + "metadata": {}, + "source": [ + "Use `gridsize` to increase or decrease the resolution of 
the grid where the density is evaluated:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23715820-7df9-40ba-9e74-f11564704dd0", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Dots(), so.KDE(gridsize=100))" + ] + }, + { + "cell_type": "raw", + "id": "4c9b6492-98c8-45ab-9f53-681cde2f767a", + "metadata": {}, + "source": [ + "Or pass `None` to evaluate the density at the original datapoints:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4e1b6810-5c28-43aa-aa61-652521299b51", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Dots(), so.KDE(gridsize=None))" + ] + }, + { + "cell_type": "raw", + "id": "0970a56b-0cba-4c40-bb1b-b8e71739df5c", + "metadata": {}, + "source": [ + "Other variables will define groups for the estimation:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5f0ce0b6-5742-4bc0-9ac3-abedde923684", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Area(), so.KDE(), color=\"species\")" + ] + }, + { + "cell_type": "raw", + "id": "22204fcd-4b25-46e5-a170-02b1419c23d5", + "metadata": {}, + "source": [ + "By default, the density is normalized across all groups (i.e., the joint density is shown); pass `common_norm=False` to show conditional densities:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6ad56958-dc45-4632-94d1-23039ad3ec58", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Area(), so.KDE(common_norm=False), color=\"species\")" + ] + }, + { + "cell_type": "raw", + "id": "b1627197-85d1-4476-b4ae-3e93044ee988", + "metadata": {}, + "source": [ + "Or pass a list of variables to condition on:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "58f63734-5afd-4d90-bbfb-fc39c8d1981f", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " p.facet(\"sex\")\n", + " .add(so.Area(), so.KDE(common_norm=[\"col\"]), color=\"species\")\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": 
"2b7e018e-1374-4939-909c-e95f5ffd086e", + "metadata": {}, + "source": [ + "This stat can be combined with other transforms, such as :class:`Stack` (when `common_grid=True`):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "96e5b2d0-c7e2-47df-91f1-7f9ec0bb08a9", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Area(), so.KDE(), so.Stack(), color=\"sex\")" + ] + }, + { + "cell_type": "raw", + "id": "8500ff86-0b1f-4831-954b-08b6df690387", + "metadata": {}, + "source": [ + "Set `cumulative=True` to integrate the density:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "26bb736e-7cfd-421e-b80d-42fa450e88c0", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Line(), so.KDE(cumulative=True))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e8bfd9d2-ad60-4971-aa7f-71a285f44a20", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/objects.Line.ipynb b/doc/_docstrings/objects.Line.ipynb new file mode 100644 index 0000000000..c0e5587f51 --- /dev/null +++ b/doc/_docstrings/objects.Line.ipynb @@ -0,0 +1,168 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "2923956c-f141-4ecb-ab08-e819099f0fa9", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn.objects as so\n", + "from seaborn import load_dataset\n", + "dowjones = load_dataset(\"dowjones\")\n", + "fmri = load_dataset(\"fmri\")" + ] + }, + { + "cell_type": "markdown", + "id": "05468ecf-d2f5-46f0-ba43-ea13aba0ebd2", + 
"metadata": {}, + "source": [ + "The mark draws a connecting line between sorted observations:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "acd5788f-e62b-497c-a109-f0bc02b8cae9", + "metadata": {}, + "outputs": [], + "source": [ + "so.Plot(dowjones, \"Date\", \"Price\").add(so.Line())" + ] + }, + { + "cell_type": "markdown", + "id": "94efb077-49a5-4214-891a-c68f89c79926", + "metadata": {}, + "source": [ + "Change the orientation to connect observations along the opposite axis (`orient=\"y\"` is redundant here; the plot would detect that the date variable has a lower orientation priority than the price variable):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4c5db48f-1c88-4905-a5f5-2ae96ceb0f95", + "metadata": {}, + "outputs": [], + "source": [ + "so.Plot(dowjones, x=\"Price\", y=\"Date\").add(so.Line(), orient=\"y\")" + ] + }, + { + "cell_type": "raw", + "id": "77bd0b1e-d9d1-4741-9821-83cec708e877", + "metadata": {}, + "source": [ + "To replicate the same line multiple times, assign a `group` variable (but consider using :class:`Lines` here instead):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2c1b699c-4e42-4461-a7fb-0d664ef8fe1b", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " fmri\n", + " .query(\"region == 'parietal' and event == 'stim'\")\n", + " .pipe(so.Plot, \"timepoint\", \"signal\")\n", + " .add(so.Line(color=\".2\", linewidth=1), group=\"subject\")\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "c09cc6a1-a86b-48b7-b276-e0e9125d279e", + "metadata": {}, + "source": [ + "When mapping variables to properties like `color` or `linestyle`, stat transforms are computed within each grouping:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "83b8c68d-a1ae-4bfb-b3dc-4a11bbe85cbc", + "metadata": {}, + "outputs": [], + "source": [ + "p = so.Plot(fmri, \"timepoint\", \"signal\", color=\"region\", linestyle=\"event\")\n", + "p.add(so.Line(), 
so.Agg())" + ] + }, + { + "cell_type": "raw", + "id": "c9390f58-0fb1-47ba-8b86-bde4c41e6d1d", + "metadata": {}, + "source": [ + "Combine with :class:`Band` to show an error bar:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b6ab0006-0f28-4992-b687-41889a424684", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " p\n", + " .add(so.Line(), so.Agg())\n", + " .add(so.Band(), so.Est(), group=\"event\")\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "e567df5c-6675-423f-bcd8-94cb3a400251", + "metadata": {}, + "source": [ + "Add markers to indicate values where the data were sampled:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2541701c-1a2c-44dd-b300-6551861c8b98", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Line(marker=\"o\", edgecolor=\"w\"), so.Agg(), linestyle=None)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a25d0379-b374-4539-82a4-00ce37245e1b", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/objects.Lines.ipynb b/doc/_docstrings/objects.Lines.ipynb new file mode 100644 index 0000000000..012a5c4eb4 --- /dev/null +++ b/doc/_docstrings/objects.Lines.ipynb @@ -0,0 +1,97 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "2923956c-f141-4ecb-ab08-e819099f0fa9", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn.objects as so\n", + "from seaborn import load_dataset\n", + "seaice = load_dataset(\"seaice\")" + ] + }, + { + "cell_type": "raw", + 
"id": "09694cb8-4867-49fc-80a6-a4551e50b77e", + "metadata": {}, + "source": [ + "Like :class:`Line`, the mark draws a connecting line between sorted observations:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "acd5788f-e62b-497c-a109-f0bc02b8cae9", + "metadata": {}, + "outputs": [], + "source": [ + "so.Plot(seaice, \"Date\", \"Extent\").add(so.Lines())" + ] + }, + { + "cell_type": "raw", + "id": "8f982f2d-1119-4842-9860-80b415fd24fe", + "metadata": {}, + "source": [ + "Compared to :class:`Line`, this mark offers fewer settable properties, but it can have better performance when drawing a large number of lines:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d4411136-1787-47ca-91f4-4ecba541e575", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(\n", + " x=seaice[\"Date\"].dt.day_of_year,\n", + " y=seaice[\"Extent\"],\n", + " color=seaice[\"Date\"].dt.year\n", + " )\n", + " .facet(seaice[\"Date\"].dt.year.round(-1))\n", + " .add(so.Lines(linewidth=.5, color=\"#bbca\"), col=None)\n", + " .add(so.Lines(linewidth=1))\n", + " .scale(color=\"ch:rot=-.2,light=.7\")\n", + " .layout(size=(8, 4))\n", + " .label(title=\"{}s\".format)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aaab3914-77d7-4d09-bdbe-f057a2fe28cf", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/objects.Norm.ipynb b/doc/_docstrings/objects.Norm.ipynb new file mode 100644 index 0000000000..dee130640c --- /dev/null +++ b/doc/_docstrings/objects.Norm.ipynb @@ -0,0 
+1,93 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "0bfee8b6-1e3e-499d-96ae-735a5c230b32", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn.objects as so\n", + "from seaborn import load_dataset\n", + "healthexp = load_dataset(\"healthexp\")" + ] + }, + { + "cell_type": "raw", + "id": "43adf565-2843-48fe-a12a-1a65bc9fce9f", + "metadata": {}, + "source": [ + "By default, this transform scales each group relative to its maximum value:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6262c89d-56cd-41b4-8276-0bf737b02f29", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(healthexp, x=\"Year\", y=\"Spending_USD\", color=\"Country\")\n", + " .add(so.Lines(), so.Norm())\n", + " .label(y=\"Spending relative to maximum amount\")\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "5941b47a-7f2f-4540-9944-c6a16e7eec75", + "metadata": {}, + "source": [ + "Use `where` to constrain the values used to define a baseline, and `percent` to scale the output:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8142d0b4-1b91-4ba9-bc60-3df148130ff9", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(healthexp, x=\"Year\", y=\"Spending_USD\", color=\"Country\")\n", + " .add(so.Lines(), so.Norm(where=\"x == x.min()\", percent=True))\n", + " .label(y=\"Percent change in spending from 1970 baseline\")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2f2d2d33-8a92-44fb-b37a-24dee23a7d75", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 
"3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/objects.Path.ipynb b/doc/_docstrings/objects.Path.ipynb new file mode 100644 index 0000000000..6ec364ff94 --- /dev/null +++ b/doc/_docstrings/objects.Path.ipynb @@ -0,0 +1,86 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "2923956c-f141-4ecb-ab08-e819099f0fa9", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn.objects as so\n", + "from seaborn import load_dataset\n", + "healthexp = load_dataset(\"healthexp\").sort_values([\"Country\", \"Year\"])" + ] + }, + { + "cell_type": "raw", + "id": "8c2781ed-190d-4155-99ac-0170b94de030", + "metadata": {}, + "source": [ + "Unlike :class:`Line`, this mark does not sort observations before plotting, making it suitable for plotting trajectories through a variable space:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "199c0b22-1cbd-4b5a-bebe-f59afa79b9c6", + "metadata": {}, + "outputs": [], + "source": [ + "p = so.Plot(healthexp, \"Spending_USD\", \"Life_Expectancy\", color=\"Country\")\n", + "p.add(so.Path())" + ] + }, + { + "cell_type": "raw", + "id": "fb87bd85-024b-42f5-b458-3550271d7124", + "metadata": {}, + "source": [ + "It otherwise offers the same set of options, including a number of properties that can be set or mapped:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "280de309-1c0d-4cdc-8f4c-a4f15da461cf", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Path(marker=\"o\", pointsize=2, linewidth=.75, fillcolor=\"w\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4e795770-4481-4e23-a49b-e828a1f5cbbd", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": 
".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/objects.Paths.ipynb b/doc/_docstrings/objects.Paths.ipynb new file mode 100644 index 0000000000..5f326bf07a --- /dev/null +++ b/doc/_docstrings/objects.Paths.ipynb @@ -0,0 +1,103 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "2923956c-f141-4ecb-ab08-e819099f0fa9", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn.objects as so\n", + "from seaborn import load_dataset\n", + "networks = (\n", + " load_dataset(\"brain_networks\", header=[0, 1, 2], index_col=0)\n", + " .rename_axis(\"timepoint\")\n", + " .stack([0, 1, 2])\n", + " .groupby([\"timepoint\", \"network\", \"hemi\"])\n", + " .mean()\n", + " .unstack(\"network\")\n", + " .reset_index()\n", + " .query(\"timepoint < 100\")\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "50646936-5236-413f-b79b-6c3b640ade04", + "metadata": {}, + "source": [ + "Unlike :class:`Lines`, this mark does not sort observations before plotting, making it suitable for plotting trajectories through a variable space:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4a3ed115-cc47-4ea8-be46-2c99f7453941", + "metadata": {}, + "outputs": [], + "source": [ + "p = (\n", + " so.Plot(networks)\n", + " .pair(\n", + " x=[\"5\", \"8\", \"12\", \"15\"],\n", + " y=[\"6\", \"13\", \"16\"],\n", + " )\n", + " .layout(size=(8, 5))\n", + " .share(x=True, y=True)\n", + ")\n", + "p.add(so.Paths())" + ] + }, + { + "cell_type": "raw", + "id": "5bf502eb-feb3-4b2e-882b-3e915bf5d041", + "metadata": {}, + "source": [ + "The mark has the same set of properties as :class:`Lines`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "326a765b-59f0-46ef-91c2-6705c6893740", + "metadata": {}, + "outputs": [], + "source": [ + 
"p.add(so.Paths(linewidth=1, alpha=.8), color=\"hemi\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "175b836d-d328-4b6c-ad36-dde18c19e3bf", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/objects.Perc.ipynb b/doc/_docstrings/objects.Perc.ipynb new file mode 100644 index 0000000000..d1c8094aea --- /dev/null +++ b/doc/_docstrings/objects.Perc.ipynb @@ -0,0 +1,130 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "2d44a326-029b-47ff-b560-5f4b6a4bb73f", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn.objects as so\n", + "from seaborn import load_dataset\n", + "diamonds = load_dataset(\"diamonds\")" + ] + }, + { + "cell_type": "raw", + "id": "65e975a2-2559-4bf1-8851-8bbbf52bf22d", + "metadata": {}, + "source": [ + "The default behavior computes the quartiles and min/max of the input data:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36f927f5-3b64-4871-a355-adadc4da769b", + "metadata": {}, + "outputs": [], + "source": [ + "p = (\n", + " so.Plot(diamonds, \"cut\", \"price\")\n", + " .scale(y=\"log\")\n", + ")\n", + "p.add(so.Dot(), so.Perc())" + ] + }, + { + "cell_type": "raw", + "id": "feba1b99-0f71-4b18-8e7e-bd5470cc2d0c", + "metadata": {}, + "source": [ + "Passing an integer will compute that many evenly-spaced percentiles:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f030dd39-1223-475a-93e1-1759a8971a6c", + "metadata": {}, + "outputs": [], + "source": [ + 
"p.add(so.Dot(), so.Perc(20))" + ] + }, + { + "cell_type": "raw", + "id": "85bd754b-122e-4475-8727-2d584a90a38e", + "metadata": {}, + "source": [ + "Passing a list will compute exactly those percentiles:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fde7549-45b5-411a-afba-eb0da754d9e9", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Dot(), so.Perc([10, 25, 50, 75, 90]))" + ] + }, + { + "cell_type": "raw", + "id": "7be16a13-dfc8-4595-a904-42f9be10f4f6", + "metadata": {}, + "source": [ + "Combine with a range mark to show a percentile interval:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "05c561c6-0449-4a61-96d1-390611a1b694", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(diamonds, \"price\", \"cut\")\n", + " .add(so.Dots(pointsize=1, alpha=.2), so.Jitter(.3))\n", + " .add(so.Range(color=\"k\"), so.Perc([25, 75]), so.Shift(y=.2))\n", + " .scale(x=\"log\")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d464157c-3187-49c1-9cd8-71f284ce4c50", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/objects.Plot.add.ipynb b/doc/_docstrings/objects.Plot.add.ipynb new file mode 100644 index 0000000000..3ef5928a95 --- /dev/null +++ b/doc/_docstrings/objects.Plot.add.ipynb @@ -0,0 +1,273 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "9252d5a5-8af1-4f99-b799-ee044329fb23", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn.objects as 
so\n", + "from seaborn import load_dataset\n", + "tips = load_dataset(\"tips\")" + ] + }, + { + "cell_type": "raw", + "id": "33cd5d3c-d3ad-4e3b-bdac-350f8e104594", + "metadata": { + "editable": true, + "raw_mimetype": "", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "Every layer must be defined with a :class:`Mark`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "43d0401a-d7d5-4746-a02f-a48f8b5fd1f2", + "metadata": {}, + "outputs": [], + "source": [ + "p = so.Plot(tips, \"total_bill\", \"tip\").add(so.Dot())\n", + "p" + ] + }, + { + "cell_type": "raw", + "id": "34b4f581-6126-4d57-ac76-8821c5daa97b", + "metadata": {}, + "source": [ + "Call :class:`Plot.add` multiple times to add multiple layers. In addition to the :class:`Mark`, layers can also be defined with :class:`Stat` or :class:`Move` transforms:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "693c461e-1dc2-4b44-a9e5-c07b1bf0108b", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Line(), so.PolyFit())" + ] + }, + { + "cell_type": "raw", + "id": "96a61426-0de2-4f4b-a373-0006da6fcceb", + "metadata": {}, + "source": [ + "Multiple transforms can be stacked into a pipeline. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b22623a7-bfde-493c-8593-76b145fa1e84", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(tips, y=\"day\", color=\"sex\")\n", + " .add(so.Bar(), so.Hist(), so.Dodge())\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "aa8e6bde-c86c-4bd8-abbe-e0fc64103114", + "metadata": {}, + "source": [ + "Layers have an \"orientation\", which affects the transforms and some marks. 
The orientation is typically inferred from the variable types assigned to `x` and `y`, but it can be specified when it would otherwise be ambiguous:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "42be495b-e41b-4883-b061-0973c0e8b496", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(tips, x=\"total_bill\", y=\"size\", color=\"time\")\n", + " .add(so.Dot(alpha=.5), so.Dodge(), so.Jitter(.4), orient=\"y\")\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "0d2a77f2-6a21-4fe6-a8b1-66978f4f072b", + "metadata": {}, + "source": [ + "Variables can be assigned to a specific layer. Note the distinction between how `pointsize` is passed to :class:`Plot.add` — so it is *mapped* by a scale — while `color` and `linewidth` are passed directly to :class:`Line`, so they directly set the line's color and width:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e42c3699-c468-4c21-b417-3952311735eb", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(tips, \"total_bill\", \"tip\")\n", + " .add(so.Dots(), pointsize=\"size\")\n", + " .add(so.Line(color=\".3\", linewidth=3), so.PolyFit())\n", + " .scale(pointsize=(2, 10))\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "d61908e5-9074-443d-9160-2c3101a39bcd", + "metadata": {}, + "source": [ + "Variables that would otherwise apply to the entire plot can also be *excluded* from a specific layer by setting their value to `None`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a095ecca-b428-4bad-a9ab-4d4f05cf61e0", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(tips, \"total_bill\", \"tip\", color=\"day\")\n", + " .facet(col=\"day\")\n", + " .add(so.Dot(color=\"#aabc\"), col=None, color=None)\n", + " .add(so.Dot())\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "60f94773-668e-441e-9634-41473c26d3bd", + "metadata": {}, + "source": [ + "Variables used only by the transforms *must* be passed at 
the layer level:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0d1ac7e8-5bbd-4a1a-a207-197a4251c2d3", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(tips, \"day\")\n", + " .add(so.Bar(), so.Hist(), weight=\"size\")\n", + " .label(y=\"Total patrons\")\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "8a7a5ff7-c0f5-4787-8908-3cb13ea7a047", + "metadata": {}, + "source": [ + "Each layer can be provided with its own data source. If a data source was provided in the constructor, the layer data will be joined using its index:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "45690aaa-1abf-40ae-be3b-1ab648f8be62", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "(\n", + " so.Plot(tips, \"total_bill\", \"tip\")\n", + " .add(so.Dot(color=\"#aabc\"))\n", + " .add(so.Dot(), data=tips.query(\"size == 2\"), color=\"time\")\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "e62f9e80-bfba-4516-a43a-a265dc35eb79", + "metadata": { + "editable": true, + "raw_mimetype": "", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "Providing a `label` will annotate the layer in the plot's legend:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a403012a-e895-4e5b-b690-dc27efbeccad", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "(\n", + " so.Plot(tips, x=\"size\")\n", + " .add(so.Line(color=\"C1\"), so.Agg(), y=\"total_bill\", label=\"Bill\")\n", + " .add(so.Line(color=\"C2\"), so.Agg(), y=\"tip\", label=\"Tip\")\n", + " .label(y=\"Value\")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c14526a4-37bb-4f4c-84fa-e5c556eee5c2", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [] + } + ], + "metadata": 
{ + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/objects.Plot.config.ipynb b/doc/_docstrings/objects.Plot.config.ipynb new file mode 100644 index 0000000000..7230db5c26 --- /dev/null +++ b/doc/_docstrings/objects.Plot.config.ipynb @@ -0,0 +1,177 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "a38a6fed-51de-4dbc-8d5b-4971d06acf2e", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn.objects as so" + ] + }, + { + "cell_type": "raw", + "id": "38081259-9382-4623-8d67-09aa114e0949", + "metadata": {}, + "source": [ + "Theme configuration\n", + "^^^^^^^^^^^^^^^^^^^\n", + "\n", + "Theme changes made through the :attr:`Plot.config` interface will apply to all subsequent :class:`Plot` instances. Use the :meth:`Plot.theme` method to modify the theme on a plot-by-plot basis.\n", + "\n", + "The theme is a dictionary of matplotlib `rc parameters <https://matplotlib.org/stable/tutorials/introductory/customizing.html>`_. 
You can set individual parameters directly:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "34ca0ce9-5284-47b6-8281-180709dbec89", + "metadata": {}, + "outputs": [], + "source": [ + "so.Plot.config.theme[\"axes.facecolor\"] = \"white\"" + ] + }, + { + "cell_type": "raw", + "id": "b3f93646-8370-4c16-ace4-7bb811688758", + "metadata": {}, + "source": [ + "To change the overall style of the plot, update the theme with a dictionary of parameters, perhaps from one of seaborn's theming functions:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e5eb7d3-cc7a-4231-b887-db37045f3db4", + "metadata": {}, + "outputs": [], + "source": [ + "from seaborn import axes_style\n", + "so.Plot.config.theme.update(axes_style(\"whitegrid\"))" + ] + }, + { + "cell_type": "raw", + "id": "f7c7bd9c-722d-45db-902a-c2dcdef571ee", + "metadata": {}, + "source": [ + "To sync :class:`Plot` with matplotlib's global state, pass the `rcParams` dictionary:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fd1cd96e-1a2c-474a-809f-20b8c4794578", + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib as mpl\n", + "so.Plot.config.theme.update(mpl.rcParams)" + ] + }, + { + "cell_type": "raw", + "id": "7e305ec1-4a83-411f-91df-aee2ec4d1806", + "metadata": {}, + "source": [ + "The theme can also be reset back to seaborn defaults:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e3146b1d-1b5e-464f-a631-e6d6caf161b3", + "metadata": {}, + "outputs": [], + "source": [ + "so.Plot.config.theme.reset()" + ] + }, + { + "cell_type": "raw", + "id": "b6370088-02f6-4933-91c0-5763b86b7299", + "metadata": {}, + "source": [ + "Display configuration\n", + "^^^^^^^^^^^^^^^^^^^^^\n", + "\n", + "When returned from the last statement in a notebook cell, a :class:`Plot` will be compiled and embedded in the notebook as an image. By default, the image is rendered as HiDPI PNG. 
Alternatively, it is possible to display the plots in SVG format:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1bd9966e-d08f-46b4-ad44-07276d5efba8", + "metadata": {}, + "outputs": [], + "source": [ + "so.Plot.config.display[\"format\"] = \"svg\"" + ] + }, + { + "cell_type": "raw", + "id": "845239ed-3a0f-4a94-97d0-364c2db3b9c8", + "metadata": {}, + "source": [ + "SVG images use vector graphics with \"infinite\" resolution, so they will appear crisp at any amount of zoom. The downside is that each plot element is drawn separately, so the image data can get very heavy for certain kinds of plots (e.g., for dense scatterplots).\n", + "\n", + "The HiDPI scaling of the default PNG images will also inflate the size of the notebook they are stored in. (Unlike with SVG, PNG size will scale with the dimensions of the plot but not its complexity). When not useful, it can be disabled:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13ac09f7-d4ad-4b4e-8963-edc0c6c71a94", + "metadata": {}, + "outputs": [], + "source": [ + "so.Plot.config.display[\"hidpi\"] = False" + ] + }, + { + "cell_type": "raw", + "id": "ddebe3eb-1d64-41e9-9cfd-f8359d6f8a38", + "metadata": {}, + "source": [ + "The embedded images are scaled down slightly — independently from the figure size or DPI — so that more information can be presented on the screen. 
The precise scaling factor is also configurable:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f10c5596-598d-4258-bf8f-67c07eaba266", + "metadata": {}, + "outputs": [], + "source": [ + "so.Plot.config.display[\"scaling\"] = 0.7" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/objects.Plot.facet.ipynb b/doc/_docstrings/objects.Plot.facet.ipynb new file mode 100644 index 0000000000..c8a7d1d769 --- /dev/null +++ b/doc/_docstrings/objects.Plot.facet.ipynb @@ -0,0 +1,222 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "fb8e120d-5dcf-483b-a0d1-74857d09ce7d", + "metadata": {}, + "source": [ + ".. 
currentmodule:: seaborn.objects" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9252d5a5-8af1-4f99-b799-ee044329fb23", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn.objects as so\n", + "from seaborn import load_dataset\n", + "penguins = load_dataset(\"penguins\")\n", + "diamonds = load_dataset(\"diamonds\")" + ] + }, + { + "cell_type": "markdown", + "id": "ae85e302-354c-46ca-a17f-aaec7ed1cbd6", + "metadata": {}, + "source": [ + "Assigning a faceting variable will create multiple subplots and plot subsets of the data on each of them:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d65405fd-cf28-4248-8e51-1aa1999354a2", + "metadata": {}, + "outputs": [], + "source": [ + "p = so.Plot(penguins, \"bill_length_mm\", \"bill_depth_mm\").add(so.Dots())\n", + "p.facet(\"species\")" + ] + }, + { + "cell_type": "markdown", + "id": "2b9630aa-3b46-4e72-82ef-5717c2d8c686", + "metadata": {}, + "source": [ + "Multiple faceting variables can be defined to create a two-dimensional grid:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1857144f-1373-4704-9332-d3fc649ceb9d", + "metadata": {}, + "outputs": [], + "source": [ + "p.facet(\"species\", \"sex\")" + ] + }, + { + "cell_type": "markdown", + "id": "7664e2d2-c254-44b4-9973-88e1d013fb3d", + "metadata": {}, + "source": [ + "Facet variables can be provided as references to the global plot data or as vectors:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6569616d-480b-4b8c-a761-f5bd2bde60e3", + "metadata": {}, + "outputs": [], + "source": [ + "p.facet(penguins[\"island\"])" + ] + }, + { + "cell_type": "markdown", + "id": "198f63a0-bb0f-40c4-b790-bd15f8656acb", + "metadata": {}, + "source": [ + "With a single faceting variable, arrange the facets or limit to a subset by passing a list of levels to `order`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"b1344f7f-50d0-4592-b4fb-ab81d97a4798", + "metadata": {}, + "outputs": [], + "source": [ + "p.facet(\"species\", order=[\"Gentoo\", \"Adelie\"])" + ] + }, + { + "cell_type": "markdown", + "id": "2090297c-414f-4448-a930-5b6f0de18deb", + "metadata": {}, + "source": [ + "With multiple variables, pass `order` as a dictionary:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "58ed1b13-71a7-462a-af99-78be566268a6", + "metadata": {}, + "outputs": [], + "source": [ + "p.facet(\"species\", \"sex\", order={\"col\": [\"Gentoo\", \"Adelie\"], \"row\": [\"Female\", \"Male\"]})" + ] + }, + { + "cell_type": "markdown", + "id": "e440f14d-24b2-4f83-a247-0bb917f9f4c3", + "metadata": {}, + "source": [ + "When the faceting variable has multiple levels, you can `wrap` it to distribute subplots across both dimensions:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "92baf66c-6dd9-4f50-adf2-386c4daab094", + "metadata": {}, + "outputs": [], + "source": [ + "p = so.Plot(diamonds, x=\"carat\", y=\"price\").add(so.Dots())\n", + "p.facet(\"color\", wrap=4)" + ] + }, + { + "cell_type": "markdown", + "id": "8d0872cb-e261-4796-b81e-a416fea85201", + "metadata": {}, + "source": [ + "Wrapping works only when there is a single variable, but you can wrap in either direction:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c5a66a64-bfba-437c-80be-1311e85cf5a5", + "metadata": {}, + "outputs": [], + "source": [ + "p.facet(row=\"color\", wrap=2)" + ] + }, + { + "cell_type": "raw", + "id": "e1bdaad7-5883-45ad-af39-c10183569bdc", + "metadata": {}, + "source": [ + "Use :meth:`Plot.share` to specify whether facets should be scaled the same way:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "14c1f977-79d4-4f9c-a846-1fd70ad3569e", + "metadata": {}, + "outputs": [], + "source": [ + "p.facet(\"clarity\", wrap=3).share(x=False)" + ] + }, + { + "cell_type": "raw", + "id": "a4fc64d9-b7ba-4061-8160-63d8fd89e47a", + 
"metadata": {}, + "source": [ + "Use :meth:`Plot.label` to tweak the titles:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4206b12c-d7a3-419f-b278-6edfe487c5de", + "metadata": {}, + "outputs": [], + "source": [ + "p.facet(\"color\").label(title=\"{} grade\".format)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "28b4fb9d-2bb0-40ff-a541-5f300aca6200", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/objects.Plot.label.ipynb b/doc/_docstrings/objects.Plot.label.ipynb new file mode 100644 index 0000000000..1a9f02f93a --- /dev/null +++ b/doc/_docstrings/objects.Plot.label.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "9252d5a5-8af1-4f99-b799-ee044329fb23", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn.objects as so\n", + "from seaborn import load_dataset\n", + "penguins = load_dataset(\"penguins\")" + ] + }, + { + "cell_type": "raw", + "id": "fb32137a-e882-4222-9463-b8cf0ee1c8bd", + "metadata": { + "editable": true, + "raw_mimetype": "", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "Use strings to override default labels:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "65b4320e-6fb9-48ed-9132-53b0d21b85e6", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "p = (\n", + " 
so.Plot(penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\")\n", + " .add(so.Dot(), color=\"species\")\n", + ")\n", + "p.label(x=\"Length\", y=\"Depth\", color=\"\")" + ] + }, + { + "cell_type": "raw", + "id": "a39626d2-76f5-40a9-a3fd-6f44dd69bd30", + "metadata": { + "editable": true, + "raw_mimetype": "", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "Pass a function to *modify* the default label:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c3540c54-1c91-4d55-8f58-cd758abbe2fd", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "p.label(color=str.capitalize)" + ] + }, + { + "cell_type": "markdown", + "id": "68f3b321-0755-4ef1-a9e6-bcff61a9178d", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "Use this method to set the title for a single-axes plot:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12d23c6e-781f-4b5c-a6b0-3ea0317ab7fb", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "p.label(title=\"Penguin species exhibit distinct bill shapes\")" + ] + }, + { + "cell_type": "markdown", + "id": "8e0bcb80-0929-4ab9-b5c0-13bb3d8e4484", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "When faceting, the `title` parameter will modify default titles:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "da1516b7-b823-41c0-b251-01bdecb6a4e6", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "p.facet(\"sex\").label(title=str.upper)" + ] + }, + { + "cell_type": "markdown", + "id": "bb439eae-6cc3-4a6c-bef2-b4b7746edbd1", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + 
"source": [ + "And the `col`/`row` parameters will add labels to the title for each facet:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e0d49ba9-0507-4358-b477-2e0253f0df8f", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "p.facet(\"sex\").label(col=\"Sex:\")" + ] + }, + { + "cell_type": "markdown", + "id": "99471c06-1b1a-4ef5-844c-5f4aa8f322f5", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "If more customization is needed, a format string can work well:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "848be3a3-5a2c-4b98-918f-825257be85ae", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "p.facet(\"sex\").label(title=\"{} penguins\".format)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "94012def-dd7c-48f4-8830-f77a3bf7299b", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "p" + ] + }, + { + "cell_type": "raw", + "id": "e9b669e9-fd3d-4292-9c8d-e5fb093932b2", + "metadata": { + "editable": true, + "raw_mimetype": "", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "When adding labels for each layer, the `legend=` parameter sets the title for the legend:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "78d22763-3f92-4be1-bc3f-bc24ad39da70", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "(\n", + " so.Plot(penguins, x=\"species\")\n", + " .add(so.Line(color=\"C1\"), so.Agg(), y=\"bill_length_mm\", label=\"length\")\n", + " .add(so.Line(color=\"C2\"), so.Agg(), y=\"bill_depth_mm\", label=\"depth\")\n", + " .label(legend=\"Measurement\")\n", + ")" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "5c7a7b91-bb5c-4bf5-99f8-719a220e3b36", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/objects.Plot.layout.ipynb b/doc/_docstrings/objects.Plot.layout.ipynb new file mode 100644 index 0000000000..021cf7296c --- /dev/null +++ b/doc/_docstrings/objects.Plot.layout.ipynb @@ -0,0 +1,120 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "9252d5a5-8af1-4f99-b799-ee044329fb23", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn.objects as so" + ] + }, + { + "cell_type": "markdown", + "id": "406f8f8d-b590-46f4-a230-626e32e52c71", + "metadata": {}, + "source": [ + "Control the overall dimensions of the figure with `size`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fefc2b45-3510-4cd7-9de9-4806d71fc4c1", + "metadata": {}, + "outputs": [], + "source": [ + "p = so.Plot().layout(size=(4, 4))\n", + "p" + ] + }, + { + "cell_type": "raw", + "id": "909a47bb-82f5-455a-99c3-7049d548561b", + "metadata": {}, + "source": [ + "Subplots created by using :meth:`Plot.facet` or :meth:`Plot.pair` will shrink to fit in the available space:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3163687c-8d48-4e88-8dc2-35e16341e30e", + "metadata": {}, + "outputs": [], + "source": [ + "p.facet([\"A\", \"B\"], [\"X\", \"Y\"])" + ] + }, + { + "cell_type": "markdown", + "id": 
"feda7c3a-3862-48d4-bb18-419cd03fc081", + "metadata": {}, + "source": [ + "You may find that different automatic layout engines give better or worse results with specific plots:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c2107939-c6a9-414c-b3a2-6f5d0dd60daf", + "metadata": {}, + "outputs": [], + "source": [ + "p.facet([\"A\", \"B\"], [\"X\", \"Y\"]).layout(engine=\"constrained\")" + ] + }, + { + "cell_type": "markdown", + "id": "d61054d1-dcef-4e11-9802-394bcc633f9f", + "metadata": {}, + "source": [ + "With `extent`, you can control the size of the plot relative to the underlying figure. Because the notebook display adapts the figure background to the plot, this appears only to change the plot size in a notebook context. But it can be useful when saving or displaying through a `pyplot` GUI window:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1b5d5969-2925-474f-8e3c-99e4f90a7a2b", + "metadata": {}, + "outputs": [], + "source": [ + "p.layout(extent=[0, 0, .8, 1]).show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e5c41b7d-a064-4406-8571-a544b194f3dc", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/objects.Plot.limit.ipynb b/doc/_docstrings/objects.Plot.limit.ipynb new file mode 100644 index 0000000000..6d1ec6084d --- /dev/null +++ b/doc/_docstrings/objects.Plot.limit.ipynb @@ -0,0 +1,120 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "9252d5a5-8af1-4f99-b799-ee044329fb23", + "metadata": { + "tags": [ + 
"hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn.objects as so" + ] + }, + { + "cell_type": "raw", + "id": "1888667e-8761-4c32-9510-68e08e64f21d", + "metadata": {}, + "source": [ + "By default, plot limits are automatically set to provide a small margin around the data (controlled by :meth:`Plot.theme` parameters `axes.xmargin` and `axes.ymargin`):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25ec46d9-3c60-4962-b182-a2b2c8310305", + "metadata": {}, + "outputs": [], + "source": [ + "p = so.Plot(x=[1, 2, 3], y=[1, 3, 2]).add(so.Line(marker=\"o\"))\n", + "p" + ] + }, + { + "cell_type": "raw", + "id": "5f5c19d8-4104-4df0-ae45-9a8ac96d024e", + "metadata": {}, + "source": [ + "Pass a `min`/`max` tuple to pin the limits at specific values:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "804388c5-5efa-4cfb-92d8-97fdf838ae5e", + "metadata": {}, + "outputs": [], + "source": [ + "p.limit(x=(0, 4), y=(-1, 6))" + ] + }, + { + "cell_type": "markdown", + "id": "49634203-4c77-42ae-abc1-b182671f305e", + "metadata": {}, + "source": [ + "Reversing the `min`/`max` values will invert the axis:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6ea1c82c-a9bc-43cc-ba75-5ee28923b8f2", + "metadata": {}, + "outputs": [], + "source": [ + "p.limit(y=(4, 0))" + ] + }, + { + "cell_type": "raw", + "id": "9bb25c70-3960-4a81-891c-2bd299e7b24f", + "metadata": {}, + "source": [ + "Use `None` for either side to maintain the default value:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d0566ba8-707c-4808-9a76-525ccaef7a42", + "metadata": {}, + "outputs": [], + "source": [ + "p.limit(y=(0, None))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fefc2b45-3510-4cd7-9de9-4806d71fc4c1", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + 
"language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/objects.Plot.on.ipynb b/doc/_docstrings/objects.Plot.on.ipynb new file mode 100644 index 0000000000..f297bf631f --- /dev/null +++ b/doc/_docstrings/objects.Plot.on.ipynb @@ -0,0 +1,182 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "fb8e120d-5dcf-483b-a0d1-74857d09ce7d", + "metadata": {}, + "source": [ + ".. currentmodule:: seaborn.objects" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9252d5a5-8af1-4f99-b799-ee044329fb23", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "%config InlineBackend.figure_format = \"retina\"\n", + "import seaborn as sns\n", + "import seaborn.objects as so\n", + "import matplotlib as mpl\n", + "import matplotlib.pyplot as plt\n", + "from seaborn import load_dataset\n", + "diamonds = load_dataset(\"diamonds\")" + ] + }, + { + "cell_type": "raw", + "id": "3445ed22-7a6a-4f91-8914-49bb1af023cb", + "metadata": {}, + "source": [ + "Passing a :class:`matplotlib.axes.Axes` object provides functionality closest to seaborn's axes-level plotting functions. Notice how the resulting image looks different from others created with :class:`Plot`. 
This is because the plot theme uses the global rcParams at the time the axes were created, rather than :class:`Plot` defaults:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b816b0b1-b861-404e-bec6-9b2b0844ea5a", + "metadata": {}, + "outputs": [], + "source": [ + "p = so.Plot(diamonds, \"carat\", \"price\").add(so.Dots())\n", + "f, ax = plt.subplots()\n", + "p.on(ax).show()" + ] + }, + { + "cell_type": "raw", + "id": "ce3aa102-50fe-44ce-9e06-e25d14b410f1", + "metadata": {}, + "source": [ + "Alternatively, calling :func:`matplotlib.pyplot.figure` will defer axes creation to :class:`Plot`, which will apply the default theme (and any customizations specified with :meth:`Plot.theme`):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "52eefae9-d08e-48fb-a15b-27920609d53b", + "metadata": {}, + "outputs": [], + "source": [ + "f = plt.figure()\n", + "p.on(f).show()" + ] + }, + { + "cell_type": "raw", + "id": "171fa466-1f7a-4c5e-8a12-61edb3f11e4a", + "metadata": {}, + "source": [ + "Creating a :class:`matplotlib.figure.Figure` object will bypass `pyplot` altogether. This may be useful for embedding :class:`Plot` figures in a GUI application:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bba83103-ab74-4e3c-b16e-77644f4c0431", + "metadata": {}, + "outputs": [], + "source": [ + "f = mpl.figure.Figure()\n", + "p.on(f).plot()" + ] + }, + { + "cell_type": "raw", + "id": "4cce3d40-acea-4f5c-87c4-56666480d2fe", + "metadata": {}, + "source": [ + "Using :meth:`Plot.on` also provides access to the underlying matplotlib objects, which may be useful for deep customization. 
But it requires careful attention to the order of operations by which the :class:`Plot` is specified, compiled, customized, and displayed:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "91823d24-8269-4b72-abeb-38201eb2db3f", + "metadata": {}, + "outputs": [], + "source": [ + "f = mpl.figure.Figure()\n", + "res = p.on(f).plot()\n", + "\n", + "ax = f.axes[0]\n", + "rect = mpl.patches.Rectangle(\n", + " xy=(0, 1), width=.4, height=.1,\n", + " color=\"C1\", alpha=.2,\n", + " transform=ax.transAxes, clip_on=False,\n", + ")\n", + "ax.add_artist(rect)\n", + "ax.text(\n", + " x=rect.get_width() / 2, y=1 + rect.get_height() / 2,\n", + " s=\"Diamonds: very sparkly!\", size=12,\n", + " ha=\"center\", va=\"center\", transform=ax.transAxes,\n", + ")\n", + "\n", + "res" + ] + }, + { + "cell_type": "raw", + "id": "61286891-25b3-4db5-8ebe-af080d5c5f31", + "metadata": {}, + "source": [ + "Matplotlib 3.4 introduced the concept of :meth:`matplotlib.figure.Figure.subfigures`, which make it easier to composite multiple arrangements of subplots. 
These can also be passed to :meth:`Plot.on`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ca19a28e-7a49-46b3-a727-a26f4a1099c3", + "metadata": {}, + "outputs": [], + "source": [ + "f = mpl.figure.Figure(figsize=(7, 4), dpi=100, layout=\"constrained\")\n", + "sf1, sf2 = f.subfigures(1, 2)\n", + "\n", + "p.on(sf1).plot()\n", + "(\n", + " so.Plot(diamonds, x=\"price\")\n", + " .add(so.Bars(), so.Hist())\n", + " .facet(row=\"cut\")\n", + " .scale(x=\"log\")\n", + " .share(y=False)\n", + " .on(sf2)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6ecd4166-939d-4925-92be-bf886a16ae94", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/objects.Plot.pair.ipynb b/doc/_docstrings/objects.Plot.pair.ipynb new file mode 100644 index 0000000000..c31240f57f --- /dev/null +++ b/doc/_docstrings/objects.Plot.pair.ipynb @@ -0,0 +1,217 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "ac7814b6-1e2c-4f0e-991b-7fe78fca4346", + "metadata": {}, + "source": [ + ".. 
currentmodule:: seaborn.objects" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9252d5a5-8af1-4f99-b799-ee044329fb23", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn.objects as so\n", + "from seaborn import load_dataset\n", + "mpg = load_dataset(\"mpg\")" + ] + }, + { + "cell_type": "markdown", + "id": "a6ee48da-ff1e-41eb-95ec-9f2dd12bdb63", + "metadata": {}, + "source": [ + "Plot one dependent variable against multiple independent variables by assigning `y` and pairing on `x`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "56ab58b6-ccdf-4938-a8e0-cbe2de8d6749", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(mpg, y=\"acceleration\")\n", + " .pair(x=[\"displacement\", \"weight\"])\n", + " .add(so.Dots())\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "c37e0543-d022-4079-b58a-8f8af90b29c8", + "metadata": {}, + "source": [ + "Show multiple pairwise relationships by passing lists to both `x` and `y`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b5298d-d578-4284-8fab-415d2c03022d", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(mpg)\n", + " .pair(x=[\"displacement\", \"weight\"], y=[\"horsepower\", \"acceleration\"])\n", + " .add(so.Dots())\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "09bf54ad-bf55-4e26-8566-5af62bf29c51", + "metadata": {}, + "source": [ + "When providing lists for both `x` and `y`, pass `cross=False` to pair each position in the list rather than showing all pairwise relationships:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c70ca7d8-79ee-4c7a-ae91-2088e965b1f4", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(mpg)\n", + " .pair(\n", + " x=[\"weight\", \"acceleration\"],\n", + " y=[\"displacement\", \"horsepower\"],\n", + " cross=False,\n", + " )\n", + " .add(so.Dots())\n", + ")" + ] + }, + { + "cell_type": 
"markdown", + "id": "79beadec-038d-40f0-8783-749474d48eac", + "metadata": {}, + "source": [ + "When plotting against several `x` or `y` variables, it is possible to `wrap` the subplots to produce a two-dimensional grid:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2bf2d87f-a940-426c-bdff-8bf80696b7a1", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(mpg, y=\"mpg\")\n", + " .pair(x=[\"displacement\", \"weight\", \"horsepower\", \"cylinders\"], wrap=2)\n", + " .add(so.Dots())\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "6304faed-2466-49eb-a8c2-d9d635938b78", + "metadata": {}, + "source": [ + "Pairing can be combined with faceting, either pairing on `y` and faceting on `col` or pairing on `x` and faceting on `row`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bea235cd-e9c1-4119-a683-871e60b149ec", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(mpg, x=\"weight\")\n", + " .pair(y=[\"horsepower\", \"acceleration\"])\n", + " .facet(col=\"origin\")\n", + " .add(so.Dots())\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "ded931d2-95f1-4e09-8e24-f8b687f8f052", + "metadata": {}, + "source": [ + "While typically convenient to assign pairing variables as references to the common `data`, it's also possible to pass a list of vectors:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "66e0cb77-094b-4144-b086-15bab106ca9f", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(mpg[\"weight\"])\n", + " .pair(y=[mpg[\"horsepower\"], mpg[\"acceleration\"]])\n", + " .add(so.Dots())\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "7bef3310-87f6-44f6-be6a-e30effaa7a70", + "metadata": {}, + "source": [ + "When customizing the plot through methods like :meth:`Plot.label`, :meth:`Plot.limit`, or :meth:`Plot.scale`, you can refer to the individual coordinate variables as `x0`, `x1`, etc.:" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "d6ce8868-55c0-4c44-8fed-937771b762ee", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(mpg, y=\"mpg\")\n", + " .pair(x=[\"weight\", \"displacement\"])\n", + " .label(x0=\"Weight (lb)\", x1=\"Displacement (cu in)\", y=\"MPG\")\n", + " .add(so.Dots())\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "358d409f-8b7c-4901-8eec-b2cf51731483", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/objects.Plot.scale.ipynb b/doc/_docstrings/objects.Plot.scale.ipynb new file mode 100644 index 0000000000..b4c11680ec --- /dev/null +++ b/doc/_docstrings/objects.Plot.scale.ipynb @@ -0,0 +1,316 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "9252d5a5-8af1-4f99-b799-ee044329fb23", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn.objects as so\n", + "from seaborn import load_dataset\n", + "diamonds = load_dataset(\"diamonds\")\n", + "mpg = load_dataset(\"mpg\").query(\"cylinders in [4, 6, 8]\")" + ] + }, + { + "cell_type": "raw", + "id": "bd43bcc6-b060-49c2-a429-8ea0ab046e2c", + "metadata": {}, + "source": [ + "Passing the name of a function, such as `\"log\"` or `\"symlog\"` will set the scale's transform:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "84b84cc1-ef1c-461e-b4af-4ce6e99886d1", + "metadata": {}, + "outputs": [], + "source": [ + "p1 = so.Plot(diamonds, x=\"carat\", y=\"price\")\n", + "p1.add(so.Dots()).scale(y=\"log\")" + ] + }, + { + 
"cell_type": "raw", + "id": "b5ea9f7f-c776-48af-a4be-0053c3c12036", + "metadata": {}, + "source": [ + "String arguments can also specify the name of a palette that defines the output values (or \"range\") of the scale:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e1f64d2f-6abd-48aa-9bab-c3e4614d0302", + "metadata": {}, + "outputs": [], + "source": [ + "p1.add(so.Dots(), color=\"clarity\").scale(color=\"crest\")" + ] + }, + { + "cell_type": "raw", + "id": "37df8672-33b1-49a8-b702-a87c8b95db99", + "metadata": {}, + "source": [ + "The scale's range can alternatively be specified as a tuple of min/max values:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "371b8abd-ddfb-42f9-b730-f75b0e7b5fd6", + "metadata": {}, + "outputs": [], + "source": [ + "p1.add(so.Dots(), pointsize=\"carat\").scale(pointsize=(2, 10))" + ] + }, + { + "cell_type": "raw", + "id": "f0c4ead3-e950-48e4-9c81-c8734a8458d0", + "metadata": {}, + "source": [ + "The tuple format can also be used for a color scale:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "678fd8b2-b031-4ec6-a567-a6711f722cbd", + "metadata": {}, + "outputs": [], + "source": [ + "p1.add(so.Dots(), color=\"carat\").scale(color=(\".4\", \"#68d\"))" + ] + }, + { + "cell_type": "raw", + "id": "b6445ab7-2ec1-40be-95bc-9df0a5750bf5", + "metadata": {}, + "source": [ + "For more control pass a scale object, such as :class:`Continuous`, which allows you to specify the input domain (`norm`), output range (`values`), and nonlinear transform (`trans`):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d6a219ef-b50e-442e-82e9-8ae9e2cdb825", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "(\n", + " p1.add(so.Dots(), color=\"carat\")\n", + " .scale(color=so.Continuous((\".4\", \"#68d\"), norm=(1, 3), trans=\"sqrt\"))\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "737e73a9-a0d5-4311-8c5c-4ca42f9194bf", + "metadata": 
{ + "tags": [] + }, + "source": [ + "The scale objects also offer an interface for configuring the location of the scale ticks (including in the legend) and the formatting of the tick labels:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cfaa426a-1a97-4b6f-91b6-ee378eabf194", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " p1.add(so.Dots(), color=\"price\")\n", + " .scale(\n", + " x=so.Continuous(trans=\"sqrt\").tick(every=.5),\n", + " y=so.Continuous().label(like=\"${x:g}\"),\n", + " color=so.Continuous(\"ch:.2\").tick(upto=4).label(unit=\"\"),\n", + " )\n", + " .label(y=\"\")\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "d4013795-fd5d-4a53-b145-e87f876a0684", + "metadata": {}, + "source": [ + "If the scale includes a nonlinear transform, it will be applied *before* any statistical transforms:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e9bf321f-c482-4d25-bb3b-7c499930b0d1", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " p1.add(so.Dots(color=\".7\"))\n", + " .add(so.Line(), so.PolyFit(order=2))\n", + " .scale(y=\"log\")\n", + " .limit(y=(250, 25000))\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "00ac5844-efb1-4683-a8ff-e864d0c68dff", + "metadata": {}, + "source": [ + "The scale is also relevant for when numerical data should be treated as categories. Consider the following histogram:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "04d5e6ae-30b2-495b-be1a-d99d6ffd4f44", + "metadata": {}, + "outputs": [], + "source": [ + "p2 = so.Plot(mpg, \"cylinders\").add(so.Bar(), so.Hist())\n", + "p2" + ] + }, + { + "cell_type": "raw", + "id": "9b3dafad-aae0-4862-b1b2-bb76b75a9cec", + "metadata": {}, + "source": [ + "By default, the plot gives `cylinders` a continuous scale, since it is a vector of floats. 
But assigning a :class:`Nominal` scale causes the histogram to bin observations properly:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f89331a-69fc-4714-adfb-0568690c1b66", + "metadata": {}, + "outputs": [], + "source": [ + "p2.scale(x=so.Nominal())" + ] + }, + { + "cell_type": "raw", + "id": "78880057-f4a7-40a1-a619-20d4b3be34dc", + "metadata": {}, + "source": [ + "The default behavior for semantic mappings also depends on input data types and can be modified by the scale. Consider the sequential mapping applied to the colors in this plot:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "653abbc6-8227-48eb-9e1d-31587e6ef46d", + "metadata": {}, + "outputs": [], + "source": [ + "p3 = (\n", + " so.Plot(mpg, \"weight\", \"acceleration\", color=\"cylinders\")\n", + " .add(so.Dot(), marker=\"origin\")\n", + ")\n", + "p3" + ] + }, + { + "cell_type": "raw", + "id": "6ce5c9a8-5051-43b1-973c-fb9fb35ba399", + "metadata": {}, + "source": [ + "Passing the name of a qualitative palette will select a :class:`Nominal` scale:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "218d6619-1fe3-4412-a2fc-efed4f542db7", + "metadata": {}, + "outputs": [], + "source": [ + "p3.scale(color=\"deep\")" + ] + }, + { + "cell_type": "raw", + "id": "d2362247-6e0e-48fb-bbe4-2149f96785ae", + "metadata": {}, + "source": [ + "A :class:`Nominal` scale is also implied when the output values are given as a list or dictionary:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8bdf57da-cb05-4347-87ec-fac2c3763f12", + "metadata": {}, + "outputs": [], + "source": [ + "p3.scale(\n", + " color=[\"#49b\", \"#a6a\", \"#5b8\"],\n", + " marker={\"japan\": \".\", \"europe\": \"+\", \"usa\": \"*\"},\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "a7d92be7-9e96-4850-a26a-090c5ae9857b", + "metadata": {}, + "source": [ + "Pass a :class:`Nominal` object directly to control the order of the category mappings:" + ] + 
}, + { + "cell_type": "code", + "execution_count": null, + "id": "a3c7eeb9-351f-484d-b0af-e18341569de3", + "metadata": {}, + "outputs": [], + "source": [ + "p3.scale(\n", + " color=so.Nominal([\"#008fd5\", \"#fc4f30\", \"#e5ae38\"]),\n", + " marker=so.Nominal(order=[\"japan\", \"europe\", \"usa\"])\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d8885056-fd98-4964-a4a1-8c0344960409", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/objects.Plot.share.ipynb b/doc/_docstrings/objects.Plot.share.ipynb new file mode 100644 index 0000000000..097cf01bd0 --- /dev/null +++ b/doc/_docstrings/objects.Plot.share.ipynb @@ -0,0 +1,131 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "9252d5a5-8af1-4f99-b799-ee044329fb23", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn.objects as so\n", + "from seaborn import load_dataset\n", + "penguins = load_dataset(\"penguins\")" + ] + }, + { + "cell_type": "raw", + "id": "3a874676-6b0d-45b1-a227-857a536c5ed2", + "metadata": {}, + "source": [ + "By default, faceted plots will share all axes:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "615d0765-98c7-4694-8115-a6d1b3557fe7", + "metadata": {}, + "outputs": [], + "source": [ + "p = (\n", + " so.Plot(penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\")\n", + " .facet(col=\"species\", row=\"sex\")\n", + " .add(so.Dots())\n", + ")\n", + "p" + ] + }, + { + "cell_type": "raw", + "id": 
"8b75feb1-491e-4031-9fcb-619037bd1bfb", + "metadata": {}, + "source": [ + "Set a coordinate variable to `False` to let each subplot adapt independently:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4c23c570-ca9b-49cc-9aab-7d167218454b", + "metadata": {}, + "outputs": [], + "source": [ + "p.share(x=False, y=False)" + ] + }, + { + "cell_type": "markdown", + "id": "cc46d8d0-7ab9-44c2-8a28-c656fe86c085", + "metadata": {}, + "source": [ + "It's also possible to share only across rows or columns:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7cb8136b-9aa3-4c48-bd41-fc0e19fa997c", + "metadata": {}, + "outputs": [], + "source": [ + "p.share(x=\"col\", y=\"row\")" + ] + }, + { + "cell_type": "raw", + "id": "91533aba-45ae-4011-b72c-10f5f79e01d0", + "metadata": {}, + "source": [ + "This method is also relevant for paired plots, which have different defaults. In this case, you would need to opt *in* to full sharing (although it may not always make sense):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e2b71770-e520-45b9-b41c-a66431f21e1f", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(penguins, y=\"flipper_length_mm\")\n", + " .pair(x=[\"bill_length_mm\", \"bill_depth_mm\"])\n", + " .add(so.Dots())\n", + " .share(x=True)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "92c29080-8561-4c90-8581-4d435a5f96b9", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/objects.Plot.theme.ipynb 
b/doc/_docstrings/objects.Plot.theme.ipynb new file mode 100644 index 0000000000..df98bc456b --- /dev/null +++ b/doc/_docstrings/objects.Plot.theme.ipynb @@ -0,0 +1,185 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "9252d5a5-8af1-4f99-b799-ee044329fb23", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn.objects as so\n", + "from seaborn import load_dataset\n", + "anscombe = load_dataset(\"anscombe\")" + ] + }, + { + "cell_type": "raw", + "id": "406f6608-daf2-4d3e-9f2c-1a9e93ecb840", + "metadata": {}, + "source": [ + "The default theme uses the same parameters as :func:`seaborn.set_theme` with no additional arguments:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5e3d639c-1167-48d2-b9b5-c26b7fa12c66", + "metadata": {}, + "outputs": [], + "source": [ + "p = (\n", + " so.Plot(anscombe, \"x\", \"y\", color=\"dataset\")\n", + " .facet(\"dataset\", wrap=2)\n", + " .add(so.Line(), so.PolyFit(order=1))\n", + " .add(so.Dot())\n", + ")\n", + "p" + ] + }, + { + "cell_type": "raw", + "id": "e2823a91-47f1-40a8-a150-32f00bcb59ea", + "metadata": {}, + "source": [ + "Pass a dictionary of rc parameters to change the appearance of the plot:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "368c8cdb-2e6f-4520-8412-cd1864a6c09b", + "metadata": {}, + "outputs": [], + "source": [ + "p.theme({\"axes.facecolor\": \"w\", \"axes.edgecolor\": \"slategray\"})" + ] + }, + { + "cell_type": "raw", + "id": "637cf0ba-e9b7-4f0f-a628-854e300c4122", + "metadata": {}, + "source": [ + "Many (though not all) mark properties will reflect theme parameters by default:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9eb330b3-f424-405b-9653-5df9948792d9", + "metadata": {}, + "outputs": [], + "source": [ + "p.theme({\"lines.linewidth\": 4})" + ] + }, + { + "cell_type": "raw", + "id": "0186e852-9c47-4da1-999a-f61f41687dfb", + "metadata": {}, + "source": [ + 
"Apply seaborn styles by passing in the output of the style functions:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "48cafbb1-37da-42c7-a20e-b63c0fef4d41", + "metadata": {}, + "outputs": [], + "source": [ + "from seaborn import axes_style\n", + "p.theme(axes_style(\"ticks\"))" + ] + }, + { + "cell_type": "raw", + "id": "bbdecb4b-382a-49f3-8928-16f5f72c39b5", + "metadata": {}, + "source": [ + "Or apply styles that ship with matplotlib:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "84a7ac28-798d-4560-bbc8-d214fd6fcada", + "metadata": {}, + "outputs": [], + "source": [ + "from matplotlib import style\n", + "p.theme(style.library[\"fivethirtyeight\"])" + ] + }, + { + "cell_type": "raw", + "id": "e1870ad0-48a0-4fd1-a557-d337979bc845", + "metadata": {}, + "source": [ + "Multiple parameter dictionaries should be passed to the same function call. On Python 3.9+, you can use dictionary union syntax for this:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dec4db5b-1b2b-4b9d-97e1-9cf0f20d6b83", + "metadata": {}, + "outputs": [], + "source": [ + "from seaborn import plotting_context\n", + "p.theme(axes_style(\"whitegrid\") | plotting_context(\"talk\"))" + ] + }, + { + "cell_type": "raw", + "id": "7cc09720-887d-463e-a162-1e3ef8a46ad9", + "metadata": {}, + "source": [ + "The default theme for all :class:`Plot` instances can be changed using the :attr:`Plot.config` attribute:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4e535ddf-d394-4ce1-8d09-4dc95ca314b4", + "metadata": {}, + "outputs": [], + "source": [ + "so.Plot.config.theme.update(axes_style(\"white\"))\n", + "p" + ] + }, + { + "cell_type": "raw", + "id": "2f19f645-3f8d-4044-82e9-4a87165a0078", + "metadata": {}, + "source": [ + "See :ref:`Plot Configuration <plot_config>` for more details." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/objects.Range.ipynb b/doc/_docstrings/objects.Range.ipynb new file mode 100644 index 0000000000..3e462255fb --- /dev/null +++ b/doc/_docstrings/objects.Range.ipynb @@ -0,0 +1,140 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "2923956c-f141-4ecb-ab08-e819099f0fa9", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn.objects as so\n", + "from seaborn import load_dataset\n", + "penguins = load_dataset(\"penguins\")" + ] + }, + { + "cell_type": "raw", + "id": "576cbc86-f869-47b5-a98f-6ee727287a8b", + "metadata": {}, + "source": [ + "This mark will often be used in the context of a stat transform that adds an errorbar interval:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f6217b85-7479-49fd-aeda-9f435aa0473a", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(penguins, x=\"body_mass_g\", y=\"species\", color=\"sex\")\n", + " .add(so.Dot(), so.Agg(), so.Dodge())\n", + " .add(so.Range(), so.Est(errorbar=\"sd\"), so.Dodge())\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "e156ea24-d8b4-4d67-acb5-750034be4dde", + "metadata": {}, + "source": [ + "One feature (or potential gotcha) is that the mark will pick up properties like `linestyle` and `linewidth`; exclude those properties from the relevant layer if this behavior is undesired:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4bb63ebb-7733-4313-844c-cb7613298da3", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " 
so.Plot(penguins, x=\"sex\", y=\"body_mass_g\", linestyle=\"species\")\n", + " .facet(\"species\")\n", + " .add(so.Line(marker=\"o\"), so.Agg())\n", + " .add(so.Range(), so.Est(errorbar=\"sd\"))\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "5387e049-b343-49ea-a943-7dd9c090f184", + "metadata": {}, + "source": [ + "It's also possible to directly assign the minimum and maximum values for the range:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4e795770-4481-4e23-a49b-e828a1f5cbbd", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " penguins\n", + " .rename_axis(index=\"penguin\")\n", + " .pipe(so.Plot, x=\"penguin\", ymin=\"bill_depth_mm\", ymax=\"bill_length_mm\")\n", + " .add(so.Range(), color=\"island\")\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "2191bec6-a02e-48e0-b92c-69c38826049d", + "metadata": {}, + "source": [ + "When `min`/`max` variables are neither computed as part of a transform nor explicitly assigned, the range will cover the full extent of the data at each unique observation on the orient axis:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "63c6352e-4ef5-4cff-940e-35fa5804b2c7", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(penguins, x=\"sex\", y=\"body_mass_g\")\n", + " .facet(\"species\")\n", + " .add(so.Dots(pointsize=6))\n", + " .add(so.Range(linewidth=2))\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c215deb1-e510-4631-b999-737f5f41cae2", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff 
--git a/doc/_docstrings/objects.Shift.ipynb b/doc/_docstrings/objects.Shift.ipynb new file mode 100644 index 0000000000..e33c90c959 --- /dev/null +++ b/doc/_docstrings/objects.Shift.ipynb @@ -0,0 +1,94 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "2605c8d0-5872-4dff-9172-db81fac1cee1", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn.objects as so\n", + "from seaborn import load_dataset\n", + "penguins = load_dataset(\"penguins\")\n", + "diamonds = load_dataset(\"diamonds\")" + ] + }, + { + "cell_type": "raw", + "id": "e70d701a-cd7c-4b38-aaa0-4729e2be56d9", + "metadata": {}, + "source": [ + "Use this transform to layer multiple marks that would otherwise overlap and be hard to interpret:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5ea7a2c4-cb69-4ad0-8ea8-73067b756371", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(penguins, \"species\", \"body_mass_g\")\n", + " .add(so.Dots(), so.Jitter())\n", + " .add(so.Range(), so.Perc([25, 75]), so.Shift(x=.2))\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "940b87b2-04fb-40ba-a62f-52f461039ab9", + "metadata": {}, + "source": [ + "For y variables with a nominal scale, bear in mind that the axis will be inverted and a positive shift will move downwards:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "54b5f728-4fbc-474a-8865-0f58d0ad9b0b", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(diamonds, \"carat\", \"clarity\")\n", + " .add(so.Dots(), so.Jitter())\n", + " .add(so.Range(), so.Perc([25, 75]), so.Shift(y=.25))\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "78d9bb6a-ea3d-491e-b43e-25efd386bd59", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + 
"name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/objects.Stack.ipynb b/doc/_docstrings/objects.Stack.ipynb new file mode 100644 index 0000000000..7878db9a6a --- /dev/null +++ b/doc/_docstrings/objects.Stack.ipynb @@ -0,0 +1,89 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "87244f49-8cf2-4668-a556-a8c7828b31bf", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn.objects as so\n", + "from seaborn import load_dataset\n", + "titanic = load_dataset(\"titanic\").sort_values(\"alive\", ascending=False)" + ] + }, + { + "cell_type": "raw", + "id": "c9a1a7db-f365-4c5f-85ae-1f00e15b0af9", + "metadata": {}, + "source": [ + "This transform applies a vertical shift to eliminate overlap between marks with a baseline, such as :class:`Bar` or :class:`Area`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "07579f71-842d-4dc1-98ab-38652409238d", + "metadata": {}, + "outputs": [], + "source": [ + "so.Plot(titanic, x=\"class\", color=\"sex\").add(so.Bar(), so.Count(), so.Stack())" + ] + }, + { + "cell_type": "raw", + "id": "2488a821-3bf1-4bb9-9963-bf726d11925c", + "metadata": {}, + "source": [ + "Stacking can make it much harder to compare values between groups that get shifted, but it can work well when depicting a part-whole relationship:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dcb8ea58-3cf2-455b-b6b7-98b434f2f152", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(titanic, x=\"age\", alpha=\"alive\")\n", + " .facet(\"sex\")\n", + " .add(so.Bars(), so.Hist(binwidth=10), so.Stack())\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"b649198f-898e-4103-84bc-d74de71de5a7", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/objects.Text.ipynb b/doc/_docstrings/objects.Text.ipynb new file mode 100644 index 0000000000..4d8f3204af --- /dev/null +++ b/doc/_docstrings/objects.Text.ipynb @@ -0,0 +1,188 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "cd1cdefe-b8c1-40b9-be31-006d52ec9f18", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn.objects as so\n", + "from seaborn import load_dataset\n", + "glue = (\n", + " load_dataset(\"glue\")\n", + " .pivot(index=[\"Model\", \"Encoder\"], columns=\"Task\", values=\"Score\")\n", + " .assign(Average=lambda x: x.mean(axis=1).round(1))\n", + " .sort_values(\"Average\", ascending=False)\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "3e49ffb1-8778-4cd5-80d6-9d7e1438bc9c", + "metadata": {}, + "source": [ + "Add text at x/y locations on the plot:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3bf21068-d39e-436c-8deb-aa1b15aeb2b3", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(glue, x=\"SST-2\", y=\"MRPC\", text=\"Model\")\n", + " .add(so.Text())\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "a4b9a8b2-6603-46db-9ede-3b3fb45e0e64", + "metadata": {}, + "source": [ + "Add bar annotations, horizontally-aligned with `halign`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f68501f0-c868-439e-9485-d71cca86ea47", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", 
+ " so.Plot(glue, x=\"Average\", y=\"Model\", text=\"Average\")\n", + " .add(so.Bar())\n", + " .add(so.Text(color=\"w\", halign=\"right\"))\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "a9d39479-0afa-477b-8403-fe92a54643c9", + "metadata": {}, + "source": [ + "Fine-tune the alignment using `offset`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b5da4a9d-79f3-4c11-bab3-f89da8512ce4", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(glue, x=\"Average\", y=\"Model\", text=\"Average\")\n", + " .add(so.Bar())\n", + " .add(so.Text(color=\"w\", halign=\"right\", offset=6))\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "e9c43798-70d5-42b5-bd91-b85684d1b671", + "metadata": {}, + "source": [ + "Add text above dots, mapping the text color with a third variable:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b2d26ebc-24ac-4531-9ba2-fa03720c58bc", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(glue, x=\"SST-2\", y=\"MRPC\", color=\"Encoder\", text=\"Model\")\n", + " .add(so.Dot())\n", + " .add(so.Text(valign=\"bottom\"))\n", + "\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "f31aaa38-6728-4299-8422-8762c52c9857", + "metadata": {}, + "source": [ + "Map the text alignment for better use of space:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cf4bbf0c-0c5f-4c31-b971-720ea8910918", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(glue, x=\"RTE\", y=\"MRPC\", color=\"Encoder\", text=\"Model\")\n", + " .add(so.Dot())\n", + " .add(so.Text(), halign=\"Encoder\")\n", + " .scale(halign={\"LSTM\": \"left\", \"Transformer\": \"right\"})\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "a5de35a6-1ccf-4958-8013-edd9ed1cd4b0", + "metadata": {}, + "source": [ + "Use additional matplotlib parameters to control the appearance of the text:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"9c4be188-1614-4c19-9bd7-b07e986f6a23", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(glue, x=\"RTE\", y=\"MRPC\", color=\"Encoder\", text=\"Model\")\n", + " .add(so.Dot())\n", + " .add(so.Text({\"fontweight\": \"bold\"}), halign=\"Encoder\")\n", + " .scale(halign={\"LSTM\": \"left\", \"Transformer\": \"right\"})\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "95fb7aee-090a-4415-917c-b5258d2b298b", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/pairplot.ipynb b/doc/_docstrings/pairplot.ipynb new file mode 100644 index 0000000000..7aa8d45b86 --- /dev/null +++ b/doc/_docstrings/pairplot.ipynb @@ -0,0 +1,225 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "sns.set_theme(style=\"ticks\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The simplest invocation uses :func:`scatterplot` for each pairing of the variables and :func:`histplot` for the marginal plots along the diagonal:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "penguins = sns.load_dataset(\"penguins\")\n", + "sns.pairplot(penguins)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Assigning a ``hue`` variable adds a semantic mapping and changes the default marginal plot to a layered kernel density estimate (KDE):" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.pairplot(penguins, hue=\"species\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "It's possible to force marginal histograms:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.pairplot(penguins, hue=\"species\", diag_kind=\"hist\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The ``kind`` parameter determines both the diagonal and off-diagonal plotting style. Several options are available, including using :func:`kdeplot` to draw KDEs:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.pairplot(penguins, kind=\"kde\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Or :func:`histplot` to draw both bivariate and univariate histograms:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.pairplot(penguins, kind=\"hist\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The ``markers`` parameter applies a style mapping on the off-diagonal axes. 
Currently, it will be redundant with the ``hue`` variable:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.pairplot(penguins, hue=\"species\", markers=[\"o\", \"s\", \"D\"])" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "As with other figure-level functions, the size of the figure is controlled by setting the ``height`` of each individual subplot:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.pairplot(penguins, height=1.5)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Use ``vars`` or ``x_vars`` and ``y_vars`` to select the variables to plot:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.pairplot(\n", + " penguins,\n", + " x_vars=[\"bill_length_mm\", \"bill_depth_mm\", \"flipper_length_mm\"],\n", + " y_vars=[\"bill_length_mm\", \"bill_depth_mm\"],\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Set ``corner=True`` to plot only the lower triangle:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.pairplot(penguins, corner=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The ``plot_kws`` and ``diag_kws`` parameters accept dicts of keyword arguments to customize the off-diagonal and diagonal plots, respectively:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.pairplot(\n", + " penguins,\n", + " plot_kws=dict(marker=\"+\", linewidth=1),\n", + " diag_kws=dict(fill=False),\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The return object is the underlying :class:`PairGrid`, which can be used to further customize the plot:" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.pairplot(penguins, diag_kind=\"kde\")\n", + "g.map_lower(sns.kdeplot, levels=4, color=\".2\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/doc/_docstrings/plotting_context.ipynb b/doc/_docstrings/plotting_context.ipynb new file mode 100644 index 0000000000..43009c2aa7 --- /dev/null +++ b/doc/_docstrings/plotting_context.ipynb @@ -0,0 +1,110 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "perceived-worry", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn as sns" + ] + }, + { + "cell_type": "markdown", + "id": "seventh-volleyball", + "metadata": {}, + "source": [ + "Calling with no arguments will return the current defaults for the parameters that get scaled:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "roman-villa", + "metadata": { + "tags": [ + "show-output" + ] + }, + "outputs": [], + "source": [ + "sns.plotting_context()" + ] + }, + { + "cell_type": "markdown", + "id": "handled-texas", + "metadata": {}, + "source": [ + "Calling with the name of a predefined style will show those values:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "distant-caribbean", + "metadata": { + "tags": [ + "show-output" + ] + }, + "outputs": [], + "source": [ + "sns.plotting_context(\"talk\")" + ] + }, + { + "cell_type": "markdown", + "id": "lightweight-anime", + "metadata": {}, + "source": [ + "Use the function as a context manager to temporarily change the parameter values:" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "id": "contemporary-hampshire", + "metadata": {}, + "outputs": [], + "source": [ + "with sns.plotting_context(\"talk\"):\n", + " sns.lineplot(x=[\"A\", \"B\", \"C\"], y=[1, 3, 2])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "accompanied-brisbane", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/pointplot.ipynb b/doc/_docstrings/pointplot.ipynb new file mode 100644 index 0000000000..efa792215e --- /dev/null +++ b/doc/_docstrings/pointplot.ipynb @@ -0,0 +1,266 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "43f842ee-44c9-476b-ab08-112d23e2effb", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "sns.set_theme(style=\"whitegrid\")\n", + "penguins = sns.load_dataset(\"penguins\")\n", + "flights = sns.load_dataset(\"flights\")" + ] + }, + { + "cell_type": "raw", + "id": "f25d3647-9fad-47b2-b49d-db6f5b5c3795", + "metadata": {}, + "source": [ + "Group by a categorical variable and plot aggregated values, with confidence intervals:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a865fec-c034-4000-938d-b7cd89157495", + "metadata": {}, + "outputs": [], + "source": [ + "sns.pointplot(data=penguins, x=\"island\", y=\"body_mass_g\")" + ] + }, + { + "cell_type": "raw", + "id": "c65257ad-c87f-4b78-9b6c-cf792a691598", + "metadata": {}, + "source": [ + "Add a second layer of grouping and differentiate with color:" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "f27011f1-0e3c-4dc4-818e-4a77930977b9", + "metadata": {}, + "outputs": [], + "source": [ + "sns.pointplot(data=penguins, x=\"island\", y=\"body_mass_g\", hue=\"sex\")" + ] + }, + { + "cell_type": "raw", + "id": "d51a887c-1f64-4ddf-af31-0476a983818b", + "metadata": {}, + "source": [ + "Redundantly code the `hue` variable using the markers and linestyles for better accessibility:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1bfb8bc1-6f9a-49a1-8b1d-6bcc992cb249", + "metadata": {}, + "outputs": [], + "source": [ + "sns.pointplot(\n", + " data=penguins,\n", + " x=\"island\", y=\"body_mass_g\", hue=\"sex\",\n", + " markers=[\"o\", \"s\"], linestyles=[\"-\", \"--\"],\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "44a11b7a-6847-4225-906e-58bbb56c6966", + "metadata": {}, + "source": [ + "Use the error bars to represent the standard deviation of each distribution:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "386b25eb-7ab7-4a1d-9498-cef3e4fd3e6b", + "metadata": {}, + "outputs": [], + "source": [ + "sns.pointplot(data=penguins, x=\"island\", y=\"body_mass_g\", errorbar=\"sd\")" + ] + }, + { + "cell_type": "raw", + "id": "7490d4b8-d2ca-4cad-9ba3-5862aafb8165", + "metadata": {}, + "source": [ + "Customize the appearance of the plot:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "50b14810-2299-479c-b6c5-0fd10c4ed3de", + "metadata": {}, + "outputs": [], + "source": [ + "sns.pointplot(\n", + " data=penguins, x=\"body_mass_g\", y=\"island\",\n", + " errorbar=(\"pi\", 100), capsize=.4,\n", + " color=\".5\", linestyle=\"none\", marker=\"D\",\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "479e4e0c-42c9-4d79-88eb-e397840a7e78", + "metadata": {}, + "source": [ + "\"Dodge\" the artists along the categorical axis to reduce overplotting:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8f94d069-c5f4-4579-a4bf-6d755962d48d", + "metadata": 
{}, + "outputs": [], + "source": [ + "sns.pointplot(data=penguins, x=\"sex\", y=\"bill_depth_mm\", hue=\"species\", dodge=True)" + ] + }, + { + "cell_type": "raw", + "id": "00273ada-cd12-410a-a268-38243d6514ae", + "metadata": {}, + "source": [ + "Dodge by a specific amount, relative to the width allotted for each level:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "94d6718d-2cfe-44f4-88e5-f47461d7d51f", + "metadata": {}, + "outputs": [], + "source": [ + "sns.stripplot(\n", + " data=penguins, x=\"species\", y=\"bill_depth_mm\", hue=\"sex\",\n", + " dodge=True, alpha=.2, legend=False,\n", + ")\n", + "sns.pointplot(\n", + " data=penguins, x=\"species\", y=\"bill_depth_mm\", hue=\"sex\",\n", + " dodge=.4, linestyle=\"none\", errorbar=None,\n", + " marker=\"_\", markersize=20, markeredgewidth=3,\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "e205e7c8-6b11-44e6-b43f-7416c427215d", + "metadata": {}, + "source": [ + "When variables are not explicitly assigned and the dataset is two-dimensional, the plot will aggregate over each column:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e721e3b7-25c8-4e9c-a748-1c36b06d1100", + "metadata": {}, + "outputs": [], + "source": [ + "flights_wide = flights.pivot(index=\"year\", columns=\"month\", values=\"passengers\")\n", + "sns.pointplot(flights_wide)" + ] + }, + { + "cell_type": "raw", + "id": "0d2d7811-06e3-4882-86e3-225071c864f7", + "metadata": {}, + "source": [ + "With one-dimensional data, each value is plotted (relative to its key or index when available):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "acd91ddc-2a27-4b05-80fa-00ddcf1ae63e", + "metadata": {}, + "outputs": [], + "source": [ + "sns.pointplot(flights_wide[\"Jun\"])" + ] + }, + { + "cell_type": "raw", + "id": "573c2ba7-1e46-494d-9076-19b1c04b58c1", + "metadata": {}, + "source": [ + "Control the formatting of the categorical variable as it appears in the tick labels:" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "7af6ab85-bfb1-42c2-8c68-2c91f22968d6", + "metadata": {}, + "outputs": [], + "source": [ + "sns.pointplot(flights_wide[\"Jun\"], formatter=lambda x: f\"'{x % 1900}\")" + ] + }, + { + "cell_type": "raw", + "id": "c319e82e-1387-4c2b-8daf-3b7174cad180", + "metadata": {}, + "source": [ + "Or preserve the native scale of the grouping variable:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e92e8af9-b734-4e4d-a240-7f3982fcfbcc", + "metadata": {}, + "outputs": [], + "source": [ + "ax = sns.pointplot(flights_wide[\"Jun\"], native_scale=True)\n", + "ax.plot(1955, 335, marker=\"*\", color=\"r\", markersize=10)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f88be5c-7919-48cf-a84f-a5e6ac86e888", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/regplot.ipynb b/doc/_docstrings/regplot.ipynb new file mode 100644 index 0000000000..2b1ef937a6 --- /dev/null +++ b/doc/_docstrings/regplot.ipynb @@ -0,0 +1,251 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "611aed40-d120-4fbf-b1e6-9712ed8167fc", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import seaborn as sns\n", + "sns.set_theme()\n", + "mpg = sns.load_dataset(\"mpg\")" + ] + }, + { + "cell_type": "raw", + "id": "61bebade-0c45-4e99-9567-dfe0bc2dc6e1", + "metadata": {}, + "source": [ + "Plot the relationship between two variables in a DataFrame:" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "2f4107db-d89b-46ad-a4c6-9ba1181b2122", + "metadata": {}, + "outputs": [], + "source": [ + "sns.regplot(data=mpg, x=\"weight\", y=\"acceleration\")" + ] + }, + { + "cell_type": "raw", + "id": "146225d0-2e38-4b92-8e64-6d7f78311f40", + "metadata": {}, + "source": [ + "Fit a higher-order polynomial regression to capture nonlinear trends:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ba29488c-8a45-4387-bfb1-71a584fa1b3d", + "metadata": {}, + "outputs": [], + "source": [ + "sns.regplot(data=mpg, x=\"weight\", y=\"mpg\", order=2)" + ] + }, + { + "cell_type": "raw", + "id": "0ad71f54-b362-465e-8780-1d8b99ff2d51", + "metadata": {}, + "source": [ + "Alternatively, fit a log-linear regression:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aae2acaa-ed07-4568-97d2-8665603eb7eb", + "metadata": {}, + "outputs": [], + "source": [ + "sns.regplot(data=mpg, x=\"displacement\", y=\"mpg\", logx=True)" + ] + }, + { + "cell_type": "raw", + "id": "eef37c8a-7190-465c-b963-076ec17e1b3a", + "metadata": {}, + "source": [ + "Or use a locally-weighted (LOWESS) smoother:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9276c469-72ea-4c36-9b7c-19ecba564376", + "metadata": {}, + "outputs": [], + "source": [ + "sns.regplot(data=mpg, x=\"horsepower\", y=\"mpg\", lowess=True)" + ] + }, + { + "cell_type": "raw", + "id": "d18f1534-598e-4f08-91dd-0c4020f30b00", + "metadata": {}, + "source": [ + "Fit a logistic regression when the response variable is binary:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79ec9180-10c9-4910-9713-dcd1fdd266be", + "metadata": {}, + "outputs": [], + "source": [ + "sns.regplot(x=mpg[\"weight\"], y=mpg[\"origin\"].eq(\"usa\").rename(\"from_usa\"), logistic=True)" + ] + }, + { + "cell_type": "raw", + "id": "2e165783-d505-4acb-a20a-d22a49965c2b", + "metadata": {}, + "source": [ + "Fit a robust regression to downweight the influence of 
outliers:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fd5cf940-de8f-4230-8b04-5c650418f3c4", + "metadata": {}, + "outputs": [], + "source": [ + "sns.regplot(data=mpg, x=\"horsepower\", y=\"weight\", robust=True)" + ] + }, + { + "cell_type": "raw", + "id": "e7d43c4e-e819-4634-8269-cbf5de4a2f24", + "metadata": {}, + "source": [ + "Disable the confidence interval for faster plotting:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b21384ff-6395-4fa9-b7da-63e8a951d8a5", + "metadata": {}, + "outputs": [], + "source": [ + "sns.regplot(data=mpg, x=\"weight\", y=\"horsepower\", ci=None)" + ] + }, + { + "cell_type": "raw", + "id": "06e979ac-f418-4ead-bde1-ec684d0545ff", + "metadata": {}, + "source": [ + "Jitter the scatterplot when the `x` variable is discrete:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "543a8ace-a89e-4af9-bf6d-a8722ebdfac5", + "metadata": {}, + "outputs": [], + "source": [ + "sns.regplot(data=mpg, x=\"cylinders\", y=\"weight\", x_jitter=.15)" + ] + }, + { + "cell_type": "raw", + "id": "c3042eb2-0933-4886-9bff-88c276371516", + "metadata": {}, + "source": [ + "Or aggregate over the distinct `x` values:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "158c6e36-8858-415b-b78c-7d8d79879ee5", + "metadata": {}, + "outputs": [], + "source": [ + "sns.regplot(data=mpg, x=\"cylinders\", y=\"acceleration\", x_estimator=np.mean, order=2)" + ] + }, + { + "cell_type": "raw", + "id": "d9cefe7a-7f86-4353-95da-d7e72e65d4fc", + "metadata": {}, + "source": [ + "With a continuous `x` variable, bin and then aggregate:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c48829b-2e3b-4e6b-9b1d-5ba69f713617", + "metadata": {}, + "outputs": [], + "source": [ + "sns.regplot(data=mpg, x=\"weight\", y=\"mpg\", x_bins=np.arange(2000, 5500, 250), order=2)" + ] + }, + { + "cell_type": "raw", + "id": "dfe5a36a-20b0-4e69-b986-fede8e1506cc", + "metadata": {}, + 
"source": [ + "Customize the appearance of various elements:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "df689a39-c5e1-4f7b-a8f9-8ffb09b95238", + "metadata": {}, + "outputs": [], + "source": [ + "sns.regplot(\n", + " data=mpg, x=\"weight\", y=\"horsepower\",\n", + " ci=99, marker=\"x\", color=\".3\", line_kws=dict(color=\"r\"),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d625745b-3706-447b-9224-88e6cb1eb7f9", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/relplot.ipynb b/doc/_docstrings/relplot.ipynb new file mode 100644 index 0000000000..8d634045e5 --- /dev/null +++ b/doc/_docstrings/relplot.ipynb @@ -0,0 +1,265 @@ +{ + "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "These examples will illustrate only some of the functionality that :func:`relplot` is capable of. For more information, consult the examples for :func:`scatterplot` and :func:`lineplot`, which are used when ``kind=\"scatter\"`` or ``kind=\"line\"``, respectively." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "sns.set_theme(style=\"ticks\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "To illustrate ``kind=\"scatter\"`` (the default style of plot), we will use the \"tips\" dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tips = sns.load_dataset(\"tips\")\n", + "tips.head()" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Assigning ``x`` and ``y`` and any semantic mapping variables will draw a single plot:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.relplot(data=tips, x=\"total_bill\", y=\"tip\", hue=\"day\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Assigning a ``col`` variable creates a faceted figure with multiple subplots arranged across the columns of the grid:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.relplot(data=tips, x=\"total_bill\", y=\"tip\", hue=\"day\", col=\"time\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Different variables can be assigned to facet on both the columns and rows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.relplot(data=tips, x=\"total_bill\", y=\"tip\", hue=\"day\", col=\"time\", row=\"sex\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "When the variable assigned to ``col`` has many levels, it can be \"wrapped\" across multiple rows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.relplot(data=tips, x=\"total_bill\", y=\"tip\", 
hue=\"time\", col=\"day\", col_wrap=2)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Assigning multiple semantic variables can show multi-dimensional relationships, but be mindful to avoid making an overly-complicated plot." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.relplot(\n", + " data=tips, x=\"total_bill\", y=\"tip\", col=\"time\",\n", + " hue=\"time\", size=\"size\", style=\"sex\",\n", + " palette=[\"b\", \"r\"], sizes=(10, 100)\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "When there is a natural continuity to one of the variables, it makes more sense to show lines instead of points. To draw the figure using :func:`lineplot`, set ``kind=\"line\"``. We will illustrate this effect with the \"fmri\" dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fmri = sns.load_dataset(\"fmri\")\n", + "fmri.head()" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Using ``kind=\"line\"`` offers the same flexibility for semantic mappings as ``kind=\"scatter\"``, but :func:`lineplot` transforms the data more before plotting. Observations are sorted by their ``x`` value, and repeated observations are aggregated. 
By default, the resulting plot shows the mean and 95% CI for each unit:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.relplot(\n", + " data=fmri, x=\"timepoint\", y=\"signal\", col=\"region\",\n", + " hue=\"event\", style=\"event\", kind=\"line\",\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The size and shape of the figure are parametrized by the ``height`` and ``aspect`` ratio of each individual facet:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.relplot(\n", + " data=fmri,\n", + " x=\"timepoint\", y=\"signal\",\n", + " hue=\"event\", style=\"event\", col=\"region\",\n", + " height=4, aspect=.7, kind=\"line\"\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The object returned by :func:`relplot` is always a :class:`FacetGrid`, which has several methods that allow you to quickly tweak the title, labels, and other aspects of the plot:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.relplot(\n", + " data=fmri,\n", + " x=\"timepoint\", y=\"signal\",\n", + " hue=\"event\", style=\"event\", col=\"region\",\n", + " height=4, aspect=.7, kind=\"line\"\n", + ")\n", + "(g.map(plt.axhline, y=0, color=\".7\", dashes=(2, 1), zorder=0)\n", + " .set_axis_labels(\"Timepoint\", \"Percent signal change\")\n", + " .set_titles(\"Region: {col_name} cortex\")\n", + " .tight_layout(w_pad=0))" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "It is also possible to use wide-form data with :func:`relplot`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "flights_wide = (\n", + " sns.load_dataset(\"flights\")\n", + " .pivot(index=\"year\", columns=\"month\", values=\"passengers\")\n", + ")" + ] + }, + { + "cell_type": 
"raw", + "metadata": {}, + "source": [ + "Faceting is not an option in this case, but the plot will still take advantage of the external legend offered by :class:`FacetGrid`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.relplot(data=flights_wide, kind=\"line\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/doc/_docstrings/residplot.ipynb b/doc/_docstrings/residplot.ipynb new file mode 100644 index 0000000000..287462f2e0 --- /dev/null +++ b/doc/_docstrings/residplot.ipynb @@ -0,0 +1,113 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "776f8271-21ed-4707-a1ad-09d8c63ae95a", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "sns.set_theme()\n", + "mpg = sns.load_dataset(\"mpg\")" + ] + }, + { + "cell_type": "raw", + "id": "85717971-adc9-45b0-9c4b-3f022d96179c", + "metadata": {}, + "source": [ + "Pass `x` and `y` to see a scatter plot of the residuals after fitting a simple regression model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5aea4655-fb51-4b51-b41d-4769de50e956", + "metadata": {}, + "outputs": [], + "source": [ + "sns.residplot(data=mpg, x=\"weight\", y=\"displacement\")" + ] + }, + { + "cell_type": "raw", + "id": "175b6287-9240-493f-94bc-9d18258e952b", + "metadata": {}, + "source": [ + "Structure in the residual plot can reveal a violation of linear regression assumptions:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39aa84c2-d623-44be-9b0b-746f52b55fd4", 
+ "metadata": {}, + "outputs": [], + "source": [ + "sns.residplot(data=mpg, x=\"horsepower\", y=\"mpg\")" + ] + }, + { + "cell_type": "raw", + "id": "bd9641e4-8df5-4751-b261-6443888fbbfe", + "metadata": {}, + "source": [ + "Remove higher-order trends to test whether that stabilizes the residuals:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "03a68199-1272-464b-8b85-7a309c22a4a6", + "metadata": {}, + "outputs": [], + "source": [ + "sns.residplot(data=mpg, x=\"horsepower\", y=\"mpg\", order=2)" + ] + }, + { + "cell_type": "raw", + "id": "b17750af-0393-4c53-8057-bf95d0de821a", + "metadata": {}, + "source": [ + "Adding a LOWESS curve can help reveal or emphasize structure:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "494359bd-47b2-426e-9c35-14b5351eec93", + "metadata": {}, + "outputs": [], + "source": [ + "sns.residplot(data=mpg, x=\"horsepower\", y=\"mpg\", lowess=True, line_kws=dict(color=\"r\"))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/rugplot.ipynb b/doc/_docstrings/rugplot.ipynb new file mode 100644 index 0000000000..ce5da483c2 --- /dev/null +++ b/doc/_docstrings/rugplot.ipynb @@ -0,0 +1,137 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Add a rug along one of the axes:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import seaborn as sns; sns.set_theme()\n", + "tips = sns.load_dataset(\"tips\")\n", + "sns.kdeplot(data=tips, x=\"total_bill\")\n", + "sns.rugplot(data=tips, x=\"total_bill\")" + ] + }, + 
{ + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Add a rug along both axes:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.scatterplot(data=tips, x=\"total_bill\", y=\"tip\")\n", + "sns.rugplot(data=tips, x=\"total_bill\", y=\"tip\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Represent a third variable with hue mapping:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.scatterplot(data=tips, x=\"total_bill\", y=\"tip\", hue=\"time\")\n", + "sns.rugplot(data=tips, x=\"total_bill\", y=\"tip\", hue=\"time\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Draw a taller rug:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.scatterplot(data=tips, x=\"total_bill\", y=\"tip\")\n", + "sns.rugplot(data=tips, x=\"total_bill\", y=\"tip\", height=.1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Put the rug outside the axes:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.scatterplot(data=tips, x=\"total_bill\", y=\"tip\")\n", + "sns.rugplot(data=tips, x=\"total_bill\", y=\"tip\", height=-.02, clip_on=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Show the density of a larger dataset using thinner lines and alpha blending:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "diamonds = sns.load_dataset(\"diamonds\")\n", + "sns.scatterplot(data=diamonds, x=\"carat\", y=\"price\", s=5)\n", + "sns.rugplot(data=diamonds, x=\"carat\", y=\"price\", lw=1, alpha=.005)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": 
{ + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/doc/_docstrings/scatterplot.ipynb b/doc/_docstrings/scatterplot.ipynb new file mode 100644 index 0000000000..4b78f8eeab --- /dev/null +++ b/doc/_docstrings/scatterplot.ipynb @@ -0,0 +1,307 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "sns.set_theme()" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "These examples will use the \"tips\" dataset, which has a mixture of numeric and categorical variables:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tips = sns.load_dataset(\"tips\")\n", + "tips.head()" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Passing long-form data and assigning ``x`` and ``y`` will draw a scatter plot between two variables:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.scatterplot(data=tips, x=\"total_bill\", y=\"tip\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Assigning a variable to ``hue`` will map its levels to the color of the points:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.scatterplot(data=tips, x=\"total_bill\", y=\"tip\", hue=\"time\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Assigning the 
same variable to ``style`` will also vary the markers and create a more accessible plot:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.scatterplot(data=tips, x=\"total_bill\", y=\"tip\", hue=\"time\", style=\"time\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Assigning ``hue`` and ``style`` to different variables will vary colors and markers independently:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.scatterplot(data=tips, x=\"total_bill\", y=\"tip\", hue=\"day\", style=\"time\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "If the variable assigned to ``hue`` is numeric, the semantic mapping will be quantitative and use a different default palette:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.scatterplot(data=tips, x=\"total_bill\", y=\"tip\", hue=\"size\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Pass the name of a categorical palette or explicit colors (as a Python list or dictionary) to force categorical mapping of the ``hue`` variable:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.scatterplot(data=tips, x=\"total_bill\", y=\"tip\", hue=\"size\", palette=\"deep\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "If there are a large number of unique numeric values, the legend will show a representative, evenly-spaced set:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tip_rate = tips.eval(\"tip / total_bill\").rename(\"tip_rate\")\n", + "sns.scatterplot(data=tips, x=\"total_bill\", y=\"tip\", hue=tip_rate)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "A numeric variable can 
also be assigned to ``size`` to apply a semantic mapping to the areas of the points:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.scatterplot(data=tips, x=\"total_bill\", y=\"tip\", hue=\"size\", size=\"size\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Control the range of marker areas with ``sizes``, and set ``legend=\"full\"`` to force every unique value to appear in the legend:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.scatterplot(\n", + " data=tips, x=\"total_bill\", y=\"tip\", hue=\"size\", size=\"size\",\n", + " sizes=(20, 200), legend=\"full\"\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Pass a tuple of values or a :class:`matplotlib.colors.Normalize` object to ``hue_norm`` to control the quantitative hue mapping:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.scatterplot(\n", + " data=tips, x=\"total_bill\", y=\"tip\", hue=\"size\", size=\"size\",\n", + " sizes=(20, 200), hue_norm=(0, 7), legend=\"full\"\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Control the specific markers used to map the ``style`` variable by passing a Python list or dictionary of marker codes:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "markers = {\"Lunch\": \"s\", \"Dinner\": \"X\"}\n", + "sns.scatterplot(data=tips, x=\"total_bill\", y=\"tip\", style=\"time\", markers=markers)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Additional keyword arguments are passed to :meth:`matplotlib.axes.Axes.scatter`, allowing you to directly set the attributes of the plot that are not semantically mapped:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, 
+ "outputs": [], + "source": [ + "sns.scatterplot(data=tips, x=\"total_bill\", y=\"tip\", s=100, color=\".2\", marker=\"+\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The previous examples used a long-form dataset. When working with wide-form data, each column will be plotted against its index using both ``hue`` and ``style`` mapping:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "index = pd.date_range(\"1 1 2000\", periods=100, freq=\"m\", name=\"date\")\n", + "data = np.random.randn(100, 4).cumsum(axis=0)\n", + "wide_df = pd.DataFrame(data, index, [\"a\", \"b\", \"c\", \"d\"])\n", + "sns.scatterplot(data=wide_df)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Use :func:`relplot` to combine :func:`scatterplot` and :class:`FacetGrid`. This allows grouping within additional categorical variables, and plotting them across multiple subplots.\n", + "\n", + "Using :func:`relplot` is safer than using :class:`FacetGrid` directly, as it ensures synchronization of the semantic mappings across facets." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.relplot(\n", + " data=tips, x=\"total_bill\", y=\"tip\",\n", + " col=\"time\", hue=\"day\", style=\"day\",\n", + " kind=\"scatter\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/doc/_docstrings/set_context.ipynb b/doc/_docstrings/set_context.ipynb new file mode 100644 index 0000000000..97c8679cb7 --- /dev/null +++ b/doc/_docstrings/set_context.ipynb @@ -0,0 +1,104 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "thorough-equipment", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn as sns" + ] + }, + { + "cell_type": "markdown", + "id": "canadian-protection", + "metadata": {}, + "source": [ + "Call the function with the name of a context to set the default for all plots:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "freelance-leonard", + "metadata": {}, + "outputs": [], + "source": [ + "sns.set_context(\"notebook\")\n", + "sns.lineplot(x=[0, 1, 2], y=[1, 3, 2])" + ] + }, + { + "cell_type": "markdown", + "id": "studied-adventure", + "metadata": {}, + "source": [ + "You can independently scale the font elements relative to the current context:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "irish-digest", + "metadata": {}, + "outputs": [], + "source": [ + "sns.set_context(\"notebook\", font_scale=1.25)\n", + 
"sns.lineplot(x=[0, 1, 2], y=[1, 3, 2])" + ] + }, + { + "cell_type": "markdown", + "id": "fourth-technical", + "metadata": {}, + "source": [ + "It is also possible to override some of the parameters with specific values:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "advance-request", + "metadata": {}, + "outputs": [], + "source": [ + "sns.set_context(\"notebook\", rc={\"lines.linewidth\": 3})\n", + "sns.lineplot(x=[0, 1, 2], y=[1, 3, 2])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "compatible-string", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/set_style.ipynb b/doc/_docstrings/set_style.ipynb new file mode 100644 index 0000000000..7780bcf95a --- /dev/null +++ b/doc/_docstrings/set_style.ipynb @@ -0,0 +1,85 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "practical-announcement", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn as sns" + ] + }, + { + "cell_type": "markdown", + "id": "suffering-emerald", + "metadata": {}, + "source": [ + "Call the function with the name of a seaborn style to set the default for all plots:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "collaborative-struggle", + "metadata": {}, + "outputs": [], + "source": [ + "sns.set_style(\"whitegrid\")\n", + "sns.barplot(x=[\"A\", \"B\", \"C\"], y=[1, 3, 2])" + ] + }, + { + "cell_type": "markdown", + "id": "defensive-surgery", + "metadata": {}, + "source": [ + "You can also selectively override 
seaborn's default parameter values:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "coastal-sydney", + "metadata": {}, + "outputs": [], + "source": [ + "sns.set_style(\"darkgrid\", {\"grid.color\": \".6\", \"grid.linestyle\": \":\"})\n", + "sns.lineplot(x=[\"A\", \"B\", \"C\"], y=[1, 3, 2])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bright-october", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/set_theme.ipynb b/doc/_docstrings/set_theme.ipynb new file mode 100644 index 0000000000..c2820ab9cd --- /dev/null +++ b/doc/_docstrings/set_theme.ipynb @@ -0,0 +1,161 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "flush-block", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "markdown", + "id": "remarkable-confirmation", + "metadata": {}, + "source": [ + "By default, seaborn plots will be made with the current values of the matplotlib rcParams:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "viral-highway", + "metadata": {}, + "outputs": [], + "source": [ + "sns.barplot(x=[\"A\", \"B\", \"C\"], y=[1, 3, 2])" + ] + }, + { + "cell_type": "markdown", + "id": "hungarian-poster", + "metadata": {}, + "source": [ + "Calling this function with no arguments will activate seaborn's \"default\" theme:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "front-february", + "metadata": {}, + 
"outputs": [], + "source": [ + "sns.set_theme()\n", + "sns.barplot(x=[\"A\", \"B\", \"C\"], y=[1, 3, 2])" + ] + }, + { + "cell_type": "markdown", + "id": "daily-mills", + "metadata": {}, + "source": [ + "Note that this will take effect for *all* matplotlib plots, including those not made using seaborn:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "essential-replica", + "metadata": {}, + "outputs": [], + "source": [ + "plt.bar([\"A\", \"B\", \"C\"], [1, 3, 2])" + ] + }, + { + "cell_type": "markdown", + "id": "naughty-edgar", + "metadata": {}, + "source": [ + "The seaborn theme is decomposed into several distinct sets of parameters that you can control independently:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "latin-conversion", + "metadata": {}, + "outputs": [], + "source": [ + "sns.set_theme(style=\"whitegrid\", palette=\"pastel\")\n", + "sns.barplot(x=[\"A\", \"B\", \"C\"], y=[1, 3, 2])" + ] + }, + { + "cell_type": "markdown", + "id": "durable-cycling", + "metadata": {}, + "source": [ + "Pass `None` to preserve the current values for a given set of parameters:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "blessed-chuck", + "metadata": {}, + "outputs": [], + "source": [ + "sns.set_theme(style=\"white\", palette=None)\n", + "sns.barplot(x=[\"A\", \"B\", \"C\"], y=[1, 3, 2])" + ] + }, + { + "cell_type": "markdown", + "id": "present-writing", + "metadata": {}, + "source": [ + "You can also override any seaborn parameters or define additional parameters that are part of the matplotlib rc system but not included in the seaborn themes:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "floppy-effectiveness", + "metadata": {}, + "outputs": [], + "source": [ + "custom_params = {\"axes.spines.right\": False, \"axes.spines.top\": False}\n", + "sns.set_theme(style=\"ticks\", rc=custom_params)\n", + "sns.barplot(x=[\"A\", \"B\", \"C\"], y=[1, 3, 2])" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "large-transfer", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_docstrings/stripplot.ipynb b/doc/_docstrings/stripplot.ipynb new file mode 100644 index 0000000000..386ad117fd --- /dev/null +++ b/doc/_docstrings/stripplot.ipynb @@ -0,0 +1,313 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "sns.set_theme(style=\"whitegrid\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Assigning a single numeric variable shows its univariate distribution with points randomly \"jittered\" on the other axis:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tips = sns.load_dataset(\"tips\")\n", + "sns.stripplot(data=tips, x=\"total_bill\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Assigning a second variable splits the strips of points to compare categorical levels of that variable:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.stripplot(data=tips, x=\"total_bill\", y=\"day\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Show vertically-oriented strips by swapping the assignment of the categorical and numerical variables:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + 
"sns.stripplot(data=tips, x=\"day\", y=\"total_bill\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Prior to version 0.12, the levels of the categorical variable had different colors by default. To get the same effect, assign the `hue` variable explicitly:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.stripplot(data=tips, x=\"total_bill\", y=\"day\", hue=\"day\", legend=False)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Or you can assign a distinct variable to `hue` to show a multidimensional relationship:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.stripplot(data=tips, x=\"total_bill\", y=\"day\", hue=\"sex\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "If the `hue` variable is numeric, it will be mapped with a quantitative palette by default (note that this was not the case prior to version 0.12):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.stripplot(data=tips, x=\"total_bill\", y=\"day\", hue=\"size\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Use `palette` to control the color mapping, including forcing a categorical mapping by passing the name of a qualitative palette:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.stripplot(data=tips, x=\"total_bill\", y=\"day\", hue=\"size\", palette=\"deep\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "By default, the different levels of the `hue` variable are intermingled in each strip, but setting `dodge=True` will split them:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.stripplot(data=tips, x=\"total_bill\", y=\"day\", 
hue=\"sex\", dodge=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The random jitter can be disabled by setting `jitter=False`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.stripplot(data=tips, x=\"total_bill\", y=\"day\", hue=\"sex\", dodge=True, jitter=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If plotting in wide-form mode, each numeric column of the dataframe will be mapped to both `x` and `hue`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.stripplot(data=tips)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "To change the orientation while in wide-form mode, pass `orient` explicitly:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.stripplot(data=tips, orient=\"h\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The `orient` parameter is also useful when both axis variables are numeric, as it will resolve ambiguity about which dimension to group (and jitter) along:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.stripplot(data=tips, x=\"total_bill\", y=\"size\", orient=\"h\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "By default, the categorical variable will be mapped to discrete indices with a fixed scale (0, 1, ...), even when it is numeric:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.stripplot(\n", + " data=tips.query(\"size in [2, 3, 5]\"),\n", + " x=\"total_bill\", y=\"size\", orient=\"h\",\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "To disable this behavior and use the original scale of the variable, set 
`native_scale=True`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.stripplot(\n", + " data=tips.query(\"size in [2, 3, 5]\"),\n", + " x=\"total_bill\", y=\"size\", orient=\"h\",\n", + " native_scale=True,\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Further visual customization can be achieved by passing keyword arguments for :func:`matplotlib.axes.Axes.scatter`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.stripplot(\n", + " data=tips, x=\"total_bill\", y=\"day\", hue=\"time\",\n", + " jitter=False, s=20, marker=\"D\", linewidth=1, alpha=.1,\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "To make a plot with multiple facets, it is safer to use :func:`catplot` than to work with :class:`FacetGrid` directly, because :func:`catplot` will ensure that the categorical and hue variables are properly synchronized in each facet:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.catplot(data=tips, x=\"time\", y=\"total_bill\", hue=\"sex\", col=\"day\", aspect=.5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/doc/_docstrings/swarmplot.ipynb b/doc/_docstrings/swarmplot.ipynb new file mode 100644 index 0000000000..c3341c5172 --- /dev/null +++ b/doc/_docstrings/swarmplot.ipynb @@ -0,0 +1,285 @@ +{ + "cells": 
[ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "sns.set_theme(style=\"whitegrid\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Assigning a single numeric variable shows its univariate distribution with points adjusted along the other axis such that they don't overlap:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tips = sns.load_dataset(\"tips\")\n", + "sns.swarmplot(data=tips, x=\"total_bill\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Assigning a second variable splits the groups of points to compare categorical levels of that variable:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.swarmplot(data=tips, x=\"total_bill\", y=\"day\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Show vertically-oriented swarms by swapping the assignment of the categorical and numerical variables:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.swarmplot(data=tips, x=\"day\", y=\"total_bill\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Prior to version 0.12, the levels of the categorical variable had different colors by default. 
To get the same effect, assign the `hue` variable explicitly:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.swarmplot(data=tips, x=\"total_bill\", y=\"day\", hue=\"day\", legend=False)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Or you can assign a distinct variable to `hue` to show a multidimensional relationship:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.swarmplot(data=tips, x=\"total_bill\", y=\"day\", hue=\"sex\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "If the `hue` variable is numeric, it will be mapped with a quantitative palette by default (note that this was not the case prior to version 0.12):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.swarmplot(data=tips, x=\"total_bill\", y=\"day\", hue=\"size\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Use `palette` to control the color mapping, including forcing a categorical mapping by passing the name of a qualitative palette:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.swarmplot(data=tips, x=\"total_bill\", y=\"day\", hue=\"size\", palette=\"deep\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "By default, the different levels of the `hue` variable are intermingled in each swarm, but setting `dodge=True` will split them:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.swarmplot(data=tips, x=\"total_bill\", y=\"day\", hue=\"sex\", dodge=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The \"orientation\" of the plot (defined as the direction along which quantitative relationships are preserved) is usually 
inferred automatically. But in ambiguous cases, such as when both axis variables are numeric, it can be specified:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.swarmplot(data=tips, x=\"total_bill\", y=\"size\", orient=\"h\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "When the local density of points is too high, they will be forced to overlap in the \"gutters\" of each swarm and a warning will be issued. Decreasing the size of the points can help to avoid this problem:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.swarmplot(data=tips, x=\"total_bill\", y=\"size\", orient=\"h\", size=3)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "By default, the categorical variable will be mapped to discrete indices with a fixed scale (0, 1, ...), even when it is numeric:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.swarmplot(\n", + " data=tips.query(\"size in [2, 3, 5]\"),\n", + " x=\"total_bill\", y=\"size\", orient=\"h\",\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "To disable this behavior and use the original scale of the variable, set `native_scale=True` (notice how this also changes the order of the variables on the y axis):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.swarmplot(\n", + " data=tips.query(\"size in [2, 3, 5]\"),\n", + " x=\"total_bill\", y=\"size\", orient=\"h\",\n", + " native_scale=True,\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Further visual customization can be achieved by passing keyword arguments for :func:`matplotlib.axes.Axes.scatter`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], 
+ "source": [ + "sns.swarmplot(\n", + " data=tips, x=\"total_bill\", y=\"day\",\n", + " marker=\"x\", linewidth=1, \n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "To make a plot with multiple facets, it is safer to use :func:`catplot` with `kind=\"swarm\"` than to work with :class:`FacetGrid` directly, because :func:`catplot` will ensure that the categorical and hue variables are properly synchronized in each facet:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.catplot(\n", + " data=tips, kind=\"swarm\",\n", + " x=\"time\", y=\"total_bill\", hue=\"sex\", col=\"day\",\n", + " aspect=.5\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/doc/_docstrings/violinplot.ipynb b/doc/_docstrings/violinplot.ipynb new file mode 100644 index 0000000000..25ea947f65 --- /dev/null +++ b/doc/_docstrings/violinplot.ipynb @@ -0,0 +1,324 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "cc19031c-bc2f-4294-95ce-3a2d9b86f44d", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "sns.set_theme(style=\"whitegrid\")" + ] + }, + { + "cell_type": "raw", + "id": "c72b5394-ff5f-42b1-b083-2e42b2ffdf0f", + "metadata": {}, + "source": [ + "The default violinplot represents a distribution two ways: a patch showing a symmetric kernel density estimate 
(KDE), and the quartiles / whiskers of a box plot:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "27d578fb-1c20-4d31-b93d-b1b4a053992b", + "metadata": {}, + "outputs": [], + "source": [ + "df = sns.load_dataset(\"titanic\")\n", + "sns.violinplot(x=df[\"age\"])" + ] + }, + { + "cell_type": "raw", + "id": "e7d25589-0dc9-48ce-92f9-ab61ffbf964a", + "metadata": {}, + "source": [ + "In a bivariate plot, one of the variables will \"group\" so that multiple violins are drawn:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2b851b2c-0011-4cff-8719-11f6138c44e7", + "metadata": {}, + "outputs": [], + "source": [ + "sns.violinplot(data=df, x=\"age\", y=\"class\")" + ] + }, + { + "cell_type": "raw", + "id": "6d588b32-b14b-4b33-bbd9-69b17f8212a6", + "metadata": {}, + "source": [ + "By default, the orientation of the plot is determined by the variable types, preferring to group by a categorical variable:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4810c8e7-0864-496f-8e86-a6527369b9e1", + "metadata": {}, + "outputs": [], + "source": [ + "sns.violinplot(data=df, x=\"class\", y=\"age\", hue=\"alive\")" + ] + }, + { + "cell_type": "raw", + "id": "402812f2-c024-4179-9fee-fed92f03deb2", + "metadata": {}, + "source": [ + "Pass `fill=False` to draw line-art violins:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e00ce8b-5871-486b-8c55-a4f2e764aa86", + "metadata": {}, + "outputs": [], + "source": [ + "sns.violinplot(data=df, x=\"class\", y=\"age\", hue=\"alive\", fill=False)" + ] + }, + { + "cell_type": "raw", + "id": "8350abce-6a40-4e18-9501-7d358192471b", + "metadata": {}, + "source": [ + "Draw \"split\" violins to take up less space, and only show the data quartiles:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2ae35376-5272-496c-afec-c60a3426f1bf", + "metadata": {}, + "outputs": [], + "source": [ + "sns.violinplot(data=df, x=\"class\", y=\"age\",
hue=\"alive\", split=True, inner=\"quart\")" + ] + }, + { + "cell_type": "raw", + "id": "90f4263f-7294-4ad5-bff4-25d7d796cb45", + "metadata": {}, + "source": [ + "Add a small gap between the dodged violins:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "26cb5b89-496d-4893-8914-ca8b6fbf97b7", + "metadata": {}, + "outputs": [], + "source": [ + "sns.violinplot(data=df, x=\"class\", y=\"age\", hue=\"alive\", split=True, gap=.1, inner=\"quart\")" + ] + }, + { + "cell_type": "raw", + "id": "bbea49e0-7b08-4b25-8686-1d5404b71601", + "metadata": {}, + "source": [ + "Starting in version 0.13.0, it is possible to \"split\" single violins:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ba261531-a280-44e5-b8c0-bcc5a53f60bf", + "metadata": {}, + "outputs": [], + "source": [ + "sns.violinplot(data=df, x=\"class\", y=\"age\", split=True, inner=\"quart\")" + ] + }, + { + "cell_type": "raw", + "id": "7c4dafa1-2747-4b43-ba4a-4c9b32778086", + "metadata": {}, + "source": [ + "Represent every observation inside the distribution by setting `inner=\"stick\"` or `inner=\"point\"`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "00b5f00e-a515-4e53-9d73-d13b045cd4c8", + "metadata": {}, + "outputs": [], + "source": [ + "sns.violinplot(data=df, x=\"age\", y=\"deck\", inner=\"point\")" + ] + }, + { + "cell_type": "raw", + "id": "23c13695-cd01-4da8-bc89-2519ae445f9f", + "metadata": {}, + "source": [ + "Normalize the width of each violin to represent the number of observations:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "be59f17e-824e-4a8c-a0e1-a27874a05df6", + "metadata": {}, + "outputs": [], + "source": [ + "sns.violinplot(data=df, x=\"age\", y=\"deck\", inner=\"point\", density_norm=\"count\")" + ] + }, + { + "cell_type": "raw", + "id": "abe650fb-4d26-4bac-97f3-f451a3872cf5", + "metadata": {}, + "source": [ + "By default, the KDE will smooth past the extremes of the observed data; set 
`cut=0` to prevent this:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82556de0-3756-426c-a591-9af6ed6c45d4", + "metadata": {}, + "outputs": [], + "source": [ + "sns.violinplot(data=df, x=\"age\", y=\"alive\", cut=0, inner=\"stick\")" + ] + }, + { + "cell_type": "raw", + "id": "abfb9e78-d524-4536-90ef-c71834b055f9", + "metadata": {}, + "source": [ + "The `bw_adjust` parameter controls the amount of smoothing:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8d17e1e3-e0f4-4d2c-ac6e-aec42ed75390", + "metadata": {}, + "outputs": [], + "source": [ + "sns.violinplot(data=df, x=\"age\", y=\"alive\", bw_adjust=.5, inner=\"stick\")" + ] + }, + { + "cell_type": "raw", + "id": "407bc513-5b7f-418c-8ffe-ec488836586d", + "metadata": {}, + "source": [ + "By default, the violins are drawn at fixed positions on a categorical scale, even if the grouping variable is numeric. Starting in version 0.13.0, pass the `native_scale=True` parameter to preserve the original scale on both axes:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e7b6d901-9a97-4716-8d24-1b30145e9c57", + "metadata": {}, + "outputs": [], + "source": [ + "sns.violinplot(x=df[\"age\"].round(-1) + 5, y=df[\"fare\"], native_scale=True)" + ] + }, + { + "cell_type": "raw", + "id": "790e3989-0b47-4e77-9bdb-dc757d1e938c", + "metadata": {}, + "source": [ + "When using a categorical scale, the `formatter` parameter accepts a function that defines categories:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "28a769d4-3e23-4b53-a9ef-391d5fc24201", + "metadata": {}, + "outputs": [], + "source": [ + "decades = lambda x: f\"{int(x)}–{int(x + 10)}\"\n", + "sns.violinplot(x=df[\"age\"].round(-1), y=df[\"fare\"], formatter=decades)" + ] + }, + { + "cell_type": "raw", + "id": "6f914d73-7a0c-4fbc-8432-40c4f0577857", + "metadata": {}, + "source": [ + "By default, the \"inner\" representation scales with the `linewidth` and `linecolor` 
parameters:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18cb2afd-8487-40bd-b3f2-1f83243ffa3c", + "metadata": {}, + "outputs": [], + "source": [ + "sns.violinplot(data=df, x=\"age\", linewidth=1, linecolor=\"k\")" + ] + }, + { + "cell_type": "raw", + "id": "ca2ef541-c07f-4853-ba98-ce75855ba262", + "metadata": {}, + "source": [ + "Use `inner_kws` to pass parameters directly to the inner plotting functions:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "934f91bc-2698-4c07-92cf-4e6039c801b2", + "metadata": {}, + "outputs": [], + "source": [ + "sns.violinplot(data=df, x=\"age\", inner_kws=dict(box_width=15, whis_width=2, color=\".8\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4aa00d3c-f016-4db8-b6b0-da4e6a327831", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_static/copybutton.js b/doc/_static/copybutton.js index 16244343cb..0a7db6d6dc 100644 --- a/doc/_static/copybutton.js +++ b/doc/_static/copybutton.js @@ -2,11 +2,11 @@ $(document).ready(function() { /* Add a [>>>] button on the top-right corner of code samples to hide * the >>> and ... prompts and the output and thus make the code - * copyable. + * copyable. 
* Note: This JS snippet was taken from the official python.org * documentation site.*/ var div = $('.highlight-python .highlight,' + - '.highlight-python3 .highlight,' + + '.highlight-python3 .highlight,' + '.highlight-pycon .highlight') var pre = div.find('pre'); diff --git a/doc/_static/css/custom.css b/doc/_static/css/custom.css new file mode 100644 index 0000000000..3ecccfdb1d --- /dev/null +++ b/doc/_static/css/custom.css @@ -0,0 +1,113 @@ +/**** Overriding theme defaults ****/ + +html[data-theme=light]{ + --pst-color-primary: rgb(52, 54, 99); + --pst-color-secondary: rgb(107, 161, 174); + --pst-color-link: rgb(74, 105, 145); + --pst-color-inline-code: rgb(96, 141, 130); +} + +:root { + --pst-font-size-h1: 38px; + --pst-font-size-h2: 32px; + --pst-font-size-h3: 27px; + --pst-font-size-h4: 22px; + --pst-font-size-h5: 18px; + --pst-font-size-h6: 15px; + +} + +/* Syntax highlighting */ + +/* string literals */ +html[data-theme=light] .highlight .s2 { + color: rgb(74, 105, 145); + font-weight: normal; +} +/* number literals */ +html[data-theme=light] .highlight .mi { + color: rgb(136, 97, 153); + font-weight: normal; +} +html[data-theme=light] .highlight .mf { + color: rgb(136, 97, 153); + font-weight: normal; +} +/* operators */ +html[data-theme=light] .highlight .o { + color: rgb(219, 164, 117); + font-weight: bold; +} +/* builtins */ +html[data-theme=light] .highlight .kc { + color: rgb(107, 161, 174); + font-weight: bold; +} + +/* Use full page width without sidebars */ +.bd-content { + max-width: 100%; + flex-grow: 1; +} + +/* Function signature customization */ +dt { + font-weight: 500; + color: rgb(52, 54, 99); +} + +span.default_value { + color: rgb(124, 141, 138); +} + +/* highlight over function signature after link */ +dt:target, span.highlighted { + background-color: #fdebba; +} + +/* *********************************************************************** */ + +/* --- Badges for categorizing release notes --- */ + +.label, +.badge { + display: 
inline-block; + padding: 2px 4px; + font-size: 11.844px; + /* font-weight: bold; */ + line-height: 13px; + color: #ffffff; + vertical-align: baseline; + white-space: nowrap; + /* text-shadow: 0 -1px 0 rgba(0, 0, 0, 0.25); */ + background-color: #999999; +} +.badge { + padding-left: 9px; + padding-right: 9px; + -webkit-border-radius: 9px; + -moz-border-radius: 9px; + border-radius: 9px; + opacity: 70%; +} +.badge-api { + background-color: #c44e52; +} +.badge-defaults { + background-color: #dd8452; +} +.badge-docs { + background-color: #8172b3; +} +.badge-feature { + background-color: #55a868; +} +.badge-enhancement { + background-color: #4c72b0; +} +.badge-fix { + background-color: #ccb974; +} +.badge-build { + background-color: #937860; +} diff --git a/doc/_static/favicon.ico b/doc/_static/favicon.ico new file mode 100755 index 0000000000..fac1e28c2c Binary files /dev/null and b/doc/_static/favicon.ico differ diff --git a/doc/_static/favicon_old.ico b/doc/_static/favicon_old.ico new file mode 100644 index 0000000000..1145b96d4e Binary files /dev/null and b/doc/_static/favicon_old.ico differ diff --git a/doc/_static/logo-mark-darkbg.png b/doc/_static/logo-mark-darkbg.png new file mode 100644 index 0000000000..d585461137 Binary files /dev/null and b/doc/_static/logo-mark-darkbg.png differ diff --git a/doc/_static/logo-mark-darkbg.svg b/doc/_static/logo-mark-darkbg.svg new file mode 100644 index 0000000000..4b06364224 --- /dev/null +++ b/doc/_static/logo-mark-darkbg.svg @@ -0,0 +1,4946 @@ + + + + + + + + + 2020-09-07T14:13:59.975140 + image/svg+xml + + + Matplotlib v3.3.1, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/_static/logo-mark-lightbg.png b/doc/_static/logo-mark-lightbg.png new file mode 100644 index 0000000000..378044557f Binary files /dev/null and b/doc/_static/logo-mark-lightbg.png differ diff --git a/doc/_static/logo-mark-lightbg.svg b/doc/_static/logo-mark-lightbg.svg new file mode 100644 index 0000000000..1405269edc --- /dev/null +++ b/doc/_static/logo-mark-lightbg.svg @@ -0,0 +1,4946 @@ + + + + + + + + + 2020-09-07T14:13:57.855925 + image/svg+xml + + + Matplotlib v3.3.1, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/doc/_static/logo-mark-whitebg.png b/doc/_static/logo-mark-whitebg.png new file mode 100644 index 0000000000..2e022db5d7 Binary files /dev/null and b/doc/_static/logo-mark-whitebg.png differ diff --git a/doc/_static/logo-tall-darkbg.png b/doc/_static/logo-tall-darkbg.png new file mode 100644 index 0000000000..0a2e3c06d9 Binary files /dev/null and b/doc/_static/logo-tall-darkbg.png differ diff --git a/doc/_static/logo-tall-darkbg.svg b/doc/_static/logo-tall-darkbg.svg new file mode 100644 index 0000000000..3d7d910206 --- /dev/null +++ b/doc/_static/logo-tall-darkbg.svg @@ -0,0 +1,5206 @@ + + + + + + + + + 2020-09-07T14:14:01.511527 + image/svg+xml + + + Matplotlib v3.3.1, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/_static/logo-tall-lightbg.png b/doc/_static/logo-tall-lightbg.png new file mode 100644 index 0000000000..347dd9b344 Binary files /dev/null and b/doc/_static/logo-tall-lightbg.png differ diff --git a/doc/_static/logo-tall-lightbg.svg b/doc/_static/logo-tall-lightbg.svg new file mode 100644 index 
0000000000..eb52f345c0 --- /dev/null +++ b/doc/_static/logo-tall-lightbg.svg @@ -0,0 +1,5206 @@ + + + + + + + + + 2020-09-07T14:13:59.334522 + image/svg+xml + + + Matplotlib v3.3.1, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/_static/logo-tall-whitebg.png b/doc/_static/logo-tall-whitebg.png new file mode 100644 index 0000000000..002c383a22 Binary files /dev/null and b/doc/_static/logo-tall-whitebg.png differ diff --git a/doc/_static/logo-wide-darkbg.png b/doc/_static/logo-wide-darkbg.png new file mode 100644 index 0000000000..e2d087b186 Binary files /dev/null and b/doc/_static/logo-wide-darkbg.png differ diff --git a/doc/_static/logo-wide-darkbg.svg b/doc/_static/logo-wide-darkbg.svg new file mode 100644 index 0000000000..83b0ef8289 --- /dev/null +++ b/doc/_static/logo-wide-darkbg.svg @@ -0,0 +1,5216 @@ + + + + + + + + + 2020-09-07T14:14:00.795540 + image/svg+xml + + + Matplotlib v3.3.1, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/_static/logo-wide-lightbg.png b/doc/_static/logo-wide-lightbg.png new file mode 100644 index 0000000000..ec249b06ca Binary files /dev/null and b/doc/_static/logo-wide-lightbg.png differ diff --git a/doc/_static/logo-wide-lightbg.svg b/doc/_static/logo-wide-lightbg.svg new file mode 100644 index 0000000000..57f1f71345 --- /dev/null +++ b/doc/_static/logo-wide-lightbg.svg @@ -0,0 +1,5216 @@ + + + + + + + + + 2020-09-07T14:13:58.676334 + image/svg+xml + + + Matplotlib v3.3.1, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/_static/logo-wide-whitebg.png b/doc/_static/logo-wide-whitebg.png new file mode 100644 index 0000000000..4638939fab Binary files /dev/null and b/doc/_static/logo-wide-whitebg.png differ diff --git a/doc/_static/style.css b/doc/_static/style.css deleted file mode 100644 index a94c8096ec..0000000000 --- a/doc/_static/style.css +++ /dev/null @@ -1,15 +0,0 @@ - -blockquote p { - font-size: 14px !important; -} - -code { - color: #49759c !important; - background-color: #f3f5f9 !important; -} - -.alert-info { - background-color: #adb8cb !important; - border-color: #adb8cb !important; - color: #2c3e50 !important; -} diff --git a/doc/_templates/autosummary/base.rst b/doc/_templates/autosummary/base.rst new file mode 100644 index 0000000000..b7556ebf7b --- /dev/null +++ b/doc/_templates/autosummary/base.rst @@ -0,0 +1,5 @@ +{{ fullname | escape | underline}} + +.. currentmodule:: {{ module }} + +.. auto{{ objtype }}:: {{ objname }} diff --git a/doc/_templates/autosummary/class.rst b/doc/_templates/autosummary/class.rst new file mode 100644 index 0000000000..c27ca38eca --- /dev/null +++ b/doc/_templates/autosummary/class.rst @@ -0,0 +1,30 @@ +{{ fullname | escape | underline}} + +.. currentmodule:: {{ module }} + +.. autoclass:: {{ objname }} + + {% block methods %} + .. automethod:: __init__ + + {% if methods %} + .. rubric:: Methods + + .. autosummary:: + :toctree: ./ + {% for item in methods %} + ~{{ name }}.{{ item }} + {%- endfor %} + {% endif %} + {% endblock %} + + {% block attributes %} + {% if attributes %} + .. rubric:: Attributes + + .. 
autosummary:: + {% for item in attributes %} + ~{{ name }}.{{ item }} + {%- endfor %} + {% endif %} + {% endblock %} diff --git a/doc/_templates/autosummary/object.rst b/doc/_templates/autosummary/object.rst new file mode 100644 index 0000000000..d4fd5208b6 --- /dev/null +++ b/doc/_templates/autosummary/object.rst @@ -0,0 +1,5 @@ +{{ fullname | escape | underline}} + +.. currentmodule:: {{ module }} + +.. autoclass:: {{ objname }} diff --git a/doc/_templates/autosummary/plot.rst b/doc/_templates/autosummary/plot.rst new file mode 100644 index 0000000000..aae1c66570 --- /dev/null +++ b/doc/_templates/autosummary/plot.rst @@ -0,0 +1,69 @@ +{{ fullname | escape | underline}} + +.. currentmodule:: {{ module }} + +.. autoclass:: {{ objname }} + +{% block methods %} + +Methods +~~~~~~~ + +.. rubric:: Specification methods + +.. autosummary:: + :toctree: ./ + :nosignatures: + + ~Plot.add + ~Plot.scale + +.. rubric:: Subplot methods + +.. autosummary:: + :toctree: ./ + :nosignatures: + + ~Plot.facet + ~Plot.pair + +.. rubric:: Customization methods + +.. autosummary:: + :toctree: ./ + :nosignatures: + + ~Plot.layout + ~Plot.label + ~Plot.limit + ~Plot.share + ~Plot.theme + +.. rubric:: Integration methods + +.. autosummary:: + :toctree: ./ + :nosignatures: + + ~Plot.on + +.. rubric:: Output methods + +.. autosummary:: + :toctree: ./ + :nosignatures: + + ~Plot.plot + ~Plot.save + ~Plot.show + +{% endblock %} + +.. _plot_config: + +Configuration +~~~~~~~~~~~~~ + +The :class:`Plot` object's default behavior can be configured through its :attr:`Plot.config` attribute. Notice that this is a property of the class, not a method on an instance. + +.. include:: ../docstrings/objects.Plot.config.rst diff --git a/doc/_templates/autosummary/scale.rst b/doc/_templates/autosummary/scale.rst new file mode 100644 index 0000000000..a89d76f52b --- /dev/null +++ b/doc/_templates/autosummary/scale.rst @@ -0,0 +1,9 @@ +{{ fullname | escape | underline}} + +.. currentmodule:: {{ module }} + +.. 
autoclass:: {{ objname }} + + .. automethod:: tick + + .. automethod:: label diff --git a/doc/_templates/layout.html b/doc/_templates/layout.html new file mode 100644 index 0000000000..ba0660335f --- /dev/null +++ b/doc/_templates/layout.html @@ -0,0 +1,27 @@ +{% extends "!layout.html" %} + +{%- block footer %} + +{%- endblock %} diff --git a/doc/_templates/version.html b/doc/_templates/version.html new file mode 100644 index 0000000000..e17aac8306 --- /dev/null +++ b/doc/_templates/version.html @@ -0,0 +1,3 @@ + diff --git a/doc/_tutorial/Makefile b/doc/_tutorial/Makefile new file mode 100644 index 0000000000..73168b3edc --- /dev/null +++ b/doc/_tutorial/Makefile @@ -0,0 +1,10 @@ +rst_files := $(patsubst %.ipynb,../tutorial/%.rst,$(wildcard *.ipynb)) +export MPLBACKEND := module://matplotlib_inline.backend_inline + +tutorial: ${rst_files} + +../tutorial/%.rst: %.ipynb + ../tools/nb_to_doc.py $*.ipynb ../tutorial + +clean: + rm -rf ../tutorial diff --git a/doc/tutorial/aesthetics.ipynb b/doc/_tutorial/aesthetics.ipynb similarity index 72% rename from doc/tutorial/aesthetics.ipynb rename to doc/_tutorial/aesthetics.ipynb index 789afdd5f0..63f8198779 100644 --- a/doc/tutorial/aesthetics.ipynb +++ b/doc/_tutorial/aesthetics.ipynb @@ -10,10 +10,11 @@ ] }, { - "cell_type": "markdown", + "cell_type": "raw", "metadata": {}, "source": [ - "# Controlling figure aesthetics" + "Controlling figure aesthetics\n", + "=============================\n" ] }, { @@ -28,25 +29,25 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ - "%matplotlib inline" + "import numpy as np\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "tags": [ + "hide" + ] }, "outputs": [], "source": [ - "import numpy as np\n", - "import matplotlib as mpl\n", - "import matplotlib.pyplot as plt\n", + "%matplotlib 
inline\n", "np.random.seed(sum(map(ord, \"aesthetics\")))" ] }, @@ -60,15 +61,13 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ - "def sinplot(flip=1):\n", + "def sinplot(n=10, flip=1):\n", " x = np.linspace(0, 14, 100)\n", - " for i in range(1, 7):\n", - " plt.plot(x, np.sin(x + i * .5) * (7 - i) * flip)" + " for i in range(1, n + 1):\n", + " plt.plot(x, np.sin(x + i * .5) * (n + 2 - i) * flip)" ] }, { @@ -81,9 +80,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "sinplot()" @@ -93,18 +90,16 @@ "cell_type": "raw", "metadata": {}, "source": [ - "To switch to seaborn defaults, simply import the package." + "To switch to seaborn defaults, simply call the :func:`set_theme` function." ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ - "import seaborn as sns\n", + "sns.set_theme()\n", "sinplot()" ] }, @@ -112,7 +107,7 @@ "cell_type": "raw", "metadata": {}, "source": [ - "The seaborn defaults break from the MATLAB inspired aesthetic of matplotlib to plot in more muted colors over a light gray background with white grid lines. We find that the grid aids in the use of figures for conveying quantitative information – in almost all cases, figures should be preferred to tables. The white-on-gray grid that is used by default avoids being obtrusive. The grid is particularly useful in giving structure to figures with multiple facets, which is central to some of the more complex tools in the library.\n", + "(Note that in versions of seaborn prior to 0.8, :func:`set_theme` was called on import. On later versions, it must be explicitly invoked).\n", "\n", "Seaborn splits matplotlib parameters into two independent groups. 
The first group sets the aesthetic style of the plot, and the second scales various elements of the figure so that it can be easily incorporated into different contexts.\n", "\n", @@ -120,8 +115,8 @@ "\n", ".. _axes_style:\n", "\n", - "Styling figures with :func:`axes_style` and :func:`set_style`\n", - "-------------------------------------------------------------\n", + "Seaborn figure styles\n", + "---------------------\n", "\n", "There are five preset seaborn themes: ``darkgrid``, ``whitegrid``, ``dark``, ``white``, and ``ticks``. They are each suited to different applications and personal preferences. The default theme is ``darkgrid``. As mentioned above, the grid helps the plot serve as a lookup table for quantitative information, and the white-on grey helps to keep the grid from competing with lines that represent data. The ``whitegrid`` theme is similar, but it is better suited to plots with heavy data elements:" ] @@ -129,9 +124,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "sns.set_style(\"whitegrid\")\n", @@ -149,44 +142,20 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "sns.set_style(\"dark\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ + "sns.set_style(\"dark\")\n", "sinplot()" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "sns.set_style(\"white\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ + "sns.set_style(\"white\")\n", "sinplot()" ] }, @@ -200,9 +169,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ 
"sns.set_style(\"ticks\")\n", @@ -215,18 +182,16 @@ "source": [ ".. _remove_spines:\n", "\n", - "Removing spines with :func:`despine`\n", - "------------------------------------\n", + "Removing axes spines\n", + "--------------------\n", "\n", - "Both the ``white`` and ``ticks`` styles can benefit from removing the top and right axes spines, which are not needed. It's impossible to do this through the matplotlib parameters, but you can call the seaborn function :func:`despine` to remove them:" + "Both the ``white`` and ``ticks`` styles can benefit from removing the top and right axes spines, which are not needed. The seaborn function :func:`despine` can be called to remove them:" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "sinplot()\n", @@ -243,13 +208,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "f, ax = plt.subplots()\n", - "sns.violinplot(data)\n", + "sns.violinplot(data=data)\n", "sns.despine(offset=10, trim=True);" ] }, @@ -263,13 +226,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "sns.set_style(\"whitegrid\")\n", - "sns.boxplot(data=data, color=\"deep\")\n", + "sns.boxplot(data=data, palette=\"deep\")\n", "sns.despine(left=True)" ] }, @@ -286,16 +247,29 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ + "f = plt.figure(figsize=(6, 6))\n", + "gs = f.add_gridspec(2, 2)\n", + "\n", "with sns.axes_style(\"darkgrid\"):\n", - " plt.subplot(211)\n", - " sinplot()\n", - "plt.subplot(212)\n", - "sinplot(-1)" + " ax = f.add_subplot(gs[0, 0])\n", + " sinplot(6)\n", + " \n", + "with sns.axes_style(\"white\"):\n", + " ax = f.add_subplot(gs[0, 1])\n", + " sinplot(6)\n", + "\n", + "with 
sns.axes_style(\"ticks\"):\n", + " ax = f.add_subplot(gs[1, 0])\n", + " sinplot(6)\n", + "\n", + "with sns.axes_style(\"whitegrid\"):\n", + " ax = f.add_subplot(gs[1, 1])\n", + " sinplot(6)\n", + " \n", + "f.tight_layout()" ] }, { @@ -305,7 +279,7 @@ "Overriding elements of the seaborn styles\n", "-----------------------------------------\n", "\n", - "If you want to customize the seaborn styles, you can pass a dictionary of parameters to the ``rc`` argument of :func:`axes_style` and :func:`set_style`. Note that you can only override the parameters that are part of the style definition through this method. (However, the higher-level :func:`set` function takes a dictionary of any matplotlib parameters).\n", + "If you want to customize the seaborn styles, you can pass a dictionary of parameters to the ``rc`` argument of :func:`axes_style` and :func:`set_style`. Note that you can only override the parameters that are part of the style definition through this method. (However, the higher-level :func:`set_theme` function takes a dictionary of any matplotlib parameters).\n", "\n", "If you want to see what parameters are included, you can just call the function with no arguments, which will return the current settings:" ] @@ -313,9 +287,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "sns.axes_style()" @@ -331,12 +303,10 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ - "sns.set_style(\"darkgrid\", {\"grid.linewidth\": .5, \"axes.facecolor\": \".9\"})\n", + "sns.set_style(\"darkgrid\", {\"axes.facecolor\": \".9\"})\n", "sinplot()" ] }, @@ -346,23 +316,21 @@ "source": [ ".. 
_plotting_context:\n", "\n", - "Scaling plot elements with :func:`plotting_context` and :func:`set_context`\n", - "---------------------------------------------------------------------------\n", + "Scaling plot elements\n", + "---------------------\n", "\n", "A separate set of parameters control the scale of plot elements, which should let you use the same code to make plots that are suited for use in settings where larger or smaller plots are appropriate.\n", "\n", - "First let's reset the default parameters by calling :func:`set`:" + "First let's reset the default parameters by calling :func:`set_theme`:" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ - "sns.set()" + "sns.set_theme()" ] }, { @@ -375,39 +343,30 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "sns.set_context(\"paper\")\n", - "plt.figure(figsize=(8, 6))\n", "sinplot()" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "sns.set_context(\"talk\")\n", - "plt.figure(figsize=(8, 6))\n", "sinplot()" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "sns.set_context(\"poster\")\n", - "plt.figure(figsize=(8, 6))\n", "sinplot()" ] }, @@ -425,9 +384,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "sns.set_context(\"notebook\", font_scale=1.5, rc={\"lines.linewidth\": 2.5})\n", @@ -438,31 +395,32 @@ "cell_type": "raw", "metadata": {}, "source": [ - "Similarly (although it might be less useful), you can temporarily control the scale of figures nested under a ``with`` statement.\n", + "Similarly, you can temporarily control the scale of figures nested under a 
``with`` statement.\n", "\n", "Both the style and the context can be quickly configured with the :func:`set` function. This function also sets the default color palette, but that will be covered in more detail in the :ref:`next section ` of the tutorial." ] } ], "metadata": { + "celltoolbar": "Tags", "kernelspec": { - "display_name": "Python 2", + "display_name": "py310", "language": "python", - "name": "python2" + "name": "py310" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.9" + "pygments_lexer": "ipython3", + "version": "3.10.6" } }, "nbformat": 4, - "nbformat_minor": 0 + "nbformat_minor": 4 } diff --git a/doc/_tutorial/axis_grids.ipynb b/doc/_tutorial/axis_grids.ipynb new file mode 100644 index 0000000000..da3cc587e5 --- /dev/null +++ b/doc/_tutorial/axis_grids.ipynb @@ -0,0 +1,553 @@ +{ + "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + ".. _grid_tutorial:\n", + "\n", + ".. currentmodule:: seaborn" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Building structured multi-plot grids\n", + "====================================\n" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "When exploring multi-dimensional data, a useful approach is to draw multiple instances of the same plot on different subsets of your dataset. This technique is sometimes called either \"lattice\" or \"trellis\" plotting, and it is related to the idea of `\"small multiples\" `_. It allows a viewer to quickly extract a large amount of information about a complex dataset. 
Matplotlib offers good support for making figures with multiple axes; seaborn builds on top of this to directly link the structure of the plot to the structure of your dataset.\n", + "\n", + "The :doc:`figure-level ` functions are built on top of the objects discussed in this chapter of the tutorial. In most cases, you will want to work with those functions. They take care of some important bookkeeping that synchronizes the multiple plots in each grid. This chapter explains how the underlying objects work, which may be useful for advanced applications." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "sns.set_theme(style=\"ticks\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "import numpy as np\n", + "np.random.seed(sum(map(ord, \"axis_grids\")))" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + ".. _facet_grid:\n", + "\n", + "Conditional small multiples\n", + "---------------------------\n", + "\n", + "The :class:`FacetGrid` class is useful when you want to visualize the distribution of a variable or the relationship between multiple variables separately within subsets of your dataset. A :class:`FacetGrid` can be drawn with up to three dimensions: ``row``, ``col``, and ``hue``. 
The first two have obvious correspondence with the resulting array of axes; think of the hue variable as a third dimension along a depth axis, where different levels are plotted with different colors.\n", + "\n", + "Each of :func:`relplot`, :func:`displot`, :func:`catplot`, and :func:`lmplot` use this object internally, and they return the object when they are finished so that it can be used for further tweaking.\n", + "\n", + "The class is used by initializing a :class:`FacetGrid` object with a dataframe and the names of the variables that will form the row, column, or hue dimensions of the grid. These variables should be categorical or discrete, and then the data at each level of the variable will be used for a facet along that axis. For example, say we wanted to examine differences between lunch and dinner in the ``tips`` dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tips = sns.load_dataset(\"tips\")\n", + "g = sns.FacetGrid(tips, col=\"time\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Initializing the grid like this sets up the matplotlib figure and axes, but doesn't draw anything on them.\n", + "\n", + "The main approach for visualizing data on this grid is with the :meth:`FacetGrid.map` method. Provide it with a plotting function and the name(s) of variable(s) in the dataframe to plot. Let's look at the distribution of tips in each of these subsets, using a histogram:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.FacetGrid(tips, col=\"time\")\n", + "g.map(sns.histplot, \"tip\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "This function will draw the figure and annotate the axes, hopefully producing a finished plot in one step. To make a relational plot, just pass multiple variable names. 
You can also provide keyword arguments, which will be passed to the plotting function:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.FacetGrid(tips, col=\"sex\", hue=\"smoker\")\n", + "g.map(sns.scatterplot, \"total_bill\", \"tip\", alpha=.7)\n", + "g.add_legend()" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "There are several options for controlling the look of the grid that can be passed to the class constructor." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.FacetGrid(tips, row=\"smoker\", col=\"time\", margin_titles=True)\n", + "g.map(sns.regplot, \"size\", \"total_bill\", color=\".3\", fit_reg=False, x_jitter=.1)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Note that ``margin_titles`` isn't formally supported by the matplotlib API, and may not work well in all cases. In particular, it currently can't be used with a legend that lies outside of the plot.\n", + "\n", + "The size of the figure is set by providing the height of *each* facet, along with the aspect ratio:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.FacetGrid(tips, col=\"day\", height=4, aspect=.5)\n", + "g.map(sns.barplot, \"sex\", \"total_bill\", order=[\"Male\", \"Female\"])" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The default ordering of the facets is derived from the information in the DataFrame. If the variable used to define facets has a categorical type, then the order of the categories is used. Otherwise, the facets will be in the order of appearance of the category levels. 
It is possible, however, to specify an ordering of any facet dimension with the appropriate ``*_order`` parameter:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ordered_days = tips.day.value_counts().index\n", + "g = sns.FacetGrid(tips, row=\"day\", row_order=ordered_days,\n", + " height=1.7, aspect=4,)\n", + "g.map(sns.kdeplot, \"total_bill\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Any seaborn color palette (i.e., something that can be passed to :func:`color_palette()`) can be provided. You can also use a dictionary that maps the names of values in the ``hue`` variable to valid matplotlib colors:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pal = dict(Lunch=\"seagreen\", Dinner=\".7\")\n", + "g = sns.FacetGrid(tips, hue=\"time\", palette=pal, height=5)\n", + "g.map(sns.scatterplot, \"total_bill\", \"tip\", s=100, alpha=.5)\n", + "g.add_legend()" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "If you have many levels of one variable, you can plot it along the columns but \"wrap\" them so that they span multiple rows. When doing this, you cannot use a ``row`` variable." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "attend = sns.load_dataset(\"attention\").query(\"subject <= 12\")\n", + "g = sns.FacetGrid(attend, col=\"subject\", col_wrap=4, height=2, ylim=(0, 10))\n", + "g.map(sns.pointplot, \"solutions\", \"score\", order=[1, 2, 3], color=\".3\", errorbar=None)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Once you've drawn a plot using :meth:`FacetGrid.map` (which can be called multiple times), you may want to adjust some aspects of the plot. There are also a number of methods on the :class:`FacetGrid` object for manipulating the figure at a higher level of abstraction. 
The most general is :meth:`FacetGrid.set`, and there are other more specialized methods like :meth:`FacetGrid.set_axis_labels`, which respects the fact that interior facets do not have axis labels. For example:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with sns.axes_style(\"white\"):\n", + " g = sns.FacetGrid(tips, row=\"sex\", col=\"smoker\", margin_titles=True, height=2.5)\n", + "g.map(sns.scatterplot, \"total_bill\", \"tip\", color=\"#334488\")\n", + "g.set_axis_labels(\"Total bill (US Dollars)\", \"Tip\")\n", + "g.set(xticks=[10, 30, 50], yticks=[2, 6, 10])\n", + "g.figure.subplots_adjust(wspace=.02, hspace=.02)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "For even more customization, you can work directly with the underlying matplotlib ``Figure`` and ``Axes`` objects, which are stored as member attributes at ``figure`` and ``axes_dict``, respectively. When making a figure without row or column faceting, you can also use the ``ax`` attribute to directly access the single axes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.FacetGrid(tips, col=\"smoker\", margin_titles=True, height=4)\n", + "g.map(plt.scatter, \"total_bill\", \"tip\", color=\"#338844\", edgecolor=\"white\", s=50, lw=1)\n", + "for ax in g.axes_dict.values():\n", + " ax.axline((0, 0), slope=.2, c=\".2\", ls=\"--\", zorder=0)\n", + "g.set(xlim=(0, 60), ylim=(0, 14))" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + ".. _custom_map_func:\n", + "\n", + "Using custom functions\n", + "----------------------\n", + "\n", + "You're not limited to existing matplotlib and seaborn functions when using :class:`FacetGrid`. However, to work properly, any function you use must follow a few rules:\n", + "\n", + "1. It must plot onto the \"currently active\" matplotlib ``Axes``.
This will be true of functions in the ``matplotlib.pyplot`` namespace, and you can call :func:`matplotlib.pyplot.gca` to get a reference to the current ``Axes`` if you want to work directly with its methods.\n", + "2. It must accept the data that it plots in positional arguments. Internally, :class:`FacetGrid` will pass a ``Series`` of data for each of the named positional arguments passed to :meth:`FacetGrid.map`.\n", + "3. It must be able to accept ``color`` and ``label`` keyword arguments, and, ideally, it will do something useful with them. In most cases, it's easiest to catch a generic dictionary of ``**kwargs`` and pass it along to the underlying plotting function.\n", + "\n", + "Let's look at a minimal example of a function you can plot with. This function will just take a single vector of data for each facet:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from scipy import stats\n", + "def quantile_plot(x, **kwargs):\n", + " quantiles, xr = stats.probplot(x, fit=False)\n", + " plt.scatter(xr, quantiles, **kwargs)\n", + " \n", + "g = sns.FacetGrid(tips, col=\"sex\", height=4)\n", + "g.map(quantile_plot, \"total_bill\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "If you want to make a bivariate plot, you should write the function so that it accepts the x-axis variable first and the y-axis variable second:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def qqplot(x, y, **kwargs):\n", + " _, xr = stats.probplot(x, fit=False)\n", + " _, yr = stats.probplot(y, fit=False)\n", + " plt.scatter(xr, yr, **kwargs)\n", + " \n", + "g = sns.FacetGrid(tips, col=\"smoker\", height=4)\n", + "g.map(qqplot, \"total_bill\", \"tip\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Because :func:`matplotlib.pyplot.scatter` accepts ``color`` and ``label`` keyword arguments and does the right thing
with them, we can add a hue facet without any difficulty:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.FacetGrid(tips, hue=\"time\", col=\"sex\", height=4)\n", + "g.map(qqplot, \"total_bill\", \"tip\")\n", + "g.add_legend()" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Sometimes, though, you'll want to map a function that doesn't work the way you expect with the ``color`` and ``label`` keyword arguments. In this case, you'll want to explicitly catch them and handle them in the logic of your custom function. For example, this approach will allow us to map :func:`matplotlib.pyplot.hexbin`, which otherwise does not play well with the :class:`FacetGrid` API:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def hexbin(x, y, color, **kwargs):\n", + " cmap = sns.light_palette(color, as_cmap=True)\n", + " plt.hexbin(x, y, gridsize=15, cmap=cmap, **kwargs)\n", + "\n", + "with sns.axes_style(\"dark\"):\n", + " g = sns.FacetGrid(tips, hue=\"time\", col=\"time\", height=4)\n", + "g.map(hexbin, \"total_bill\", \"tip\", extent=[0, 50, 0, 10]);" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + ".. _pair_grid:\n", + "\n", + "Plotting pairwise data relationships\n", + "------------------------------------\n", + "\n", + ":class:`PairGrid` also allows you to quickly draw a grid of small subplots using the same plot type to visualize data in each. In a :class:`PairGrid`, each row and column is assigned to a different variable, so the resulting plot shows each pairwise relationship in the dataset. This style of plot is sometimes called a \"scatterplot matrix\", as this is the most common way to show each relationship, but :class:`PairGrid` is not limited to scatterplots.\n", + "\n", + "It's important to understand the differences between a :class:`FacetGrid` and a :class:`PairGrid`.
In the former, each facet shows the same relationship conditioned on different levels of other variables. In the latter, each plot shows a different relationship (although the upper and lower triangles will have mirrored plots). Using :class:`PairGrid` can give you a very quick, very high-level summary of interesting relationships in your dataset.\n", + "\n", + "The basic usage of the class is very similar to :class:`FacetGrid`. First you initialize the grid, then you pass a plotting function to a ``map`` method and it will be called on each subplot. There is also a companion function, :func:`pairplot` that trades off some flexibility for faster plotting.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "iris = sns.load_dataset(\"iris\")\n", + "g = sns.PairGrid(iris)\n", + "g.map(sns.scatterplot)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "It's possible to plot a different function on the diagonal to show the univariate distribution of the variable in each column. Note that the axis ticks won't correspond to the count or density axis of this plot, though." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.PairGrid(iris)\n", + "g.map_diag(sns.histplot)\n", + "g.map_offdiag(sns.scatterplot)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "A very common way to use this plot colors the observations by a separate categorical variable. For example, the iris dataset has four measurements for each of three different species of iris flowers so you can see how they differ."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.PairGrid(iris, hue=\"species\")\n", + "g.map_diag(sns.histplot)\n", + "g.map_offdiag(sns.scatterplot)\n", + "g.add_legend()" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "By default every numeric column in the dataset is used, but you can focus on particular relationships if you want." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.PairGrid(iris, vars=[\"sepal_length\", \"sepal_width\"], hue=\"species\")\n", + "g.map(sns.scatterplot)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "It's also possible to use a different function in the upper and lower triangles to emphasize different aspects of the relationship." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.PairGrid(iris)\n", + "g.map_upper(sns.scatterplot)\n", + "g.map_lower(sns.kdeplot)\n", + "g.map_diag(sns.kdeplot, lw=3, legend=False)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The square grid with identity relationships on the diagonal is actually just a special case, and you can plot with different variables in the rows and columns." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.PairGrid(tips, y_vars=[\"tip\"], x_vars=[\"total_bill\", \"size\"], height=4)\n", + "g.map(sns.regplot, color=\".3\")\n", + "g.set(ylim=(-1, 11), yticks=[0, 5, 10])" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Of course, the aesthetic attributes are configurable. For instance, you can use a different palette (say, to show an ordering of the ``hue`` variable) and pass keyword arguments into the plotting functions." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.PairGrid(tips, hue=\"size\", palette=\"GnBu_d\")\n", + "g.map(plt.scatter, s=50, edgecolor=\"white\")\n", + "g.add_legend()" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + ":class:`PairGrid` is flexible, but to take a quick look at a dataset, it can be easier to use :func:`pairplot`. This function uses scatterplots and histograms by default, although a few other kinds will be added (currently, you can also plot regression plots on the off-diagonals and KDEs on the diagonal)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.pairplot(iris, hue=\"species\", height=2.5)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "You can also control the aesthetics of the plot with keyword arguments, and it returns the :class:`PairGrid` instance for further tweaking." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.pairplot(iris, hue=\"species\", palette=\"Set2\", diag_kind=\"kde\", height=2.5)" + ] + } + ], + "metadata": { + "celltoolbar": "Tags", + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/doc/_tutorial/categorical.ipynb b/doc/_tutorial/categorical.ipynb new file mode 100644 index 0000000000..33c589c243 --- /dev/null +++ b/doc/_tutorial/categorical.ipynb @@ -0,0 +1,555 @@ +{ + "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + ".. _categorical_tutorial:\n", + "\n", + ".. 
currentmodule:: seaborn" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Visualizing categorical data\n", + "============================\n", + " \n", + "In the :ref:`relational plot tutorial ` we saw how to use different visual representations to show the relationship between multiple variables in a dataset. In the examples, we focused on cases where the main relationship was between two numerical variables. If one of the main variables is \"categorical\" (divided into discrete groups) it may be helpful to use a more specialized approach to visualization.\n", + "\n", + "In seaborn, there are several different ways to visualize a relationship involving categorical data. Similar to the relationship between :func:`relplot` and either :func:`scatterplot` or :func:`lineplot`, there are two ways to make these plots. There are a number of axes-level functions for plotting categorical data in different ways and a figure-level interface, :func:`catplot`, that gives unified higher-level access to them.\n", + "\n", + "It's helpful to think of the different categorical plot kinds as belonging to three different families, which we'll discuss in detail below. They are:\n", + "\n", + "Categorical scatterplots:\n", + "\n", + "- :func:`stripplot` (with ``kind=\"strip\"``; the default)\n", + "- :func:`swarmplot` (with ``kind=\"swarm\"``)\n", + "\n", + "Categorical distribution plots:\n", + "\n", + "- :func:`boxplot` (with ``kind=\"box\"``)\n", + "- :func:`violinplot` (with ``kind=\"violin\"``)\n", + "- :func:`boxenplot` (with ``kind=\"boxen\"``)\n", + "\n", + "Categorical estimate plots:\n", + "\n", + "- :func:`pointplot` (with ``kind=\"point\"``)\n", + "- :func:`barplot` (with ``kind=\"bar\"``)\n", + "- :func:`countplot` (with ``kind=\"count\"``)\n", + "\n", + "These families represent the data using different levels of granularity. When deciding which to use, you'll have to think about the question that you want to answer. 
The unified API makes it easy to switch between different kinds and see your data from several perspectives.\n", + "\n", + "In this tutorial, we'll mostly focus on the figure-level interface, :func:`catplot`. Remember that this function is a higher-level interface to each of the functions above, so we'll reference them when we show each kind of plot, keeping the more verbose kind-specific API documentation at hand." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "sns.set_theme(style=\"ticks\", color_codes=True)\n", + "np.random.seed(sum(map(ord, \"categorical\")))" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Categorical scatterplots\n", + "------------------------\n", + "\n", + "The default representation of the data in :func:`catplot` uses a scatterplot. There are actually two different categorical scatter plots in seaborn. They take different approaches to resolving the main challenge in representing categorical data with a scatter plot, which is that all of the points belonging to one category would fall on the same position along the axis corresponding to the categorical variable.
The approach used by :func:`stripplot`, which is the default \"kind\" in :func:`catplot`, is to adjust the positions of points on the categorical axis with a small amount of random \"jitter\":" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tips = sns.load_dataset(\"tips\")\n", + "sns.catplot(data=tips, x=\"day\", y=\"total_bill\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The ``jitter`` parameter controls the magnitude of jitter or disables it altogether:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.catplot(data=tips, x=\"day\", y=\"total_bill\", jitter=False)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The second approach adjusts the points along the categorical axis using an algorithm that prevents them from overlapping. It can give a better representation of the distribution of observations, although it only works well for relatively small datasets. This kind of plot is sometimes called a \"beeswarm\" and is drawn in seaborn by :func:`swarmplot`, which is activated by setting ``kind=\"swarm\"`` in :func:`catplot`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.catplot(data=tips, x=\"day\", y=\"total_bill\", kind=\"swarm\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Similar to the relational plots, it's possible to add another dimension to a categorical plot by using a ``hue`` semantic. (The categorical plots do not currently support ``size`` or ``style`` semantics). Each different categorical plotting function handles the ``hue`` semantic differently.
For the scatter plots, it is only necessary to change the color of the points:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.catplot(data=tips, x=\"day\", y=\"total_bill\", hue=\"sex\", kind=\"swarm\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Unlike with numerical data, it is not always obvious how to order the levels of the categorical variable along its axis. In general, the seaborn categorical plotting functions try to infer the order of categories from the data. If your data have a pandas ``Categorical`` datatype, then the default order of the categories can be set there. If the variable passed to the categorical axis looks numerical, the levels will be sorted. But, by default, the data are still treated as categorical and drawn at ordinal positions on the categorical axes (specifically, at 0, 1, ...) even when numbers are used to label them:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.catplot(data=tips.query(\"size != 3\"), x=\"size\", y=\"total_bill\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "As of v0.13.0, all categorical plotting functions have a `native_scale` parameter, which can be set to `True` when you want to use numeric or datetime data for categorical grouping without changing the underlying data properties: " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.catplot(data=tips.query(\"size != 3\"), x=\"size\", y=\"total_bill\", native_scale=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The other option for choosing a default ordering is to take the levels of the category as they appear in the dataset. The ordering can also be controlled on a plot-specific basis using the ``order`` parameter. 
This can be important when drawing multiple categorical plots in the same figure, which we'll see more of below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.catplot(data=tips, x=\"smoker\", y=\"tip\", order=[\"No\", \"Yes\"])" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "We've referred to the idea of \"categorical axis\". In these examples, that's always corresponded to the horizontal axis. But it's often helpful to put the categorical variable on the vertical axis (particularly when the category names are relatively long or there are many categories). To do this, swap the assignment of variables to axes:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.catplot(data=tips, x=\"total_bill\", y=\"day\", hue=\"time\", kind=\"swarm\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Comparing distributions\n", + "-----------------------\n", + "\n", + "As the size of the dataset grows, categorical scatter plots become limited in the information they can provide about the distribution of values within each category. When this happens, there are several approaches for summarizing the distributional information in ways that facilitate easy comparisons across the category levels.\n", + "\n", + "Boxplots\n", + "^^^^^^^^\n", + "\n", + "The first is the familiar :func:`boxplot`. This kind of plot shows the three quartile values of the distribution along with extreme values. The \"whiskers\" extend to points that lie within 1.5 IQRs of the lower and upper quartile, and then observations that fall outside this range are displayed independently. This means that each value in the boxplot corresponds to an actual observation in the data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.catplot(data=tips, x=\"day\", y=\"total_bill\", kind=\"box\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "When adding a ``hue`` semantic, the box for each level of the semantic variable is made narrower and shifted along the categorical axis:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.catplot(data=tips, x=\"day\", y=\"total_bill\", hue=\"smoker\", kind=\"box\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "This behavior is called \"dodging\", and it is controlled by the `dodge` parameter. By default (as of v0.13.0), elements dodge only if they would otherwise overlap:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tips[\"weekend\"] = tips[\"day\"].isin([\"Sat\", \"Sun\"])\n", + "sns.catplot(data=tips, x=\"day\", y=\"total_bill\", hue=\"weekend\", kind=\"box\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "A related function, :func:`boxenplot`, draws a plot that is similar to a box plot but optimized for showing more information about the shape of the distribution. 
It is best suited for larger datasets:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "diamonds = sns.load_dataset(\"diamonds\")\n", + "sns.catplot(\n", + " data=diamonds.sort_values(\"color\"),\n", + " x=\"color\", y=\"price\", kind=\"boxen\",\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Violinplots\n", + "^^^^^^^^^^^\n", + "\n", + "A different approach is a :func:`violinplot`, which combines a boxplot with the kernel density estimation procedure described in the :ref:`distributions ` tutorial:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.catplot(\n", + " data=tips, x=\"total_bill\", y=\"day\", hue=\"sex\", kind=\"violin\",\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "This approach uses the kernel density estimate to provide a richer description of the distribution of values. Additionally, the quartile and whisker values from the boxplot are shown inside the violin. 
The downside is that, because the violinplot uses a KDE, there are some other parameters that may need tweaking, adding some complexity relative to the straightforward boxplot:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.catplot(\n", + " data=tips, x=\"total_bill\", y=\"day\", hue=\"sex\",\n", + " kind=\"violin\", bw_adjust=.5, cut=0,\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "It's also possible to \"split\" the violins, which can allow for a more efficient use of space:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.catplot(\n", + " data=tips, x=\"day\", y=\"total_bill\", hue=\"sex\",\n", + " kind=\"violin\", split=True,\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Finally, there are several options for the plot that is drawn on the interior of the violins, including ways to show each individual observation instead of the summary boxplot values:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.catplot(\n", + " data=tips, x=\"day\", y=\"total_bill\", hue=\"sex\",\n", + " kind=\"violin\", inner=\"stick\", split=True, palette=\"pastel\",\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "It can also be useful to combine :func:`swarmplot` or :func:`stripplot` with a box plot or violin plot to show each observation along with a summary of the distribution:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.catplot(data=tips, x=\"day\", y=\"total_bill\", kind=\"violin\", inner=None)\n", + "sns.swarmplot(data=tips, x=\"day\", y=\"total_bill\", color=\"k\", size=3, ax=g.ax)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Estimating central tendency\n", + 
"---------------------------\n", + "\n", + "For other applications, rather than showing the distribution within each category, you might want to show an estimate of the central tendency of the values. Seaborn has two main ways to show this information. Importantly, the basic API for these functions is identical to that for the ones discussed above.\n", + "\n", + "Bar plots\n", + "^^^^^^^^^\n", + "\n", + "A familiar style of plot that accomplishes this goal is a bar plot. In seaborn, the :func:`barplot` function operates on a full dataset and applies a function to obtain the estimate (taking the mean by default). When there are multiple observations in each category, it also uses bootstrapping to compute a confidence interval around the estimate, which is plotted using error bars:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "titanic = sns.load_dataset(\"titanic\")\n", + "sns.catplot(data=titanic, x=\"sex\", y=\"survived\", hue=\"class\", kind=\"bar\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The default error bars show 95% confidence intervals, but (starting in v0.12), it is possible to select from a number of other representations:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.catplot(data=titanic, x=\"age\", y=\"deck\", errorbar=(\"pi\", 95), kind=\"bar\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "A special case for the bar plot is when you want to show the number of observations in each category rather than computing a statistic for a second variable. This is similar to a histogram over a categorical, rather than quantitative, variable. 
In seaborn, it's easy to do so with the :func:`countplot` function:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.catplot(data=titanic, x=\"deck\", kind=\"count\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Both :func:`barplot` and :func:`countplot` can be invoked with all of the options discussed above, along with others that are demonstrated in the detailed documentation for each function:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.catplot(\n", + " data=titanic, y=\"deck\", hue=\"class\", kind=\"count\",\n", + " palette=\"pastel\", edgecolor=\".6\",\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Point plots\n", + "^^^^^^^^^^^\n", + "\n", + "An alternative style for visualizing the same information is offered by the :func:`pointplot` function. This function also encodes the value of the estimate with height on the other axis, but rather than showing a full bar, it plots the point estimate and confidence interval. Additionally, :func:`pointplot` connects points from the same ``hue`` category. 
This makes it easy to see how the main relationship is changing as a function of the hue semantic, because your eyes are quite good at picking up on differences of slopes:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.catplot(data=titanic, x=\"sex\", y=\"survived\", hue=\"class\", kind=\"point\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "While the categorical functions lack the ``style`` semantic of the relational functions, it can still be a good idea to vary the marker and/or linestyle along with the hue to make figures that are maximally accessible and reproduce well in black and white:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.catplot(\n", + " data=titanic, x=\"class\", y=\"survived\", hue=\"sex\",\n", + " palette={\"male\": \"g\", \"female\": \"m\"},\n", + " markers=[\"^\", \"o\"], linestyles=[\"-\", \"--\"],\n", + " kind=\"point\"\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Showing additional dimensions\n", + "-----------------------------\n", + "\n", + "Just like :func:`relplot`, the fact that :func:`catplot` is built on a :class:`FacetGrid` means that it is easy to add faceting variables to visualize higher-dimensional relationships:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.catplot(\n", + " data=tips, x=\"day\", y=\"total_bill\", hue=\"smoker\",\n", + " kind=\"swarm\", col=\"time\", aspect=.7,\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "For further customization of the plot, you can use the methods on the :class:`FacetGrid` object that it returns:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.catplot(\n", + " data=titanic,\n", + " x=\"fare\", 
y=\"embark_town\", row=\"class\",\n", + " kind=\"box\", orient=\"h\",\n", + " sharex=False, margin_titles=True,\n", + " height=1.5, aspect=4,\n", + ")\n", + "g.set(xlabel=\"Fare\", ylabel=\"\")\n", + "g.set_titles(row_template=\"{row_name} class\")\n", + "for ax in g.axes.flat:\n", + " ax.xaxis.set_major_formatter('${x:.0f}')" + ] + } + ], + "metadata": { + "celltoolbar": "Tags", + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/doc/_tutorial/color_palettes.ipynb b/doc/_tutorial/color_palettes.ipynb new file mode 100644 index 0000000000..79029f421f --- /dev/null +++ b/doc/_tutorial/color_palettes.ipynb @@ -0,0 +1,1004 @@ +{ + "cells": [ + { + "cell_type": "raw", + "metadata": { + "raw_mimetype": "text/restructuredtext" + }, + "source": [ + ".. _palette_tutorial:\n", + "\n", + ".. currentmodule:: seaborn" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Choosing color palettes\n", + "=======================\n", + "\n", + "Seaborn makes it easy to use colors that are well-suited to the characteristics of your data and your visualization goals. This chapter discusses both the general principles that should guide your choices and the tools in seaborn that help you quickly find the best solution for a given application." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib as mpl\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "sns.set_theme(style=\"white\", rc={\"xtick.major.pad\": 1, \"ytick.major.pad\": 1})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "np.random.seed(sum(map(ord, \"palettes\")))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "# Add colormap display methods to matplotlib colormaps.\n", + "# These are forthcoming in matplotlib 3.4, but, the matplotlib display\n", + "# method includes the colormap name, which is redundant.\n", + "def _repr_png_(self):\n", + " \"\"\"Generate a PNG representation of the Colormap.\"\"\"\n", + " import io\n", + " from PIL import Image\n", + " import numpy as np\n", + " IMAGE_SIZE = (400, 50)\n", + " X = np.tile(np.linspace(0, 1, IMAGE_SIZE[0]), (IMAGE_SIZE[1], 1))\n", + " pixels = self(X, bytes=True)\n", + " png_bytes = io.BytesIO()\n", + " Image.fromarray(pixels).save(png_bytes, format='png')\n", + " return png_bytes.getvalue()\n", + " \n", + "def _repr_html_(self):\n", + " \"\"\"Generate an HTML representation of the Colormap.\"\"\"\n", + " import base64\n", + " png_bytes = self._repr_png_()\n", + " png_base64 = base64.b64encode(png_bytes).decode('ascii')\n", + " return ('')\n", + " \n", + "import matplotlib as mpl\n", + "mpl.colors.Colormap._repr_png_ = _repr_png_\n", + "mpl.colors.Colormap._repr_html_ = _repr_html_" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "General principles for using color in plots\n", + "-------------------------------------------\n", + "\n", + "Components of color\n", + "~~~~~~~~~~~~~~~~~~~\n", + "\n", +
"Because of the way our eyes work, a particular color can be defined using three components. We usually program colors in a computer by specifying their RGB values, which set the intensity of the red, green, and blue channels in a display. But for analyzing the perceptual attributes of a color, it's better to think in terms of *hue*, *saturation*, and *luminance* channels.\n", + "\n", + "Hue is the component that distinguishes \"different colors\" in a non-technical sense. It's the property of color that leads to first-order names like \"red\" and \"blue\":" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "sns.husl_palette(8, s=.7)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Saturation (or chroma) is the *colorfulness*. Two colors with different hues will look more distinct when they have more saturation:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "c = sns.color_palette(\"muted\")[0]\n", + "sns.blend_palette([sns.desaturate(c, 0), c], 8)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "And lightness corresponds to how much light is emitted (or reflected, for printed colors), ranging from black to white:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "sns.blend_palette([\".1\", c, \".95\"], 8)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Vary hue to distinguish categories\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\n", + "When you want to represent multiple categories in a plot, you typically should vary the color of the elements. Consider this simple example: in which of these two plots is it easier to count the number of triangular points?"
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "n = 45\n", + "rng = np.random.default_rng(200)\n", + "x = rng.uniform(0, 1, n * 2)\n", + "y = rng.uniform(0, 1, n * 2)\n", + "a = np.concatenate([np.zeros(n * 2 - 10), np.ones(10)])\n", + "\n", + "f, axs = plt.subplots(1, 2, figsize=(7, 3.5), sharey=True, sharex=True)\n", + "\n", + "sns.scatterplot(\n", + " x=x[::2], y=y[::2], style=a[::2], size=a[::2], legend=False,\n", + " markers=[\"o\", (3, 1, 1)], sizes=[70, 140], ax=axs[0],\n", + ")\n", + "\n", + "sns.scatterplot(\n", + " x=x[1::2], y=y[1::2], style=a[1::2], size=a[1::2], hue=a[1::2], legend=False,\n", + " markers=[\"o\", (3, 1, 1)], sizes=[70, 140], ax=axs[1],\n", + ")\n", + "\n", + "f.tight_layout(w_pad=2)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "In the plot on the right, the orange triangles \"pop out\", making it easy to distinguish them from the circles. This pop-out effect happens because our visual system prioritizes color differences.\n", + "\n", + "The blue and orange colors differ mostly in terms of their hue. Hue is useful for representing categories: most people can distinguish a moderate number of hues relatively easily, and points that have different hues but similar brightness or intensity seem equally important. It also makes plots easier to talk about. 
Consider this example:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "b = np.tile(np.arange(10), n // 5)\n", + "\n", + "f, axs = plt.subplots(1, 2, figsize=(7, 3.5), sharey=True, sharex=True)\n", + "\n", + "sns.scatterplot(\n", + " x=x[::2], y=y[::2], hue=b[::2],\n", + " legend=False, palette=\"muted\", s=70, ax=axs[0],\n", + ")\n", + "\n", + "sns.scatterplot(\n", + " x=x[1::2], y=y[1::2], hue=b[1::2],\n", + " legend=False, palette=\"blend:.75,C0\", s=70, ax=axs[1],\n", + ")\n", + "\n", + "f.tight_layout(w_pad=2)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Most people would be able to quickly ascertain that there are five distinct categories in the plot on the left and, if asked to characterize the \"blue\" points, would be able to do so.\n", + "\n", + "With the plot on the right, where the points are all blue but vary in their luminance and saturation, it's harder to say how many unique categories are present. And how would we talk about a particular category? \"The fairly-but-not-too-blue points?\" What's more, the gray dots seem to fade into the background, de-emphasizing them relative to the more intense blue dots. If the categories are equally important, this is a poor representation.\n", + "\n", + "So as a general rule, use hue variation to represent categories. With that said, here are a few notes of caution. If you have more than a handful of colors in your plot, it can become difficult to keep in mind what each one means, unless there are pre-existing associations between the categories and the colors used to represent them. This makes your plot harder to interpret: rather than focusing on the data, a viewer will have to continually refer to the legend to make sense of what is shown. So you should strive not to make plots that are too complex. And be mindful that not everyone sees colors the same way.
Varying both shape (or some other attribute) and color can help people with anomalous color vision understand your plots, and it can keep them (somewhat) interpretable if they are printed to black-and-white." + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Vary luminance to represent numbers\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\n", + "On the other hand, hue variations are not well suited to representing numeric data. Consider this example, where we need colors to represent the counts in a bivariate histogram. On the left, we use a circular colormap, where gradual changes in the number of observations within each bin correspond to gradual changes in hue. On the right, we use a palette that uses brighter colors to represent bins with larger counts:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "penguins = sns.load_dataset(\"penguins\")\n", + "\n", + "f, axs = plt.subplots(1, 2, figsize=(7, 4.25), sharey=True, sharex=True)\n", + "\n", + "sns.histplot(\n", + " data=penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\",\n", + " binwidth=(3, .75), cmap=\"hls\", ax=axs[0],\n", + " cbar=True, cbar_kws=dict(orientation=\"horizontal\", pad=.1),\n", + ")\n", + "axs[0].set(xlabel=\"\", ylabel=\"\")\n", + "\n", + "\n", + "sns.histplot(\n", + " data=penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\",\n", + " binwidth=(3, .75), cmap=\"flare_r\", ax=axs[1],\n", + " cbar=True, cbar_kws=dict(orientation=\"horizontal\", pad=.1),\n", + ")\n", + "axs[1].set(xlabel=\"\", ylabel=\"\")\n", + "\n", + "f.tight_layout(w_pad=3)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "With the hue-based palette, it's quite difficult to ascertain the shape of the bivariate distribution.
In contrast, the luminance palette makes it much more clear that there are two prominent peaks.\n", + "\n", + "Varying luminance helps you see structure in data, and changes in luminance are more intuitively processed as changes in importance. But the plot on the right does not use a grayscale colormap. Its colorfulness makes it more interesting, and the subtle hue variation increases the perceptual distance between two values. As a result, small differences are slightly easier to resolve.\n", + "\n", + "These examples show that color palette choices are about more than aesthetics: the colors you choose can reveal patterns in your data if used effectively or hide them if used poorly. There is not one optimal palette, but there are palettes that are better or worse for particular datasets and visualization approaches.\n", + "\n", + "And aesthetics do matter: the more that people want to look at your figures, the greater the chance that they will learn something from them. This is true even when you are making plots for yourself. During exploratory data analysis, you may generate many similar figures. Varying the color palettes will add a sense of novelty, which keeps you engaged and prepared to notice interesting features of your data.\n", + "\n", + "So how can you choose color palettes that both represent your data well and look attractive?" + ] + }, + { + "cell_type": "raw", + "metadata": { + "raw_mimetype": "text/restructuredtext" + }, + "source": [ + "Tools for choosing color palettes\n", + "---------------------------------\n", + "\n", + "The most important function for working with color palettes is, aptly, :func:`color_palette`. This function provides an interface to most of the possible ways that one can generate color palettes in seaborn.
And it's used internally by any function that has a ``palette`` argument.\n", + "\n", + "The primary argument to :func:`color_palette` is usually a string: either the name of a specific palette or the name of a family and additional arguments to select a specific member. In the latter case, :func:`color_palette` will delegate to a more specific function, such as :func:`cubehelix_palette`. It's also possible to pass a list of colors specified any way that matplotlib accepts (an RGB tuple, a hex code, or a name in the X11 table). The return value is an object that wraps a list of RGB tuples with a few useful methods, such as conversion to hex codes and a rich HTML representation.\n", + "\n", + "Calling :func:`color_palette` with no arguments will return the current default color palette that matplotlib (and most seaborn functions) will use if colors are not otherwise specified. This default palette can be set with the corresponding :func:`set_palette` function, which calls :func:`color_palette` internally and accepts the same arguments.\n", + "\n", + "To motivate the different options that :func:`color_palette` provides, it will be useful to introduce a classification scheme for color palettes. Broadly, palettes fall into one of three categories:\n", + "\n", + "- qualitative palettes, good for representing categorical data\n", + "- sequential palettes, good for representing numeric data\n", + "- diverging palettes, good for representing numeric data with a categorical boundary" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + ".. _qualitative_palettes:\n", + "\n", + "Qualitative color palettes\n", + "--------------------------\n", + "\n", + "Qualitative palettes are well-suited to representing categorical data because most of their variation is in the hue component.
The default color palette in seaborn is a qualitative palette with ten distinct hues:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.color_palette()" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "These colors have the same ordering as the default matplotlib color palette, ``\"tab10\"``, but they are a bit less intense. Compare:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.color_palette(\"tab10\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Seaborn in fact has six variations of matplotlib's palette, called ``deep``, ``muted``, ``pastel``, ``bright``, ``dark``, and ``colorblind``. These span a range of average luminance and saturation values:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "import io\n", + "from IPython.display import SVG\n", + "f = mpl.figure.Figure(figsize=(6, 6))\n", + "\n", + "ax_locs = dict(\n", + " deep=(.4, .4),\n", + " bright=(.8, .8),\n", + " muted=(.49, .71),\n", + " dark=(.8, .2),\n", + " pastel=(.2, .8),\n", + " colorblind=(.71, .49),\n", + ")\n", + "\n", + "s = .35\n", + "\n", + "for pal, (x, y) in ax_locs.items():\n", + " ax = f.add_axes([x - s / 2, y - s / 2, s, s])\n", + " ax.pie(np.ones(10),\n", + " colors=sns.color_palette(pal, 10),\n", + " counterclock=False, startangle=180,\n", + " wedgeprops=dict(linewidth=1, edgecolor=\"w\"))\n", + " f.text(x, y, pal, ha=\"center\", va=\"center\", size=14,\n", + " bbox=dict(facecolor=\"white\", alpha=0.85, boxstyle=\"round,pad=0.2\"))\n", + "\n", + "f.text(.1, .05, \"Saturation\", size=18, ha=\"left\", va=\"center\",\n", + " bbox=dict(facecolor=\"white\", edgecolor=\"w\"))\n", + "f.text(.05, .1, \"Luminance\", size=18, ha=\"center\", va=\"bottom\", rotation=90,\n", + " bbox=dict(facecolor=\"white\", 
edgecolor=\"w\"))\n", + "\n", + "ax = f.add_axes([0, 0, 1, 1])\n", + "ax.set_axis_off()\n", + "ax.arrow(.15, .05, .4, 0, width=.002, head_width=.015, color=\".15\")\n", + "ax.arrow(.05, .15, 0, .4, width=.002, head_width=.015, color=\".15\")\n", + "ax.set(xlim=(0, 1), ylim=(0, 1))\n", + "f.savefig(svg:=io.StringIO(), format=\"svg\")\n", + "SVG(svg.getvalue())" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Many people find the moderated hues of the default ``\"deep\"`` palette to be aesthetically pleasing, but they are also less distinct. As a result, they may be more difficult to discriminate in some contexts, which is something to keep in mind when making publication graphics. `This comparison `_ can be helpful for estimating how the seaborn color palettes perform when simulating different forms of colorblindness." + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Using circular color systems\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\n", + "When you have an arbitrary number of categories, the easiest approach to finding unique hues is to draw evenly-spaced colors in a circular color space (one where the hue changes while keeping the brightness and saturation constant). This is what most seaborn functions default to when they need to use more colors than are currently set in the default color cycle.\n", + "\n", + "The most common way to do this uses the ``hls`` color space, which is a simple transformation of RGB values.
We saw this color palette before as a counterexample for how to plot a histogram:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.color_palette(\"hls\", 8)" + ] + }, + { + "cell_type": "raw", + "metadata": { + "raw_mimetype": "text/restructuredtext" + }, + "source": [ + "Because of the way the human visual system works, colors that have the same luminance and saturation in terms of their RGB values won't necessarily look equally intense. To remedy this, seaborn provides an interface to the `husl `_ system (since renamed to HSLuv), which achieves less intensity variation as you rotate around the color wheel:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.color_palette(\"husl\", 8)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "When seaborn needs a categorical palette with more colors than are available in the current default, it will use this approach.\n", + "\n", + "Using categorical Color Brewer palettes\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\n", + "Another source of visually pleasing categorical palettes comes from the `Color Brewer `_ tool (which also has sequential and diverging palettes, as we'll see below)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.color_palette(\"Set2\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Be aware that the qualitative Color Brewer palettes have different lengths, and the default behavior of :func:`color_palette` is to give you the full list:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.color_palette(\"Paired\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "..
_sequential_palettes:\n", + "\n", + "Sequential color palettes\n", + "-------------------------\n", + "\n", + "The second major class of color palettes is called \"sequential\". This kind of mapping is appropriate when data range from relatively low or uninteresting values to relatively high or interesting values (or vice versa). As we saw above, the primary dimension of variation in a sequential palette is luminance. Some seaborn functions will default to a sequential palette when you are mapping numeric data. (For historical reasons, both categorical and numeric mappings are specified with the ``hue`` parameter in functions like :func:`relplot` or :func:`displot`, even though numeric mappings use color palettes with relatively little hue variation).\n", + "\n", + "Perceptually uniform palettes\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\n", + "Because they are intended to represent numeric values, the best sequential palettes will be *perceptually uniform*, meaning that the relative discriminability of two colors is proportional to the difference between the corresponding data values. Seaborn includes four perceptually uniform sequential colormaps: ``\"rocket\"``, ``\"mako\"``, ``\"flare\"``, and ``\"crest\"``. The first two have a very wide luminance range and are well suited for applications such as heatmaps, where colors fill the space they are plotted into:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.color_palette(\"rocket\", as_cmap=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.color_palette(\"mako\", as_cmap=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Because the extreme values of these colormaps approach white, they are not well-suited for coloring elements such as lines or points: it will be difficult to discriminate important values against a white or gray background. 
The \"flare\" and \"crest\" colormaps are a better choice for such plots. They have a more restricted range of luminance variations, which they compensate for with a slightly more pronounced variation in hue. The default direction of the luminance ramp is also reversed, so that smaller values have lighter colors:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.color_palette(\"flare\", as_cmap=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.color_palette(\"crest\", as_cmap=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "It is also possible to use the perceptually uniform colormaps provided by matplotlib, such as ``\"magma\"`` and ``\"viridis\"``:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.color_palette(\"magma\", as_cmap=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.color_palette(\"viridis\", as_cmap=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "As with the convention in matplotlib, every continuous colormap has a reversed version, which has the suffix ``\"_r\"``:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.color_palette(\"rocket_r\", as_cmap=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Discrete vs. continuous mapping\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\n", + "One thing to be aware of is that seaborn can generate discrete values from sequential colormaps and, when doing so, it will not use the most extreme values. 
Compare the discrete version of ``\"rocket\"`` against the continuous version shown above:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.color_palette(\"rocket\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Internally, seaborn uses the discrete version for categorical data and the continuous version when in numeric mapping mode. Discrete sequential colormaps can be well-suited for visualizing categorical data with an intrinsic ordering, especially if there is some hue variation." + ] + }, + { + "cell_type": "raw", + "metadata": { + "raw_mimetype": "text/restructuredtext" + }, + "source": [ + ".. _cubehelix_palettes:\n", + "\n", + "Sequential \"cubehelix\" palettes\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\n", + "The perceptually uniform colormaps are difficult to programmatically generate, because they are not based on the RGB color space. The `cubehelix `_ system offers an RGB-based compromise: it generates sequential palettes with a linear increase or decrease in brightness and some continuous variation in hue. While not perfectly perceptually uniform, the resulting colormaps have many good properties. Importantly, many aspects of the design process are parameterizable.\n", + "\n", + "Matplotlib has the default cubehelix version built into it:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.color_palette(\"cubehelix\", as_cmap=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The default palette returned by the seaborn :func:`cubehelix_palette` function is a bit different from the matplotlib default in that it does not rotate as far around the hue wheel or cover as wide a range of intensities. 
It also reverses the luminance ramp:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.cubehelix_palette(as_cmap=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Other arguments to :func:`cubehelix_palette` control how the palette looks. The two main things you'll change are the ``start`` (a value between 0 and 3) and ``rot``, or number of rotations (an arbitrary value, but usually between -1 and 1)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.cubehelix_palette(start=.5, rot=-.5, as_cmap=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The more you rotate, the more hue variation you will see:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.cubehelix_palette(start=.5, rot=-.75, as_cmap=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "You can control both how dark and light the endpoints are and their order:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.cubehelix_palette(start=2, rot=0, dark=0, light=.95, reverse=True, as_cmap=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The :func:`color_palette` function accepts a string code, starting with ``\"ch:\"``, for generating an arbitrary cubehelix palette. 
You can pass the names of parameters in the string:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.color_palette(\"ch:start=.2,rot=-.3\", as_cmap=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "And for compactness, each parameter can be specified with its first letter:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.color_palette(\"ch:s=-.2,r=.6\", as_cmap=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Custom sequential palettes\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\n", + "For a simpler interface to custom sequential palettes, you can use :func:`light_palette` or :func:`dark_palette`, which are both seeded with a single color and produce a palette that ramps either from light or dark desaturated values to that color:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.light_palette(\"seagreen\", as_cmap=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.dark_palette(\"#69d\", reverse=True, as_cmap=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "As with cubehelix palettes, you can also specify light or dark palettes through :func:`color_palette` or anywhere ``palette`` is accepted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.color_palette(\"light:b\", as_cmap=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Reverse the colormap by adding ``\"_r\"``:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.color_palette(\"dark:salmon_r\", as_cmap=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + 
"Sequential Color Brewer palettes\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\n", + "The Color Brewer library also has some good options for sequential palettes. They include palettes with one primary hue:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.color_palette(\"Blues\", as_cmap=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Along with multi-hue options:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.color_palette(\"YlOrBr\", as_cmap=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + ".. _diverging_palettes:\n", + "\n", + "Diverging color palettes\n", + "------------------------\n", + "\n", + "The third class of color palettes is called \"diverging\". These are used for data where both large low and high values are interesting and span a midpoint value (often 0) that should be de-emphasized. The rules for choosing good diverging palettes are similar to good sequential palettes, except now there should be two dominant hues in the colormap, one at (or near) each pole. It's also important that the starting values are of similar brightness and saturation.\n", + "\n", + "Perceptually uniform diverging palettes\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\n", + "Seaborn includes two perceptually uniform diverging palettes: ``\"vlag\"`` and ``\"icefire\"``. 
They both use blue and red at their poles, which many people intuitively process as \"cold\" and \"hot\":" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.color_palette(\"vlag\", as_cmap=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.color_palette(\"icefire\", as_cmap=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Custom diverging palettes\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\n", + "You can also use the seaborn function :func:`diverging_palette` to create a custom colormap for diverging data. This function makes diverging palettes using the ``husl`` color system. You pass it two hues (in degrees) and, optionally, the lightness and saturation values for the extremes. Using ``husl`` means that the extreme values, and the resulting ramps to the midpoint, while not perfectly perceptually uniform, will be well-balanced:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.diverging_palette(220, 20, as_cmap=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "This is convenient when you want to stray from the boring confines of cold-hot approaches:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.diverging_palette(145, 300, s=60, as_cmap=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "It's also possible to make a palette where the midpoint is dark rather than light:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.diverging_palette(250, 30, l=65, center=\"dark\", as_cmap=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "It's important to emphasize here that using red and green, while intuitive, `should be 
avoided `_.\n", + "\n", + "Other diverging palettes\n", + "~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\n", + "There are a few other good diverging palettes built into matplotlib, including Color Brewer palettes:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.color_palette(\"Spectral\", as_cmap=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "And the ``coolwarm`` palette, which has less contrast between the middle values and the extremes:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.color_palette(\"coolwarm\", as_cmap=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "As you can see, there are many options for using color in your visualizations. Seaborn tries both to use good defaults and to offer a lot of flexibility.\n", + "\n", + "This discussion is only the beginning, and there are a number of good resources for learning more about techniques for using color in visualizations. One great example is this `series of blog posts `_ from the NASA Earth Observatory. The matplotlib docs also have a `nice tutorial `_ that illustrates some of the perceptual properties of their colormaps." 
+ ] + } + ], + "metadata": { + "celltoolbar": "Tags", + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/doc/_tutorial/data_structure.ipynb b/doc/_tutorial/data_structure.ipynb new file mode 100644 index 0000000000..a474477002 --- /dev/null +++ b/doc/_tutorial/data_structure.ipynb @@ -0,0 +1,497 @@ +{ + "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + ".. _data_tutorial:\n", + "\n", + ".. currentmodule:: seaborn" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Data structures accepted by seaborn\n", + "===================================\n", + "\n", + "As a data visualization library, seaborn requires that you provide it with data. This chapter explains the various ways to accomplish that task. Seaborn supports several different dataset formats, and most functions accept data represented with objects from the `pandas `_ or `numpy `_ libraries as well as built-in Python types like lists and dictionaries. Understanding the usage patterns associated with these different options will help you quickly create useful visualizations for nearly any dataset.\n", + "\n", + ".. note::\n", + " As of current writing (v0.13.0), the full breadth of options covered here are supported by most, but not all, of the functions in seaborn. Namely, a few older functions (e.g., :func:`lmplot` and :func:`regplot`) are more limited in what they accept." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "sns.set_theme()" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Long-form vs. wide-form data\n", + "----------------------------\n", + "\n", + "Most plotting functions in seaborn are oriented towards *vectors* of data. When plotting ``x`` against ``y``, each variable should be a vector. Seaborn accepts data *sets* that have more than one vector organized in some tabular fashion. There is a fundamental distinction between \"long-form\" and \"wide-form\" data tables, and seaborn will treat each differently.\n", + "\n", + "Long-form data\n", + "~~~~~~~~~~~~~~\n", + "\n", + "A long-form data table has the following characteristics:\n", + "\n", + "- Each variable is a column\n", + "- Each observation is a row" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "As a simple example, consider the \"flights\" dataset, which records the number of airline passengers who flew in each month from 1949 to 1960. This dataset has three variables (*year*, *month*, and number of *passengers*):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "flights = sns.load_dataset(\"flights\")\n", + "flights.head()" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "With long-form data, columns in the table are given roles in the plot by explicitly assigning them to one of the variables. 
For example, making a monthly plot of the number of passengers per year looks like this:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.relplot(data=flights, x=\"year\", y=\"passengers\", hue=\"month\", kind=\"line\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The advantage of long-form data is that it lends itself well to this explicit specification of the plot. It can accommodate datasets of arbitrary complexity, so long as the variables and observations can be clearly defined. But this format takes some getting used to, because it is often not the model of the data that one has in their head.\n", + "\n", + "Wide-form data\n", + "~~~~~~~~~~~~~~\n", + "\n", + "For simple datasets, it is often more intuitive to think about data the way it might be viewed in a spreadsheet, where the columns and rows contain *levels* of different variables. For example, we can convert the flights dataset into a wide-form organization by \"pivoting\" it so that each column has each month's time series over years:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "flights_wide = flights.pivot(index=\"year\", columns=\"month\", values=\"passengers\")\n", + "flights_wide.head()" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Here we have the same three variables, but they are organized differently. The variables in this dataset are linked to the *dimensions* of the table, rather than to named fields. Each observation is defined by both the value at a cell in the table and the coordinates of that cell with respect to the row and column indices." + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "With long-form data, we can access variables in the dataset by their name. That is not the case with wide-form data. 
Nevertheless, because there is a clear association between the dimensions of the table and the variable in the dataset, seaborn is able to assign those variables roles in the plot.\n", + "\n", + ".. note::\n", + " Seaborn treats the argument to ``data`` as wide form when neither ``x`` nor ``y`` are assigned." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.relplot(data=flights_wide, kind=\"line\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "This plot looks very similar to the one before. Seaborn has assigned the index of the dataframe to ``x``, the values of the dataframe to ``y``, and it has drawn a separate line for each month. There is a notable difference between the two plots, however. When the dataset went through the \"pivot\" operation that converted it from long-form to wide-form, the information about what the values mean was lost. As a result, there is no y axis label. (The lines also have dashes here, because :func:`relplot` has mapped the column variable to both the ``hue`` and ``style`` semantic so that the plot is more accessible. We didn't do that in the long-form case, but we could have by setting ``style=\"month\"``).\n", + "\n", + "Thus far, we did much less typing while using wide-form data and made nearly the same plot. This seems easier! But a big advantage of long-form data is that, once you have the data in the correct format, you no longer need to think about its *structure*. You can design your plots by thinking only about the variables contained within it. 
For example, to draw lines that represent the monthly time series for each year, simply reassign the variables:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.relplot(data=flights, x=\"month\", y=\"passengers\", hue=\"year\", kind=\"line\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "To achieve the same remapping with the wide-form dataset, we would need to transpose the table:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.relplot(data=flights_wide.transpose(), kind=\"line\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "(This example also illustrates another wrinkle, which is that seaborn currently considers the column variable in a wide-form dataset to be categorical regardless of its datatype, whereas, because the long-form variable is numeric, it is assigned a quantitative color palette and legend. This may change in the future).\n", + "\n", + "The absence of explicit variable assignments also means that each plot type needs to define a fixed mapping between the dimensions of the wide-form data and the roles in the plot. Because this natural mapping may vary across plot types, the results are less predictable when using wide-form data. For example, the :ref:`categorical ` plots assign the *column* dimension of the table to ``x`` and then aggregate across the rows (ignoring the index):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.catplot(data=flights_wide, kind=\"box\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "When using pandas to represent wide-form data, you are limited to just a few variables (no more than three). This is because seaborn does not make use of multi-index information, which is how pandas represents additional variables in a tabular format. 
The `xarray `_ project offers labeled N-dimensional array objects, which can be considered a generalization of wide-form data to higher dimensions. At present, seaborn does not directly support objects from ``xarray``, but they can be transformed into a long-form :class:`pandas.DataFrame` using the ``to_pandas`` method and then plotted in seaborn like any other long-form data set.\n", + "\n", + "In summary, we can think of long-form and wide-form datasets as looking something like this:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "f = plt.figure(figsize=(7, 5))\n", + "\n", + "gs = plt.GridSpec(\n", + " ncols=6, nrows=2, figure=f,\n", + " left=0, right=.35, bottom=0, top=.9,\n", + " height_ratios=(1, 20),\n", + " wspace=.1, hspace=.01\n", + ")\n", + "\n", + "colors = [c + (.5,) for c in sns.color_palette()]\n", + "\n", + "f.add_subplot(gs[0, :], facecolor=\".8\")\n", + "[\n", + " f.add_subplot(gs[1:, i], facecolor=colors[i])\n", + " for i in range(gs.ncols)\n", + "]\n", + "\n", + "gs = plt.GridSpec(\n", + " ncols=2, nrows=2, figure=f,\n", + " left=.4, right=1, bottom=.2, top=.8,\n", + " height_ratios=(1, 8), width_ratios=(1, 11),\n", + " wspace=.015, hspace=.02\n", + ")\n", + "\n", + "f.add_subplot(gs[0, 1:], facecolor=colors[2])\n", + "f.add_subplot(gs[1:, 0], facecolor=colors[1])\n", + "f.add_subplot(gs[1, 1], facecolor=colors[0])\n", + "\n", + "for ax in f.axes:\n", + " ax.set(xticks=[], yticks=[])\n", + "\n", + "f.text(.35 / 2, .91, \"Long-form\", ha=\"center\", va=\"bottom\", size=15)\n", + "f.text(.7, .81, \"Wide-form\", ha=\"center\", va=\"bottom\", size=15)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Messy data\n", + "~~~~~~~~~~\n", + "\n", + "Many datasets cannot be clearly interpreted using either long-form or wide-form rules. 
If datasets that are clearly long-form or wide-form are `\"tidy\" `_, we might say that these more ambiguous datasets are \"messy\". In a messy dataset, the variables are neither uniquely defined by the keys nor by the dimensions of the table. This often occurs with *repeated-measures* data, where it is natural to organize a table such that each row corresponds to the *unit* of data collection. Consider this simple dataset from a psychology experiment in which twenty subjects performed a memory task where they studied anagrams while their attention was either divided or focused:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "anagrams = sns.load_dataset(\"anagrams\")\n", + "anagrams" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The attention variable is *between-subjects*, but there is also a *within-subjects* variable: the number of possible solutions to the anagrams, which varied from 1 to 3. The dependent measure is a score of memory performance. These two variables (number and score) are jointly encoded across several columns. As a result, the whole dataset is neither clearly long-form nor clearly wide-form.\n", + "\n", + "How might we tell seaborn to plot the average score as a function of attention and number of solutions? We'd first need to coerce the data into one of our two structures. Let's transform it to a tidy long-form table, such that each variable is a column and each row is an observation. 
We can use the method :meth:`pandas.DataFrame.melt` to accomplish this task:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "anagrams_long = anagrams.melt(id_vars=[\"subidr\", \"attnr\"], var_name=\"solutions\", value_name=\"score\")\n", + "anagrams_long.head()" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Now we can make the plot that we want:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.catplot(data=anagrams_long, x=\"solutions\", y=\"score\", hue=\"attnr\", kind=\"point\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Further reading and take-home points\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\n", + "For a longer discussion about tabular data structures, you could read the `\"Tidy Data\" `_ paper by Hadley Wickham. Note that seaborn uses a slightly different set of concepts than are defined in the paper. While the paper associates tidiness with long-form structure, we have drawn a distinction between \"tidy wide-form\" data, where there is a clear mapping between variables in the dataset and the dimensions of the table, and \"messy data\", where no such mapping exists.\n", + "\n", + "The long-form structure has clear advantages. It allows you to create figures by explicitly assigning variables in the dataset to roles in the plot, and you can do so with more than three variables. When possible, try to represent your data with a long-form structure when embarking on serious analysis. Most of the examples in the seaborn documentation will use long-form data. But in cases where it is more natural to keep the dataset wide, remember that seaborn can remain useful."
+ ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Options for visualizing long-form data\n", + "--------------------------------------\n", + "\n", + "While long-form data has a precise definition, seaborn is fairly flexible in terms of how it is actually organized across the data structures in memory. The examples in the rest of the documentation will typically use :class:`pandas.DataFrame` objects and reference variables in them by assigning names of their columns to the variables in the plot. But it is also possible to store vectors in a Python dictionary or a class that implements that interface:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "flights_dict = flights.to_dict()\n", + "sns.relplot(data=flights_dict, x=\"year\", y=\"passengers\", hue=\"month\", kind=\"line\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Many pandas operations, such as the split-apply-combine operations of a group-by, will produce a dataframe where information has moved from the columns of the input dataframe to the index of the output. So long as the name is retained, you can still reference the data as normal:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "flights_avg = flights.groupby(\"year\").mean(numeric_only=True)\n", + "sns.relplot(data=flights_avg, x=\"year\", y=\"passengers\", kind=\"line\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Additionally, it's possible to pass vectors of data directly as arguments to ``x``, ``y``, and other plotting variables. 
If these vectors are pandas objects, the ``name`` attribute will be used to label the plot:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "year = flights_avg.index\n", + "passengers = flights_avg[\"passengers\"]\n", + "sns.relplot(x=year, y=passengers, kind=\"line\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Numpy arrays and other objects that implement the Python sequence interface work too, but if they don't have names, the plot will not be as informative without further tweaking:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.relplot(x=year.to_numpy(), y=passengers.to_list(), kind=\"line\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Options for visualizing wide-form data\n", + "--------------------------------------\n", + "\n", + "The options for passing wide-form data are even more flexible. As with long-form data, pandas objects are preferable because the name (and, in some cases, index) information can be used. But in essence, any format that can be viewed as a single vector or a collection of vectors can be passed to ``data``, and a valid plot can usually be constructed.\n", + "\n", + "The example we saw above used a rectangular :class:`pandas.DataFrame`, which can be thought of as a collection of its columns. A dict or list of pandas objects will also work, but we'll lose the axis labels:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "flights_wide_list = [col for _, col in flights_wide.items()]\n", + "sns.relplot(data=flights_wide_list, kind=\"line\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The vectors in a collection do not need to have the same length. 
If they have an ``index``, it will be used to align them:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "two_series = [flights_wide.loc[:1955, \"Jan\"], flights_wide.loc[1952:, \"Aug\"]]\n", + "sns.relplot(data=two_series, kind=\"line\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Whereas an ordinal index will be used for numpy arrays or simple Python sequences:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "two_arrays = [s.to_numpy() for s in two_series]\n", + "sns.relplot(data=two_arrays, kind=\"line\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "But a dictionary of such vectors will at least use the keys:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "two_arrays_dict = {s.name: s.to_numpy() for s in two_series}\n", + "sns.relplot(data=two_arrays_dict, kind=\"line\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Rectangular numpy arrays are treated just like a dataframe without index information, so they are viewed as a collection of column vectors. Note that this is different from how numpy indexing operations work, where a single indexer will access a row. 
But it is consistent with how pandas would turn the array into a dataframe or how matplotlib would plot it:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "flights_array = flights_wide.to_numpy()\n", + "sns.relplot(data=flights_array, kind=\"line\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/doc/_tutorial/distributions.ipynb b/doc/_tutorial/distributions.ipynb new file mode 100644 index 0000000000..66d783143a --- /dev/null +++ b/doc/_tutorial/distributions.ipynb @@ -0,0 +1,858 @@ +{ + "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + ".. _distribution_tutorial:\n", + "\n", + ".. currentmodule:: seaborn" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Visualizing distributions of data\n", + "==================================\n", + "\n", + "An early step in any effort to analyze or model data should be to understand how the variables are distributed. Techniques for distribution visualization can provide quick answers to many important questions. What range do the observations cover? What is their central tendency? Are they heavily skewed in one direction? Is there evidence for bimodality? Are there significant outliers? Do the answers to these questions vary across subsets defined by other variables?\n", + "\n", + "The :ref:`distributions module ` contains several functions designed to answer questions such as these. The axes-level functions are :func:`histplot`, :func:`kdeplot`, :func:`ecdfplot`, and :func:`rugplot`. 
They are grouped together within the figure-level :func:`displot`, :func:`jointplot`, and :func:`pairplot` functions.\n", + "\n", + "There are several different approaches to visualizing a distribution, and each has its relative advantages and drawbacks. It is important to understand these factors so that you can choose the best approach for your particular aim." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "import seaborn as sns; sns.set_theme()" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + ".. _tutorial_hist:\n", + "\n", + "Plotting univariate histograms\n", + "------------------------------\n", + "\n", + "Perhaps the most common approach to visualizing a distribution is the *histogram*. This is the default approach in :func:`displot`, which uses the same underlying code as :func:`histplot`. A histogram is a bar plot where the axis representing the data variable is divided into a set of discrete bins and the count of observations falling within each bin is shown using the height of the corresponding bar:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "penguins = sns.load_dataset(\"penguins\")\n", + "sns.displot(penguins, x=\"flipper_length_mm\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "This plot immediately affords a few insights about the ``flipper_length_mm`` variable. For instance, we can see that the most common flipper length is about 195 mm, but the distribution appears bimodal, so this one number does not represent the data well.\n", + "\n", + "Choosing the bin size\n", + "^^^^^^^^^^^^^^^^^^^^^\n", + "\n", + "The size of the bins is an important parameter, and using the wrong bin size can mislead by obscuring important features of the data or by creating apparent features out of random variability. 
By default, :func:`displot`/:func:`histplot` choose a default bin size based on the variance of the data and the number of observations. But you should not be over-reliant on such automatic approaches, because they depend on particular assumptions about the structure of your data. It is always advisable to check that your impressions of the distribution are consistent across different bin sizes. To choose the size directly, set the `binwidth` parameter:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(penguins, x=\"flipper_length_mm\", binwidth=3)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "In other circumstances, it may make more sense to specify the *number* of bins, rather than their size:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(penguins, x=\"flipper_length_mm\", bins=20)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "One example of a situation where defaults fail is when the variable takes a relatively small number of integer values. 
In that case, the default bin width may be too small, creating awkward gaps in the distribution:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tips = sns.load_dataset(\"tips\")\n", + "sns.displot(tips, x=\"size\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "One approach would be to specify the precise bin breaks by passing an array to ``bins``:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(tips, x=\"size\", bins=[1, 2, 3, 4, 5, 6, 7])" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "This can also be accomplished by setting ``discrete=True``, which chooses bin breaks that represent the unique values in a dataset with bars that are centered on their corresponding value." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(tips, x=\"size\", discrete=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "It's also possible to visualize the distribution of a categorical variable using the logic of a histogram. Discrete bins are automatically set for categorical variables, but it may also be helpful to \"shrink\" the bars slightly to emphasize the categorical nature of the axis:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(tips, x=\"day\", shrink=.8)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Conditioning on other variables\n", + "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + "\n", + "Once you understand the distribution of a variable, the next step is often to ask whether features of that distribution differ across other variables in the dataset. For example, what accounts for the bimodal distribution of flipper lengths that we saw above? 
:func:`displot` and :func:`histplot` provide support for conditional subsetting via the ``hue`` semantic. Assigning a variable to ``hue`` will draw a separate histogram for each of its unique values and distinguish them by color:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(penguins, x=\"flipper_length_mm\", hue=\"species\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "By default, the different histograms are \"layered\" on top of each other and, in some cases, they may be difficult to distinguish. One option is to change the visual representation of the histogram from a bar plot to a \"step\" plot:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(penguins, x=\"flipper_length_mm\", hue=\"species\", element=\"step\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Alternatively, instead of layering each bar, they can be \"stacked\", or moved vertically. In this plot, the outline of the full histogram will match the plot with only a single variable:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(penguins, x=\"flipper_length_mm\", hue=\"species\", multiple=\"stack\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The stacked histogram emphasizes the part-whole relationship between the variables, but it can obscure other features (for example, it is difficult to determine the mode of the Adelie distribution). Another option is to \"dodge\" the bars, which moves them horizontally and reduces their width. This ensures that there are no overlaps and that the bars remain comparable in terms of height. 
But it only works well when the categorical variable has a small number of levels:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(penguins, x=\"flipper_length_mm\", hue=\"sex\", multiple=\"dodge\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Because :func:`displot` is a figure-level function and is drawn onto a :class:`FacetGrid`, it is also possible to draw each individual distribution in a separate subplot by assigning the second variable to ``col`` or ``row`` rather than (or in addition to) ``hue``. This represents the distribution of each subset well, but it makes it more difficult to draw direct comparisons:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(penguins, x=\"flipper_length_mm\", col=\"sex\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "None of these approaches are perfect, and we will soon see some alternatives to a histogram that are better-suited to the task of comparison.\n", + "\n", + "Normalized histogram statistics\n", + "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + "\n", + "Before we do, another point to note is that, when the subsets have unequal numbers of observations, comparing their distributions in terms of counts may not be ideal. One solution is to *normalize* the counts using the ``stat`` parameter:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(penguins, x=\"flipper_length_mm\", hue=\"species\", stat=\"density\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "By default, however, the normalization is applied to the entire distribution, so this simply rescales the height of the bars. 
By setting ``common_norm=False``, each subset will be normalized independently:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(penguins, x=\"flipper_length_mm\", hue=\"species\", stat=\"density\", common_norm=False)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Density normalization scales the bars so that their *areas* sum to 1. As a result, the density axis is not directly interpretable. Another option is to normalize the bars so that their *heights* sum to 1. This makes most sense when the variable is discrete, but it is an option for all histograms:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(penguins, x=\"flipper_length_mm\", hue=\"species\", stat=\"probability\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + ".. _tutorial_kde:\n", + "\n", + "Kernel density estimation\n", + "-------------------------\n", + "\n", + "A histogram aims to approximate the underlying probability density function that generated the data by binning and counting observations. Kernel density estimation (KDE) presents a different solution to the same problem. Rather than using discrete bins, a KDE plot smooths the observations with a Gaussian kernel, producing a continuous density estimate:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(penguins, x=\"flipper_length_mm\", kind=\"kde\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Choosing the smoothing bandwidth\n", + "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + "\n", + "Much like with the bin size in the histogram, the ability of the KDE to accurately represent the data depends on the choice of smoothing bandwidth. 
An over-smoothed estimate might erase meaningful features, but an under-smoothed estimate can obscure the true shape within random noise. The easiest way to check the robustness of the estimate is to adjust the default bandwidth:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(penguins, x=\"flipper_length_mm\", kind=\"kde\", bw_adjust=.25)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Note how the narrow bandwidth makes the bimodality much more apparent, but the curve is much less smooth. In contrast, a larger bandwidth obscures the bimodality almost completely:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(penguins, x=\"flipper_length_mm\", kind=\"kde\", bw_adjust=2)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Conditioning on other variables\n", + "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + "\n", + "As with histograms, if you assign a ``hue`` variable, a separate density estimate will be computed for each level of that variable:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(penguins, x=\"flipper_length_mm\", hue=\"species\", kind=\"kde\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "In many cases, the layered KDE is easier to interpret than the layered histogram, so it is often a good choice for the task of comparison. 
Many of the same options for resolving multiple distributions apply to the KDE as well, however:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(penguins, x=\"flipper_length_mm\", hue=\"species\", kind=\"kde\", multiple=\"stack\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Note how the stacked plot filled in the area between each curve by default. It is also possible to fill in the curves for single or layered densities, although the default alpha value (opacity) will be different, so that the individual densities are easier to resolve." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(penguins, x=\"flipper_length_mm\", hue=\"species\", kind=\"kde\", fill=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Kernel density estimation pitfalls\n", + "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + "\n", + "KDE plots have many advantages. Important features of the data are easy to discern (central tendency, bimodality, skew), and they afford easy comparisons between subsets. But there are also situations where KDE poorly represents the underlying data. This is because the logic of KDE assumes that the underlying distribution is smooth and unbounded. One way this assumption can fail is when a variable reflects a quantity that is naturally bounded. 
If there are observations lying close to the bound (for example, small values of a variable that cannot be negative), the KDE curve may extend to unrealistic values:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(tips, x=\"total_bill\", kind=\"kde\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "This can be partially avoided with the ``cut`` parameter, which specifies how far the curve should extend beyond the extreme datapoints. But this influences only where the curve is drawn; the density estimate will still smooth over the range where no data can exist, causing it to be artificially low at the extremes of the distribution:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(tips, x=\"total_bill\", kind=\"kde\", cut=0)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The KDE approach also fails for discrete data or when data are naturally continuous but specific values are over-represented. The important thing to keep in mind is that the KDE will *always show you a smooth curve*, even when the data themselves are not smooth. 
For example, consider this distribution of diamond weights:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "diamonds = sns.load_dataset(\"diamonds\")\n", + "sns.displot(diamonds, x=\"carat\", kind=\"kde\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "While the KDE suggests that there are peaks around specific values, the histogram reveals a much more jagged distribution:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(diamonds, x=\"carat\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "As a compromise, it is possible to combine these two approaches. While in histogram mode, :func:`displot` (as with :func:`histplot`) has the option of including the smoothed KDE curve (note ``kde=True``, not ``kind=\"kde\"``):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(diamonds, x=\"carat\", kde=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + ".. _tutorial_ecdf:\n", + "\n", + "Empirical cumulative distributions\n", + "----------------------------------\n", + "\n", + "A third option for visualizing distributions computes the \"empirical cumulative distribution function\" (ECDF). This plot draws a monotonically-increasing curve through each datapoint such that the height of the curve reflects the proportion of observations with a smaller value:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(penguins, x=\"flipper_length_mm\", kind=\"ecdf\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The ECDF plot has two key advantages. Unlike the histogram or KDE, it directly represents each datapoint. That means there is no bin size or smoothing parameter to consider. 
Additionally, because the curve is monotonically increasing, it is well-suited for comparing multiple distributions:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(penguins, x=\"flipper_length_mm\", hue=\"species\", kind=\"ecdf\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The major downside to the ECDF plot is that it represents the shape of the distribution less intuitively than a histogram or density curve. Consider how the bimodality of flipper lengths is immediately apparent in the histogram, but to see it in the ECDF plot, you must look for varying slopes. Nevertheless, with practice, you can learn to answer all of the important questions about a distribution by examining the ECDF, and doing so can be a powerful approach." + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Visualizing bivariate distributions\n", + "-----------------------------------\n", + "\n", + "All of the examples so far have considered *univariate* distributions: distributions of a single variable, perhaps conditional on a second variable assigned to ``hue``. Assigning a second variable to ``y``, however, will plot a *bivariate* distribution:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "A bivariate histogram bins the data within rectangles that tile the plot and then shows the count of observations within each rectangle with the fill color (analogous to a :func:`heatmap`). Similarly, a bivariate KDE plot smoothes the (x, y) observations with a 2D Gaussian. 
The default representation then shows the *contours* of the 2D density:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\", kind=\"kde\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Assigning a ``hue`` variable will plot multiple heatmaps or contour sets using different colors. For bivariate histograms, this will only work well if there is minimal overlap between the conditional distributions:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\", hue=\"species\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The contour approach of the bivariate KDE plot lends itself better to evaluating overlap, although a plot with too many contours can get busy:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\", hue=\"species\", kind=\"kde\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Just as with univariate plots, the choice of bin size or smoothing bandwidth will determine how well the plot represents the underlying bivariate distribution. 
The same parameters apply, but they can be tuned for each variable by passing a pair of values:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\", binwidth=(2, .5))" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "To aid interpretation of the heatmap, add a colorbar to show the mapping between counts and color intensity:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\", binwidth=(2, .5), cbar=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The meaning of the bivariate density contours is less straightforward. Because the density is not directly interpretable, the contours are drawn at *iso-proportions* of the density, meaning that each curve shows a level set such that some proportion *p* of the density lies below it. The *p* values are evenly spaced, with the lowest level controlled by the ``thresh`` parameter and the number controlled by ``levels``:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\", kind=\"kde\", thresh=.2, levels=4)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The ``levels`` parameter also accepts a list of values, for more control:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\", kind=\"kde\", levels=[.01, .05, .1, .8])" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The bivariate histogram allows one or both variables to be discrete. 
Plotting one discrete and one continuous variable offers another way to compare conditional univariate distributions:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(diamonds, x=\"price\", y=\"clarity\", log_scale=(True, False))" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "In contrast, plotting two discrete variables is an easy way to show the cross-tabulation of the observations:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(diamonds, x=\"color\", y=\"clarity\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Distribution visualization in other settings\n", + "--------------------------------------------\n", + "\n", + "Several other figure-level plotting functions in seaborn make use of the :func:`histplot` and :func:`kdeplot` functions.\n", + "\n", + "\n", + "Plotting joint and marginal distributions\n", + "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + "\n", + "The first is :func:`jointplot`, which augments a bivariate relational or distribution plot with the marginal distributions of the two variables. 
By default, :func:`jointplot` represents the bivariate distribution using :func:`scatterplot` and the marginal distributions using :func:`histplot`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.jointplot(data=penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Similar to :func:`displot`, setting a different ``kind=\"kde\"`` in :func:`jointplot` will change both the joint and marginal plots to use :func:`kdeplot`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.jointplot(\n", + " data=penguins,\n", + " x=\"bill_length_mm\", y=\"bill_depth_mm\", hue=\"species\",\n", + " kind=\"kde\"\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + ":func:`jointplot` is a convenient interface to the :class:`JointGrid` class, which offers more flexibility when used directly:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.JointGrid(data=penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\")\n", + "g.plot_joint(sns.histplot)\n", + "g.plot_marginals(sns.boxplot)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "A less-obtrusive way to show marginal distributions uses a \"rug\" plot, which adds a small tick on the edge of the plot to represent each individual observation. 
This is built into :func:`displot`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(\n", + " penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\",\n", + " kind=\"kde\", rug=True\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "And the axes-level :func:`rugplot` function can be used to add rugs on the side of any other kind of plot:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.relplot(data=penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\")\n", + "sns.rugplot(data=penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Plotting many distributions\n", + "^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + "\n", + "The :func:`pairplot` function offers a similar blend of joint and marginal distributions. Rather than focusing on a single relationship, however, :func:`pairplot` uses a \"small-multiple\" approach to visualize the univariate distribution of all variables in a dataset along with all of their pairwise relationships:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.pairplot(penguins)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "As with :func:`jointplot`/:class:`JointGrid`, using the underlying :class:`PairGrid` directly will afford more flexibility with only a bit more typing:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.PairGrid(penguins)\n", + "g.map_upper(sns.histplot)\n", + "g.map_lower(sns.kdeplot, fill=True)\n", + "g.map_diag(sns.histplot, kde=True)" + ] + } + ], + "metadata": { + "celltoolbar": "Tags", + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": 
"ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/doc/_tutorial/error_bars.ipynb b/doc/_tutorial/error_bars.ipynb new file mode 100644 index 0000000000..f101a80edf --- /dev/null +++ b/doc/_tutorial/error_bars.ipynb @@ -0,0 +1,369 @@ +{ + "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + ".. _errorbar_tutorial:\n", + "\n", + ".. currentmodule:: seaborn" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "import matplotlib as mpl\n", + "import matplotlib.pyplot as plt\n", + "sns.set_theme(style=\"darkgrid\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "np.random.seed(sum(map(ord, \"errorbars\")))" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Statistical estimation and error bars\n", + "=====================================\n", + "\n", + "Data visualization sometimes involves a step of aggregation or estimation, where multiple data points are reduced to a summary statistic such as the mean or median. When showing a summary statistic, it is usually appropriate to add *error bars*, which provide a visual cue about how well the summary represents the underlying data points.\n", + "\n", + "Several seaborn functions will automatically calculate both summary statistics and the error bars when given a full dataset. 
This chapter explains how you can control what the error bars show and why you might choose each of the options that seaborn affords.\n", + "\n", + "The error bars around an estimate of central tendency can show one of two general things: either the range of uncertainty about the estimate or the spread of the underlying data around it. These measures are related: given the same sample size, estimates will be more uncertain when data has a broader spread. But uncertainty will decrease as sample sizes grow, whereas spread will not.\n", + "\n", + "In seaborn, there are two approaches for constructing each kind of error bar. One approach is parametric, using a formula that relies on assumptions about the shape of the distribution. The other approach is nonparametric, using only the data that you provide.\n", + "\n", + "Your choice is made with the `errorbar` parameter, which exists for each function that does estimation as part of plotting. This parameter accepts the name of the method to use and, optionally, a parameter that controls the size of the interval. 
The choices can be defined in a 2D taxonomy that depends on what is shown and how it is constructed:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "import io\n", + "from IPython.display import SVG\n", + "f = mpl.figure.Figure(figsize=(8, 5))\n", + "axs = f.subplots(2, 2, sharex=True, sharey=True,)\n", + "\n", + "plt.setp(axs, xlim=(-1, 1), ylim=(-1, 1), xticks=[], yticks=[])\n", + "for ax, color in zip(axs.flat, [\"C0\", \"C2\", \"C3\", \"C1\"]):\n", + " ax.set_facecolor(mpl.colors.to_rgba(color, .25))\n", + "\n", + "kws = dict(x=0, y=.2, ha=\"center\", va=\"center\", size=18)\n", + "axs[0, 0].text(s=\"Standard deviation\", **kws)\n", + "axs[0, 1].text(s=\"Standard error\", **kws)\n", + "axs[1, 0].text(s=\"Percentile interval\", **kws)\n", + "axs[1, 1].text(s=\"Confidence interval\", **kws)\n", + "\n", + "kws = dict(x=0, y=-.2, ha=\"center\", va=\"center\", size=18, font=\"Courier New\")\n", + "axs[0, 0].text(s='errorbar=(\"sd\", scale)', **kws)\n", + "axs[0, 1].text(s='errorbar=(\"se\", scale)', **kws)\n", + "axs[1, 0].text(s='errorbar=(\"pi\", width)', **kws)\n", + "axs[1, 1].text(s='errorbar=(\"ci\", width)', **kws)\n", + "\n", + "kws = dict(size=18)\n", + "axs[0, 0].set_title(\"Spread\", **kws)\n", + "axs[0, 1].set_title(\"Uncertainty\", **kws)\n", + "axs[0, 0].set_ylabel(\"Parametric\", **kws)\n", + "axs[1, 0].set_ylabel(\"Nonparametric\", **kws)\n", + "\n", + "f.tight_layout()\n", + "f.subplots_adjust(hspace=.05, wspace=.05 * (4 / 6))\n", + "f.savefig(svg:=io.StringIO(), format=\"svg\")\n", + "SVG(svg.getvalue())" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "You will note that the size parameter is defined differently for the parametric and nonparametric approaches. For parametric error bars, it is a scalar factor that is multiplied by the statistic defining the error (standard error or standard deviation). 
For nonparametric error bars, it is a percentile width. This is explained further for each specific approach below.\n", + "\n", + "\n", + ".. note::\n", + " The `errorbar` API described here was introduced in seaborn v0.12. In prior versions, the only options were to show a bootstrap confidence interval or a standard deviation, with the choice controlled by the `ci` parameter (i.e., `ci=<size>` or `ci=\"sd\"`).\n", + "\n", + "To compare the different parameterizations, we'll use the following helper function:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_errorbars(arg, **kws):\n", + " np.random.seed(sum(map(ord, \"error_bars\")))\n", + " x = np.random.normal(0, 1, 100)\n", + " f, axs = plt.subplots(2, figsize=(7, 2), sharex=True, layout=\"tight\")\n", + " sns.pointplot(x=x, errorbar=arg, **kws, capsize=.3, ax=axs[0])\n", + " sns.stripplot(x=x, jitter=.3, ax=axs[1])" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Measures of data spread\n", + "-----------------------\n", + "\n", + "Error bars that represent data spread present a compact display of the distribution, using three numbers where :func:`boxplot` would use 5 or more and :func:`violinplot` would use a complicated algorithm.\n", + "\n", + "Standard deviation error bars\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\n", + "Standard deviation error bars are the simplest to explain, because the standard deviation is a familiar statistic. It is the average distance from each data point to the sample mean. By default, `errorbar=\"sd\"` will draw error bars at +/- 1 sd around the estimate, but the range can be increased by passing a scaling size parameter. 
Note that, assuming normally-distributed data, ~68% of the data will lie within one standard deviation, ~95% will lie within two, and ~99.7% will lie within three:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plot_errorbars(\"sd\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Percentile interval error bars\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\n", + "Percentile intervals also represent the range where some amount of the data fall, but they do so by \n", + "computing those percentiles directly from your sample. By default, `errorbar=\"pi\"` will show a 95% interval, ranging from the 2.5 to the 97.5 percentiles. You can choose a different range by passing a size parameter, e.g., to show the inter-quartile range:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plot_errorbars((\"pi\", 50))" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The standard deviation error bars will always be symmetrical around the estimate. This can be a problem when the data are skewed, especially if there are natural bounds (e.g., if the data represent a quantity that can only be positive). In some cases, standard deviation error bars may extend to \"impossible\" values. The nonparametric approach does not have this problem, because it can account for asymmetrical spread and will never extend beyond the range of the data." + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Measures of estimate uncertainty\n", + "--------------------------------\n", + "\n", + "If your data are a random sample from a larger population, then the mean (or other estimate) will be an imperfect measure of the true population average. 
Error bars that show estimate uncertainty try to represent the range of likely values for the true parameter.\n", + "\n", + "Standard error bars\n", + "~~~~~~~~~~~~~~~~~~~\n", + "\n", + "The standard error statistic is related to the standard deviation: in fact it is just the standard deviation divided by the square root of the sample size. The default, with `errorbar=\"se\"`, draws an interval +/-1 standard error from the mean:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plot_errorbars(\"se\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Confidence interval error bars\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\n", + "The nonparametric approach to representing uncertainty uses *bootstrapping*: a procedure where the dataset is randomly resampled with replacement a number of times, and the estimate is recalculated from each resample. This procedure creates a distribution of statistics approximating the distribution of values that you could have gotten for your estimate if you had a different sample.\n", + "\n", + "The confidence interval is constructed by taking a percentile interval of the *bootstrap distribution*. By default `errorbar=\"ci\"` draws a 95% confidence interval:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plot_errorbars(\"ci\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The seaborn terminology is somewhat specific, because a confidence interval in statistics can be parametric or nonparametric. To draw a parametric confidence interval, you scale the standard error, using a formula similar to the one mentioned above. 
For example, an approximate 95% confidence interval can be constructed by taking the mean +/- two standard errors:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plot_errorbars((\"se\", 2))" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The nonparametric bootstrap has advantages similar to those of the percentile interval: it will naturally adapt to skewed and bounded data in a way that a standard error interval cannot. It is also more general. While the standard error formula is specific to the mean, error bars can be computed using the bootstrap for any estimator:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plot_errorbars(\"ci\", estimator=\"median\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Bootstrapping involves randomness, and the error bars will appear slightly different each time you run the code that creates them. A few parameters control this. One sets the number of iterations (`n_boot`): with more iterations, the resulting intervals will be more stable. The other sets the `seed` for the random number generator, which will ensure identical results:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plot_errorbars(\"ci\", n_boot=5000, seed=10)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Because of its iterative process, bootstrap intervals can be expensive to compute, especially for large datasets. But because uncertainty decreases with sample size, it may be more informative in that case to use an error bar that represents data spread.\n", + "\n", + "Custom error bars\n", + "~~~~~~~~~~~~~~~~~\n", + "\n", + "If these recipes are not sufficient, it is also possible to pass a generic function to the `errorbar` parameter. 
This function should take a vector and produce a pair of values representing the minimum and maximum points of the interval:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plot_errorbars(lambda x: (x.min(), x.max()))" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "(In practice, you could show the full range of the data with `errorbar=(\"pi\", 100)` rather than the custom function shown above).\n", + "\n", + "Note that seaborn functions cannot currently draw error bars from values that have been calculated externally, although matplotlib functions can be used to add such error bars to seaborn plots." + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Error bars on regression fits\n", + "-----------------------------\n", + "\n", + "The preceding discussion has focused on error bars shown around parameter estimates for aggregate data. Error bars also arise in seaborn when estimating regression models to visualize relationships. Here, the error bars will be represented by a \"band\" around the regression line:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "x = np.random.normal(0, 1, 50)\n", + "y = x * 2 + np.random.normal(0, 2, size=x.size)\n", + "sns.regplot(x=x, y=y)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Currently, the error bars on a regression estimate are less flexible, only showing a confidence interval with a size set through `ci=`. This may change in the future." + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Are error bars enough?\n", + "----------------------\n", + "\n", + "You should always ask yourself whether it's best to use a plot that displays only a summary statistic and error bar. 
In many cases, it isn't.\n", + "\n", + "If you are interested in questions about summaries (such as whether the mean value differs between groups or increases over time), aggregation reduces the complexity of the plot and makes those inferences easier. But in doing so, it obscures valuable information about the underlying data points, such as the shape of the distributions and the presence of outliers.\n", + "\n", + "When analyzing your own data, don't be satisfied with summary statistics. Always look at the underlying distributions too. Sometimes, it can be helpful to combine both perspectives into the same figure. Many seaborn functions can help with this task, especially those discussed in the :doc:`categorical tutorial <categorical>`." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/doc/_tutorial/function_overview.ipynb b/doc/_tutorial/function_overview.ipynb new file mode 100644 index 0000000000..3648504cf5 --- /dev/null +++ b/doc/_tutorial/function_overview.ipynb @@ -0,0 +1,496 @@ +{ + "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + ".. _function_tutorial:\n", + "\n", + ".. currentmodule:: seaborn" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Overview of seaborn plotting functions\n", + "======================================\n", + "\n", + "Most of your interactions with seaborn will happen through a set of plotting functions. Later chapters in the tutorial will explore the specific features offered by each function. This chapter will introduce, at a high-level, the different kinds of functions that you will encounter."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "from IPython.display import HTML\n", + "sns.set_theme()" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Similar functions for similar tasks\n", + "-----------------------------------\n", + "\n", + "The seaborn namespace is flat; all of the functionality is accessible at the top level. But the code itself is hierarchically structured, with modules of functions that achieve similar visualization goals through different means. Most of the docs are structured around these modules: you'll encounter names like \"relational\", \"distributional\", and \"categorical\".\n", + "\n", + "For example, the :ref:`distributions module <distribution_api>` defines functions that specialize in representing the distribution of datapoints. This includes familiar methods like the histogram:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "penguins = sns.load_dataset(\"penguins\")\n", + "sns.histplot(data=penguins, x=\"flipper_length_mm\", hue=\"species\", multiple=\"stack\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Along with similar, but perhaps less familiar, options such as kernel density estimation:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.kdeplot(data=penguins, x=\"flipper_length_mm\", hue=\"species\", multiple=\"stack\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Functions within a module share a lot of underlying code and offer similar features that may not be present in other components of the library (such as ``multiple=\"stack\"`` in the examples above).
They are designed to facilitate switching between different visual representations as you explore a dataset, because different representations often have complementary strengths and weaknesses." + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Figure-level vs. axes-level functions\n", + "-------------------------------------\n", + "\n", + "In addition to the different modules, there is a cross-cutting classification of seaborn functions as \"axes-level\" or \"figure-level\". The examples above are axes-level functions. They plot data onto a single :class:`matplotlib.pyplot.Axes` object, which is the return value of the function.\n", + "\n", + "In contrast, figure-level functions interface with matplotlib through a seaborn object, usually a :class:`FacetGrid`, that manages the figure. Each module has a single figure-level function, which offers a unitary interface to its various axes-level functions. The organization looks a bit like this:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "from matplotlib.patches import FancyBboxPatch\n", + "\n", + "f, ax = plt.subplots(figsize=(7, 5))\n", + "f.subplots_adjust(0, 0, 1, 1)\n", + "ax.set_axis_off()\n", + "ax.set(xlim=(0, 1), ylim=(0, 1))\n", + "\n", + "\n", + "modules = \"relational\", \"distributions\", \"categorical\"\n", + "\n", + "pal = sns.color_palette(\"deep\")\n", + "colors = dict(relational=pal[0], distributions=pal[1], categorical=pal[2])\n", + "\n", + "pal = sns.color_palette(\"dark\")\n", + "text_colors = dict(relational=pal[0], distributions=pal[1], categorical=pal[2])\n", + "\n", + "\n", + "functions = dict(\n", + " relational=[\"scatterplot\", \"lineplot\"],\n", + " distributions=[\"histplot\", \"kdeplot\", \"ecdfplot\", \"rugplot\"],\n", + " categorical=[\"stripplot\", \"swarmplot\", \"boxplot\", \"violinplot\", \"pointplot\", \"barplot\"],\n", + ")\n", + "\n", + "pad = .06\n", + 
"\n", + "w = .2\n", + "h = .15\n", + "\n", + "xs = np.arange(0, 1, 1 / 3) + pad * 1.05\n", + "y = .7\n", + "\n", + "for x, mod in zip(xs, modules):\n", + " color = colors[mod] + (.2,)\n", + " text_color = text_colors[mod]\n", + " box = FancyBboxPatch((x, y), w, h, f\"round,pad={pad}\", color=\"white\")\n", + " ax.add_artist(box)\n", + " box = FancyBboxPatch((x, y), w, h, f\"round,pad={pad}\", linewidth=1, edgecolor=text_color, facecolor=color)\n", + " ax.add_artist(box)\n", + " ax.text(x + w / 2, y + h / 2, f\"{mod[:3]}plot\\n({mod})\", ha=\"center\", va=\"center\", size=22, color=text_color)\n", + "\n", + " for i, func in enumerate(functions[mod]):\n", + " x_i = x + w / 2\n", + " y_i = y - i * .1 - h / 2 - pad\n", + " box = FancyBboxPatch((x_i - w / 2, y_i - pad / 3), w, h / 4, f\"round,pad={pad / 3}\",\n", + " color=\"white\")\n", + " ax.add_artist(box)\n", + " box = FancyBboxPatch((x_i - w / 2, y_i - pad / 3), w, h / 4, f\"round,pad={pad / 3}\",\n", + " linewidth=1, edgecolor=text_color, facecolor=color)\n", + " ax.add_artist(box)\n", + " ax.text(x_i, y_i, func, ha=\"center\", va=\"center\", size=18, color=text_color)\n", + "\n", + " ax.plot([x_i, x_i], [y, y_i], zorder=-100, color=text_color, lw=1)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "For example, :func:`displot` is the figure-level function for the distributions module. 
Its default behavior is to draw a histogram, using the same code as :func:`histplot` behind the scenes:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(data=penguins, x=\"flipper_length_mm\", hue=\"species\", multiple=\"stack\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "To draw a kernel density plot instead, using the same code as :func:`kdeplot`, select it using the ``kind`` parameter:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(data=penguins, x=\"flipper_length_mm\", hue=\"species\", multiple=\"stack\", kind=\"kde\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "You'll notice that the figure-level plots look mostly like their axes-level counterparts, but there are a few differences. Notably, the legend is placed outside the plot. They also have a slightly different shape (more on that shortly).\n", + "\n", + "The most useful feature offered by the figure-level functions is that they can easily create figures with multiple subplots. For example, instead of stacking the three distributions for each species of penguins in the same axes, we can \"facet\" them by plotting each distribution across the columns of the figure:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(data=penguins, x=\"flipper_length_mm\", hue=\"species\", col=\"species\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The figure-level functions wrap their axes-level counterparts and pass the kind-specific keyword arguments (such as the bin size for a histogram) down to the underlying function. That means they are no less flexible, but there is a downside: the kind-specific parameters don't appear in the function signature or docstrings. 
Some of their features might be less discoverable, and you may need to look at two different pages of the documentation before understanding how to achieve a specific goal." + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Axes-level functions make self-contained plots\n", + "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + "\n", + "The axes-level functions are written to act like drop-in replacements for matplotlib functions. While they add axis labels and legends automatically, they don't modify anything beyond the axes that they are drawn into. That means they can be composed into arbitrarily-complex matplotlib figures with predictable results.\n", + "\n", + "The axes-level functions call :func:`matplotlib.pyplot.gca` internally, which hooks into the matplotlib state-machine interface so that they draw their plots on the \"currently-active\" axes. But they additionally accept an ``ax=`` argument, which integrates with the object-oriented interface and lets you specify exactly where each plot should go:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "f, axs = plt.subplots(1, 2, figsize=(8, 4), gridspec_kw=dict(width_ratios=[4, 3]))\n", + "sns.scatterplot(data=penguins, x=\"flipper_length_mm\", y=\"bill_length_mm\", hue=\"species\", ax=axs[0])\n", + "sns.histplot(data=penguins, x=\"species\", hue=\"species\", shrink=.8, alpha=.8, legend=False, ax=axs[1])\n", + "f.tight_layout()" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Figure-level functions own their figure\n", + "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + "\n", + "In contrast, figure-level functions cannot (easily) be composed with other plots. By design, they \"own\" their own figure, including its initialization, so there's no notion of using a figure-level function to draw a plot onto an existing axes. 
This constraint allows the figure-level functions to implement features such as putting the legend outside of the plot.\n", + "\n", + "Nevertheless, it is possible to go beyond what the figure-level functions offer by accessing the matplotlib axes on the object that they return and adding other elements to the plot that way:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tips = sns.load_dataset(\"tips\")\n", + "g = sns.relplot(data=tips, x=\"total_bill\", y=\"tip\")\n", + "g.ax.axline(xy1=(10, 2), slope=.2, color=\"b\", dashes=(5, 2))" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Customizing plots from a figure-level function\n", + "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + "\n", + "The figure-level functions return a :class:`FacetGrid` instance, which has a few methods for customizing attributes of the plot in a way that is \"smart\" about the subplot organization. For example, you can change the labels on the external axes using a single line of code:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.relplot(data=penguins, x=\"flipper_length_mm\", y=\"bill_length_mm\", col=\"sex\")\n", + "g.set_axis_labels(\"Flipper length (mm)\", \"Bill length (mm)\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "While convenient, this does add a bit of extra complexity, as you need to remember that this method is not part of the matplotlib API and exists only when using a figure-level function." + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + ".. _figure_size_tutorial:\n", + "\n", + "Specifying figure sizes\n", + "^^^^^^^^^^^^^^^^^^^^^^^\n", + "\n", + "To increase or decrease the size of a matplotlib plot, you set the width and height of the entire figure, either in the `global rcParams <https://matplotlib.org/tutorials/introductory/customizing.html>`_, while setting up the plot (e.g.
with the ``figsize`` parameter of :func:`matplotlib.pyplot.subplots`), or by calling a method on the figure object (e.g. :meth:`matplotlib.Figure.set_size_inches`). When using an axes-level function in seaborn, the same rules apply: the size of the plot is determined by the size of the figure it is part of and the axes layout in that figure.\n", + "\n", + "When using a figure-level function, there are several key differences. First, the functions themselves have parameters to control the figure size (although these are actually parameters of the underlying :class:`FacetGrid` that manages the figure). Second, these parameters, ``height`` and ``aspect``, parameterize the size slightly differently than the ``width``, ``height`` parameterization in matplotlib (using the seaborn parameters, ``width = height * aspect``). Most importantly, the parameters correspond to the size of each *subplot*, rather than the size of the overall figure.\n", + "\n", + "To illustrate the difference between these approaches, here is the default output of :func:`matplotlib.pyplot.subplots` with one subplot:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "f, ax = plt.subplots()" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "A figure with multiple columns will have the same overall size, but the axes will be squeezed horizontally to fit in the space:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "f, ax = plt.subplots(1, 2, sharey=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "In contrast, a plot created by a figure-level function will be square. To demonstrate that, let's set up an empty plot by using :class:`FacetGrid` directly. 
This happens behind the scenes in functions like :func:`relplot`, :func:`displot`, or :func:`catplot`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.FacetGrid(penguins)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "When additional columns are added, the figure itself will become wider, so that its subplots have the same size and shape:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.FacetGrid(penguins, col=\"sex\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "And you can adjust the size and shape of each subplot without accounting for the total number of rows and columns in the figure:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.FacetGrid(penguins, col=\"sex\", height=3.5, aspect=.75)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The upshot is that you can assign faceting variables without stopping to think about how you'll need to adjust the total figure size. A downside is that, when you do want to change the figure size, you'll need to remember that things work a bit differently than they do in matplotlib." + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Relative merits of figure-level functions\n", + "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + "\n", + "Here is a summary of the pros and cons that we have discussed above:\n", + "\n", + ".. 
list-table::\n", + " :header-rows: 1\n", + "\n", + " * - Advantages\n", + " - Drawbacks\n", + " * - Easy faceting by data variables\n", + " - Many parameters not in function signature\n", + " * - Legend outside of plot by default\n", + " - Cannot be part of a larger matplotlib figure\n", + " * - Easy figure-level customization\n", + " - Different API from matplotlib\n", + " * - Different figure size parameterization\n", + " - Different figure size parameterization\n", + "\n", + "On balance, the figure-level functions add some additional complexity that can make things more confusing for beginners, but their distinct features give them additional power. The tutorial documentation mostly uses the figure-level functions, because they produce slightly cleaner plots, and we generally recommend their use for most applications. The one situation where they are not a good choice is when you need to make a complex, standalone figure that composes multiple different plot kinds. At this point, it's recommended to set up the figure using matplotlib directly and to fill in the individual components using axes-level functions." + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Combining multiple views on the data\n", + "------------------------------------\n", + "\n", + "Two important plotting functions in seaborn don't fit cleanly into the classification scheme discussed above. These functions, :func:`jointplot` and :func:`pairplot`, employ multiple kinds of plots from different modules to represent multiple aspects of a dataset in a single figure. Both plots are figure-level functions and create figures with multiple subplots by default. 
But they use different objects to manage the figure: :class:`JointGrid` and :class:`PairGrid`, respectively.\n", + "\n", + ":func:`jointplot` plots the relationship or joint distribution of two variables while adding marginal axes that show the univariate distribution of each one separately:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.jointplot(data=penguins, x=\"flipper_length_mm\", y=\"bill_length_mm\", hue=\"species\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + ":func:`pairplot` is similar — it combines joint and marginal views — but rather than focusing on a single relationship, it visualizes every pairwise combination of variables simultaneously:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.pairplot(data=penguins, hue=\"species\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Behind the scenes, these functions are using axes-level functions that you have already met (:func:`scatterplot` and :func:`kdeplot`), and they also have a ``kind`` parameter that lets you quickly swap in a different representation:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.jointplot(data=penguins, x=\"flipper_length_mm\", y=\"bill_length_mm\", hue=\"species\", kind=\"hist\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git 
a/doc/_tutorial/introduction.ipynb b/doc/_tutorial/introduction.ipynb new file mode 100644 index 0000000000..37792610a1 --- /dev/null +++ b/doc/_tutorial/introduction.ipynb @@ -0,0 +1,469 @@ +{ + "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + ".. _introduction:\n", + "\n", + ".. currentmodule:: seaborn\n", + "\n", + "An introduction to seaborn\n", + "==========================\n", + "\n", + "Seaborn is a library for making statistical graphics in Python. It builds on top of `matplotlib <https://matplotlib.org/>`_ and integrates closely with `pandas <https://pandas.pydata.org/>`_ data structures.\n", + "\n", + "Seaborn helps you explore and understand your data. Its plotting functions operate on dataframes and arrays containing whole datasets and internally perform the necessary semantic mapping and statistical aggregation to produce informative plots. Its dataset-oriented, declarative API lets you focus on what the different elements of your plots mean, rather than on the details of how to draw them.\n", + "\n", + "Here's an example of what seaborn can do:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import seaborn\n", + "import seaborn as sns\n", + "\n", + "# Apply the default theme\n", + "sns.set_theme()\n", + "\n", + "# Load an example dataset\n", + "tips = sns.load_dataset(\"tips\")\n", + "\n", + "# Create a visualization\n", + "sns.relplot(\n", + " data=tips,\n", + " x=\"total_bill\", y=\"tip\", col=\"time\",\n", + " hue=\"smoker\", style=\"smoker\", size=\"size\",\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "A few things have happened here.
Let's go through them one by one:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide-output" + ] + }, + "outputs": [], + "source": [ + "# Import seaborn\n", + "import seaborn as sns" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Seaborn is the only library we need to import for this simple example. By convention, it is imported with the shorthand ``sns``.\n", + "\n", + "Behind the scenes, seaborn uses matplotlib to draw its plots. For interactive work, it's recommended to use a Jupyter/IPython interface in `matplotlib mode <https://ipython.readthedocs.io/en/stable/interactive/plotting.html>`_, or else you'll have to call :func:`matplotlib.pyplot.show` when you want to see the plot." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide-output" + ] + }, + "outputs": [], + "source": [ + "# Apply the default theme\n", + "sns.set_theme()" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "This uses the matplotlib rcParam system and will affect how all matplotlib plots look, even if you don't make them with seaborn. Beyond the default theme, there are :doc:`several other options </tutorial/aesthetics>`, and you can independently control the style and scaling of the plot to quickly translate your work between presentation contexts (e.g., making a version of your figure that will have readable fonts when projected during a talk). If you like the matplotlib defaults or prefer a different theme, you can skip this step and still use the seaborn plotting functions." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide-output" + ] + }, + "outputs": [], + "source": [ + "# Load an example dataset\n", + "tips = sns.load_dataset(\"tips\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Most code in the docs will use the :func:`load_dataset` function to get quick access to an example dataset.
There's nothing special about these datasets: they are just pandas dataframes, and we could have loaded them with :func:`pandas.read_csv` or built them by hand. Most of the examples in the documentation will specify data using pandas dataframes, but seaborn is very flexible about the :doc:`data structures </tutorial/data_structure>` that it accepts." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide-output" + ] + }, + "outputs": [], + "source": [ + "# Create a visualization\n", + "sns.relplot(\n", + " data=tips,\n", + " x=\"total_bill\", y=\"tip\", col=\"time\",\n", + " hue=\"smoker\", style=\"smoker\", size=\"size\",\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "This plot shows the relationship between five variables in the tips dataset using a single call to the seaborn function :func:`relplot`. Notice how we provided only the names of the variables and their roles in the plot. Unlike when using matplotlib directly, it wasn't necessary to specify attributes of the plot elements in terms of the color values or marker codes. Behind the scenes, seaborn handled the translation from values in the dataframe to arguments that matplotlib understands. This declarative approach lets you stay focused on the questions that you want to answer, rather than on the details of how to control matplotlib.\n", + "\n", + ".. _intro_api_abstraction:\n", + "\n", + "A high-level API for statistical graphics\n", + "-----------------------------------------\n", + "\n", + "There is no universally best way to visualize data. Different questions are best answered by different plots. Seaborn makes it easy to switch between different visual representations by using a consistent dataset-oriented API.\n", + "\n", + "The function :func:`relplot` is named that way because it is designed to visualize many different statistical *relationships*.
While scatter plots are often effective, relationships where one variable represents a measure of time are better represented by a line. The :func:`relplot` function has a convenient ``kind`` parameter that lets you easily switch to this alternate representation:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dots = sns.load_dataset(\"dots\")\n", + "sns.relplot(\n", + " data=dots, kind=\"line\",\n", + " x=\"time\", y=\"firing_rate\", col=\"align\",\n", + " hue=\"choice\", size=\"coherence\", style=\"choice\",\n", + " facet_kws=dict(sharex=False),\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Notice how the ``size`` and ``style`` parameters are used in both the scatter and line plots, but they affect the two visualizations differently: changing the marker area and symbol in the scatter plot vs the line width and dashing in the line plot. We did not need to keep those details in mind, letting us focus on the overall structure of the plot and the information we want it to convey.\n", + "\n", + ".. _intro_stat_estimation:\n", + "\n", + "Statistical estimation\n", + "~~~~~~~~~~~~~~~~~~~~~~\n", + "\n", + "Often, we are interested in the *average* value of one variable as a function of other variables. 
Many seaborn functions will automatically perform the statistical estimation that is necessary to answer these questions:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fmri = sns.load_dataset(\"fmri\")\n", + "sns.relplot(\n", + " data=fmri, kind=\"line\",\n", + " x=\"timepoint\", y=\"signal\", col=\"region\",\n", + " hue=\"event\", style=\"event\",\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "When statistical values are estimated, seaborn will use bootstrapping to compute confidence intervals and draw error bars representing the uncertainty of the estimate.\n", + "\n", + "Statistical estimation in seaborn goes beyond descriptive statistics. For example, it is possible to enhance a scatterplot by including a linear regression model (and its uncertainty) using :func:`lmplot`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.lmplot(data=tips, x=\"total_bill\", y=\"tip\", col=\"time\", hue=\"smoker\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + ".. _intro_distributions:\n", + "\n", + "\n", + "Distributional representations\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\n", + "Statistical analyses require knowledge about the distribution of variables in your dataset. The seaborn function :func:`displot` supports several approaches to visualizing distributions. 
These include classic techniques like histograms and computationally-intensive approaches like kernel density estimation:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(data=tips, x=\"total_bill\", col=\"time\", kde=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Seaborn also tries to promote techniques that are powerful but less familiar, such as calculating and plotting the empirical cumulative distribution function of the data:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.displot(data=tips, kind=\"ecdf\", x=\"total_bill\", col=\"time\", hue=\"smoker\", rug=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + ".. _intro_categorical:\n", + "\n", + "Plots for categorical data\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\n", + "Several specialized plot types in seaborn are oriented towards visualizing categorical data. They can be accessed through :func:`catplot`. These plots offer different levels of granularity. 
At the finest level, you may wish to see every observation by drawing a \"swarm\" plot: a scatter plot that adjusts the positions of the points along the categorical axis so that they don't overlap:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.catplot(data=tips, kind=\"swarm\", x=\"day\", y=\"total_bill\", hue=\"smoker\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Alternately, you could use kernel density estimation to represent the underlying distribution that the points are sampled from:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.catplot(data=tips, kind=\"violin\", x=\"day\", y=\"total_bill\", hue=\"smoker\", split=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Or you could show only the mean value and its confidence interval within each nested category:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.catplot(data=tips, kind=\"bar\", x=\"day\", y=\"total_bill\", hue=\"smoker\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + ".. _intro_dataset_funcs:\n", + "\n", + "Multivariate views on complex datasets\n", + "--------------------------------------\n", + "\n", + "Some seaborn functions combine multiple kinds of plots to quickly give informative summaries of a dataset. One, :func:`jointplot`, focuses on a single relationship. 
It plots the joint distribution between two variables along with each variable's marginal distribution:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "penguins = sns.load_dataset(\"penguins\")\n", + "sns.jointplot(data=penguins, x=\"flipper_length_mm\", y=\"bill_length_mm\", hue=\"species\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The other, :func:`pairplot`, takes a broader view: it shows joint and marginal distributions for all pairwise relationships and for each variable, respectively:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.pairplot(data=penguins, hue=\"species\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + ".. _intro_figure_classes:\n", + "\n", + "Lower-level tools for building figures\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\n", + "These tools work by combining :doc:`axes-level </tutorial/function_overview>` plotting functions with objects that manage the layout of the figure, linking the structure of a dataset to a :doc:`grid of axes </tutorial/axis_grids>`. Both elements are part of the public API, and you can use them directly to create complex figures with only a few more lines of code:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.PairGrid(penguins, hue=\"species\", corner=True)\n", + "g.map_lower(sns.kdeplot, hue=None, levels=5, color=\".2\")\n", + "g.map_lower(sns.scatterplot, marker=\"+\")\n", + "g.map_diag(sns.histplot, element=\"step\", linewidth=0, kde=True)\n", + "g.add_legend(frameon=True)\n", + "g.legend.set_bbox_to_anchor((.61, .6))" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + ".. 
_intro_defaults:\n", + "\n", + "Opinionated defaults and flexible customization\n", + "-----------------------------------------------\n", + "\n", + "Seaborn creates complete graphics with a single function call: when possible, its functions will automatically add informative axis labels and legends that explain the semantic mappings in the plot.\n", + "\n", + "In many cases, seaborn will also choose default values for its parameters based on characteristics of the data. For example, the :doc:`color mappings </tutorial/color_palettes>` that we have seen so far used distinct hues (blue, orange, and sometimes green) to represent different levels of the categorical variables assigned to ``hue``. When mapping a numeric variable, some functions will switch to a continuous gradient:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.relplot(\n", + " data=penguins,\n", + " x=\"bill_length_mm\", y=\"bill_depth_mm\", hue=\"body_mass_g\"\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "When you're ready to share or publish your work, you'll probably want to polish the figure beyond what the defaults achieve. Seaborn allows for several levels of customization. It defines multiple built-in :doc:`themes </tutorial/aesthetics>` that apply to all figures, its functions have standardized parameters that can modify the semantic mappings for each plot, and additional keyword arguments are passed down to the underlying matplotlib artists, allowing even more control. 
Once you've created a plot, its properties can be modified both through the seaborn API and by dropping down to the matplotlib layer for fine-grained tweaking:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.set_theme(style=\"ticks\", font_scale=1.25)\n", + "g = sns.relplot(\n", + " data=penguins,\n", + " x=\"bill_length_mm\", y=\"bill_depth_mm\", hue=\"body_mass_g\",\n", + " palette=\"crest\", marker=\"x\", s=100,\n", + ")\n", + "g.set_axis_labels(\"Bill length (mm)\", \"Bill depth (mm)\", labelpad=10)\n", + "g.legend.set_title(\"Body mass (g)\")\n", + "g.figure.set_size_inches(6.5, 4.5)\n", + "g.ax.margins(.15)\n", + "g.despine(trim=True)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + ".. _intro_matplotlib:\n", + "\n", + "Relationship to matplotlib\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\n", + "Seaborn's integration with matplotlib allows you to use it across the many environments that matplotlib supports, including exploratory analysis in notebooks, real-time interaction in GUI applications, and archival output in a number of raster and vector formats.\n", + "\n", + "While you can be productive using only seaborn functions, full customization of your graphics will require some knowledge of matplotlib's concepts and API. One aspect of the learning curve for new users of seaborn will be knowing when dropping down to the matplotlib layer is necessary to achieve a particular customization. On the other hand, users coming from matplotlib will find that much of their knowledge transfers.\n", + "\n", + "Matplotlib has a comprehensive and powerful API; just about any attribute of the figure can be changed to your liking. A combination of seaborn's high-level interface and matplotlib's deep customizability will allow you both to quickly explore your data and to create graphics that can be tailored into a `publication quality `_ final product." 
+ ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + ".. _intro_next_steps:\n", + "\n", + "Next steps\n", + "~~~~~~~~~~\n", + "\n", + "You have a few options for where to go next. You might first want to learn how to :doc:`install seaborn </installing>`. Once that's done, you can browse the :doc:`example gallery </examples/index>` to get a broader sense for what kind of graphics seaborn can produce. Or you can read through the rest of the :doc:`user guide and tutorial </tutorial>` for a deeper discussion of the different tools and what they are designed to accomplish. If you have a specific plot in mind and want to know how to make it, you could check out the :doc:`API reference </api>`, which documents each function's parameters and shows many examples to illustrate usage." + ] + } + ], + "metadata": { + "celltoolbar": "Tags", + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/doc/_tutorial/objects_interface.ipynb b/doc/_tutorial/objects_interface.ipynb new file mode 100644 index 0000000000..e47b82fe28 --- /dev/null +++ b/doc/_tutorial/objects_interface.ipynb @@ -0,0 +1,1090 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "35110bb9-6889-4bd5-b9d6-5a0479131433", + "metadata": {}, + "source": [ + ".. _objects_tutorial:\n", + "\n", + ".. currentmodule:: seaborn.objects\n", + "\n", + "The seaborn.objects interface\n", + "=============================\n", + "\n", + "The `seaborn.objects` namespace was introduced in version 0.12 as a completely new interface for making seaborn plots. It offers a more consistent and flexible API, comprising a collection of composable classes for transforming and plotting data. 
In contrast to the existing `seaborn` functions, the new interface aims to support end-to-end plot specification and customization without dropping down to matplotlib (although it will remain possible to do so if necessary).\n", + "\n", + ".. note::\n", + " The objects interface is currently experimental and incomplete. It is stable enough for serious use, but there certainly are some rough edges and missing features." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "706badfa-58be-4808-9016-bd0ca3ebaf12", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "import matplotlib as mpl\n", + "tips = sns.load_dataset(\"tips\")\n", + "penguins = sns.load_dataset(\"penguins\").dropna()\n", + "diamonds = sns.load_dataset(\"diamonds\")\n", + "healthexp = sns.load_dataset(\"healthexp\").sort_values([\"Country\", \"Year\"]).query(\"Year <= 2020\")" + ] + }, + { + "cell_type": "raw", + "id": "dd1ceae5-f930-41c2-8a18-f3cf94a161ad", + "metadata": {}, + "source": [ + "Specifying a plot and mapping data\n", + "----------------------------------\n", + "\n", + "The objects interface should be imported with the following convention:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c113156-20ad-4612-a9f5-0071d7fd35dd", + "metadata": {}, + "outputs": [], + "source": [ + "import seaborn.objects as so" + ] + }, + { + "cell_type": "raw", + "id": "6518484e-828b-4e7c-8529-ed6c9e61fa69", + "metadata": {}, + "source": [ + "The `seaborn.objects` namespace will provide access to all of the relevant classes. The most important is :class:`Plot`. You specify plots by instantiating a :class:`Plot` object and calling its methods. 
Let's see a simple example:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2e7f8ad0-9831-464b-9825-60733f110f34", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\")\n", + " .add(so.Dot())\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "52785052-6c80-4f35-87e4-b27df499bd5c", + "metadata": {}, + "source": [ + "This code, which produces a scatter plot, should look reasonably familiar. Just as when using :func:`seaborn.scatterplot`, we passed a tidy dataframe (`penguins`) and assigned two of its columns to the `x` and `y` coordinates of the plot. But instead of starting with the type of chart and then adding some data assignments, here we started with the data assignments and then added a graphical element.\n", + "\n", + "Setting properties\n", + "~~~~~~~~~~~~~~~~~~\n", + "\n", + "The :class:`Dot` class is an example of a :class:`Mark`: an object that graphically represents data values. Each mark will have a number of properties that can be set to change its appearance:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "310bac42-cfe4-4c45-9ddf-27c2cb200a8a", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\")\n", + " .add(so.Dot(color=\"g\", pointsize=4))\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "3f817822-dd96-4263-a42e-824f9ca4083a", + "metadata": {}, + "source": [ + "Mapping properties\n", + "~~~~~~~~~~~~~~~~~~\n", + "\n", + "As with seaborn's functions, it is also possible to *map* data values to various graphical properties:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6267e411-1f75-461e-a189-ead4452b2ec6", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(\n", + " penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\",\n", + " color=\"species\", pointsize=\"body_mass_g\",\n", + " )\n", + " .add(so.Dot())\n", + ")" 
+ ] + }, + { + "cell_type": "raw", + "id": "b6bfc0bf-cae1-44ed-9f52-e9f748c3877d", + "metadata": {}, + "source": [ + "While this basic functionality is not novel, an important difference from the function API is that properties are mapped using the same parameter names that would set them directly (instead of having `hue` vs. `color`, etc.). What matters is *where* the property is defined: passing a value when you initialize :class:`Dot` will set it directly, whereas assigning a variable when you set up the :class:`Plot` will *map* the corresponding data.\n", + "\n", + "Beyond this difference, the objects interface also allows a much wider range of mark properties to be mapped:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b8637528-4e17-4a41-be1c-2cb4275a5586", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(\n", + " penguins, x=\"bill_length_mm\", y=\"bill_depth_mm\",\n", + " edgecolor=\"sex\", edgewidth=\"body_mass_g\",\n", + " )\n", + " .add(so.Dot(color=\".8\"))\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "220930c4-410c-4452-a89e-95045f325cc0", + "metadata": {}, + "source": [ + "Defining groups\n", + "~~~~~~~~~~~~~~~\n", + "\n", + "The :class:`Dot` mark represents each data point independently, so the assignment of a variable to a property only has the effect of changing each dot's appearance. 
For marks that group or connect observations, such as :class:`Line`, it also determines the number of distinct graphical elements:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "95f892e1-8adc-43d3-8b30-84d8c848040a", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(healthexp, x=\"Year\", y=\"Life_Expectancy\", color=\"Country\")\n", + " .add(so.Line())\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "6665552c-674b-405e-a3ee-237517649349", + "metadata": {}, + "source": [ + "It is also possible to define a grouping without changing any visual properties, by using `group`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f9287beb-7a66-4dcb-bccf-9c5cab2790f4", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(healthexp, x=\"Year\", y=\"Life_Expectancy\", group=\"Country\")\n", + " .add(so.Line())\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "be097dfa-e33c-41f5-8b5a-09013cb33e6e", + "metadata": {}, + "source": [ + "Transforming data before plotting\n", + "---------------------------------\n", + "\n", + "Statistical transformation\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\n", + "As with many seaborn functions, the objects interface supports statistical transformations. These are performed by :class:`Stat` objects, such as :class:`Agg`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0964d2af-ce53-48b5-b79a-3277b05584dd", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(penguins, x=\"species\", y=\"body_mass_g\")\n", + " .add(so.Bar(), so.Agg())\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "5ac229b2-3692-4d35-8ba3-e35262f198ce", + "metadata": {}, + "source": [ + "In the function interface, statistical transformations are possible with some visual representations (e.g. :func:`seaborn.barplot`) but not others (e.g. :func:`seaborn.scatterplot`). 
The objects interface more cleanly separates representation and transformation, allowing you to compose :class:`Mark` and :class:`Stat` objects:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5c2f917d-1cb7-4d33-b8c4-2126a4f91ccc", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(penguins, x=\"species\", y=\"body_mass_g\")\n", + " .add(so.Dot(pointsize=10), so.Agg())\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "1b9d7688-22f5-4f4a-b58e-71d8ff550b48", + "metadata": {}, + "source": [ + "When forming groups by mapping properties, the :class:`Stat` transformation is applied to each group separately:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "734f9dac-4663-4e51-8070-716c0c0296c6", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(penguins, x=\"species\", y=\"body_mass_g\", color=\"sex\")\n", + " .add(so.Dot(pointsize=10), so.Agg())\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "e60a8e83-c34c-4769-b34f-e0c23c80b870", + "metadata": {}, + "source": [ + "Resolving overplotting\n", + "~~~~~~~~~~~~~~~~~~~~~~\n", + "\n", + "Some seaborn functions also have mechanisms that automatically resolve overplotting, as when :func:`seaborn.barplot` \"dodges\" bars once `hue` is assigned. The objects interface has less complex default behavior. 
Bars representing multiple groups will overlap by default:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "96653815-7da3-4a77-877a-485b5e7578a4", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(penguins, x=\"species\", y=\"body_mass_g\", color=\"sex\")\n", + " .add(so.Bar(), so.Agg())\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "06ee3b9f-0ae9-467f-8a40-e340e6f3ce7d", + "metadata": {}, + "source": [ + "Nevertheless, it is possible to compose the :class:`Bar` mark with the :class:`Agg` stat and a second transformation, implemented by :class:`Dodge`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e29792ae-c238-4538-952a-5af81adcefe0", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(penguins, x=\"species\", y=\"body_mass_g\", color=\"sex\")\n", + " .add(so.Bar(), so.Agg(), so.Dodge())\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "a27dcb37-be58-427b-a722-9039b91b6503", + "metadata": {}, + "source": [ + "The :class:`Dodge` class is an example of a :class:`Move` transformation, which is like a :class:`Stat` but only adjusts `x` and `y` coordinates. 
The :class:`Move` classes can be applied with any mark, and it's not necessary to use a :class:`Stat` first:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4509ea7-36fe-4ffb-b784-e945d13fb93c", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(penguins, x=\"species\", y=\"body_mass_g\", color=\"sex\")\n", + " .add(so.Dot(), so.Dodge())\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "a62e44ae-d6e7-4ab5-af2e-7b49a2031b1d", + "metadata": {}, + "source": [ + "It's also possible to apply multiple :class:`Move` operations in sequence:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "07536818-9ddd-46d1-b10c-b034fa257335", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(penguins, x=\"species\", y=\"body_mass_g\", color=\"sex\")\n", + " .add(so.Dot(), so.Dodge(), so.Jitter(.3))\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "fd8ed5cc-6ba4-4d03-8414-57a782971d4c", + "metadata": {}, + "source": [ + "Creating variables through transformation\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\n", + "The :class:`Agg` stat requires both `x` and `y` to already be defined, but variables can also be *created* through statistical transformation. 
For example, the :class:`Hist` stat requires only one of `x` *or* `y` to be defined, and it will create the other by counting observations:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4b1f2c61-d294-4a85-a383-384d92523c36", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(penguins, x=\"species\")\n", + " .add(so.Bar(), so.Hist())\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "9b33ea0c-f11d-48d7-be7c-13e9993906d8", + "metadata": {}, + "source": [ + "The :class:`Hist` stat will also create new `x` values (by binning) when given numeric data:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25123abd-75d4-4550-ac86-5281fdabc023", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(penguins, x=\"flipper_length_mm\")\n", + " .add(so.Bars(), so.Hist())\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "0dd84c56-eeb3-4904-b957-1677eaebd33c", + "metadata": {}, + "source": [ + "Notice how we used :class:`Bars`, rather than :class:`Bar` for the plot with the continuous `x` axis. These two marks are related, but :class:`Bars` has different defaults and works better for continuous histograms. It also produces a different, more efficient matplotlib artist. You will find the pattern of singular/plural marks elsewhere. The plural version is typically optimized for cases with larger numbers of marks.\n", + "\n", + "Some transforms accept both `x` and `y`, but add *interval* data for each coordinate. 
This is particularly relevant for plotting error bars after aggregating:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6bc29e9d-d660-4638-80fd-8d77e15d9109", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(penguins, x=\"body_mass_g\", y=\"species\", color=\"sex\")\n", + " .add(so.Range(), so.Est(errorbar=\"sd\"), so.Dodge())\n", + " .add(so.Dot(), so.Agg(), so.Dodge())\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "3aecc891-1abb-45b2-bf15-c6944820b242", + "metadata": {}, + "source": [ + "Orienting marks and transforms\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\n", + "When aggregating, dodging, and drawing a bar, the `x` and `y` variables are treated differently. Each operation has the concept of an *orientation*. The :class:`Plot` tries to determine the orientation automatically based on the data types of the variables. For instance, if we flip the assignment of `species` and `body_mass_g`, we'll get the same plot, but oriented horizontally:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1dd7ebeb-893e-4d27-aeaf-a8ff0cd2cc15", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(penguins, x=\"body_mass_g\", y=\"species\", color=\"sex\")\n", + " .add(so.Bar(), so.Agg(), so.Dodge())\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "382603cb-9ae9-46ed-bceb-b48456781092", + "metadata": {}, + "source": [ + "Sometimes, the correct orientation is ambiguous, as when both the `x` and `y` variables are numeric. 
In these cases, you can be explicit by passing the `orient` parameter to :meth:`Plot.add`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "75277dda-47c4-443c-9454-b8d97fc399e2", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(tips, x=\"total_bill\", y=\"size\", color=\"time\")\n", + " .add(so.Bar(), so.Agg(), so.Dodge(), orient=\"y\")\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "dc845c14-03e5-495d-9dc8-3a90f7879346", + "metadata": {}, + "source": [ + "Building and displaying the plot\n", + "--------------------------------\n", + "\n", + "Most examples thus far have produced a single subplot with just one kind of mark on it. But :class:`Plot` does not limit you to this.\n", + "\n", + "Adding multiple layers\n", + "~~~~~~~~~~~~~~~~~~~~~~\n", + "\n", + "More complex single-subplot graphics can be created by calling :meth:`Plot.add` repeatedly. Each time it is called, it defines a *layer* in the plot. For example, we may want to add a scatterplot (now using :class:`Dots`) and then a regression fit:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "922b6d3d-7a81-4921-97f2-953a1fbc69ec", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(tips, x=\"total_bill\", y=\"tip\")\n", + " .add(so.Dots())\n", + " .add(so.Line(), so.PolyFit())\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "f0309733-a86a-4952-bc3b-533d639f0b52", + "metadata": {}, + "source": [ + "Variable mappings that are defined in the :class:`Plot` constructor will be used for all layers:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "604d16b9-383b-4b88-9ed7-fdefed55039a", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(tips, x=\"total_bill\", y=\"tip\", color=\"time\")\n", + " .add(so.Dots())\n", + " .add(so.Line(), so.PolyFit())\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "eb56fb8d-aaa3-4b6e-b311-0354562174b5", + "metadata": {}, + "source": [ + 
"Layer-specific mappings\n", + "~~~~~~~~~~~~~~~~~~~~~~~\n", + "\n", + "You can also define a mapping such that it is used only in a specific layer. This is accomplished by defining the mapping within the call to :meth:`Plot.add` for the relevant layer:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f69a3a38-97e8-40fb-b7d4-95a751ebdcfb", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(tips, x=\"total_bill\", y=\"tip\")\n", + " .add(so.Dots(), color=\"time\")\n", + " .add(so.Line(color=\".2\"), so.PolyFit())\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "b3f94f01-23d4-4f7a-98f8-de93dafc230a", + "metadata": {}, + "source": [ + "Alternatively, define the mapping for the entire plot, but *remove* it from a specific layer by setting the variable to `None`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "45706bec-3453-4a7e-9ac7-c743baff4da6", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(tips, x=\"total_bill\", y=\"tip\", color=\"time\")\n", + " .add(so.Dots())\n", + " .add(so.Line(color=\".2\"), so.PolyFit(), color=None)\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "295013b3-7d91-4a59-b63b-fe50e642954c", + "metadata": {}, + "source": [ + "To recap, there are three ways to specify the value of a mark property: (1) by mapping a variable in all layers, (2) by mapping a variable in a specific layer, and (3) by setting the property directly:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2341eafd-4d6f-4530-835a-a409d2057d74", + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "from io import StringIO\n", + "from IPython.display import SVG\n", + "C = sns.color_palette(\"deep\")\n", + "f = mpl.figure.Figure(figsize=(7, 3))\n", + "ax = f.subplots()\n", + "fontsize = 18\n", + "ax.add_artist(mpl.patches.Rectangle((.13, .53), .45, .09, color=C[0], alpha=.3))\n", + "ax.add_artist(mpl.patches.Rectangle((.22, .43), 
.235, .09, color=C[1], alpha=.3))\n", + "ax.add_artist(mpl.patches.Rectangle((.49, .43), .26, .09, color=C[2], alpha=.3))\n", + "ax.text(.05, .55, \"Plot(data, 'x', 'y', color='var1')\", size=fontsize, color=\".2\")\n", + "ax.text(.05, .45, \".add(Dot(pointsize=10), marker='var2')\", size=fontsize, color=\".2\")\n", + "annots = [\n", + " (\"Mapped\\nin all layers\", (.35, .65), (0, 45)),\n", + " (\"Set directly\", (.35, .4), (0, -45)),\n", + " (\"Mapped\\nin this layer\", (.63, .4), (0, -45)),\n", + "]\n", + "for i, (text, xy, xytext) in enumerate(annots):\n", + " ax.annotate(\n", + " text, xy, xytext,\n", + " textcoords=\"offset points\", fontsize=14, ha=\"center\", va=\"center\",\n", + " arrowprops=dict(arrowstyle=\"->\", color=C[i]), color=C[i],\n", + " )\n", + "ax.set_axis_off()\n", + "f.subplots_adjust(0, 0, 1, 1)\n", + "f.savefig(s:=StringIO(), format=\"svg\")\n", + "SVG(s.getvalue())" + ] + }, + { + "cell_type": "raw", + "id": "cf2d8e39-d332-41f4-b327-2ac352878e58", + "metadata": {}, + "source": [ + "Faceting and pairing subplots\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\n", + "As with seaborn's figure-level functions (:func:`seaborn.displot`, :func:`seaborn.catplot`, etc.), the :class:`Plot` interface can also produce figures with multiple \"facets\", or subplots containing subsets of data. 
This is accomplished with the :meth:`Plot.facet` method:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "af737dfd-1cb2-418d-9f52-1deb93154a92", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(penguins, x=\"flipper_length_mm\")\n", + " .facet(\"species\")\n", + " .add(so.Bars(), so.Hist())\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "81c2a445-5ae1-4272-8a6c-8bfe1f3b907f", + "metadata": {}, + "source": [ + "Call :meth:`Plot.facet` with the variables that should be used to define the columns and/or rows of the plot:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b7b3495f-9a38-4976-b718-ce3672b8c186", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(penguins, x=\"flipper_length_mm\")\n", + " .facet(col=\"species\", row=\"sex\")\n", + " .add(so.Bars(), so.Hist())\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "8b7fe085-acd2-46d2-81f6-a806dec338d3", + "metadata": {}, + "source": [ + "You can facet using a variable with a larger number of levels by \"wrapping\" across the other dimension:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d62d2310-ae33-4b42-bdea-7b7456afd640", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(healthexp, x=\"Year\", y=\"Life_Expectancy\")\n", + " .facet(col=\"Country\", wrap=3)\n", + " .add(so.Line())\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "86ecbeee-3ac2-41eb-b79e-9d6ed026061d", + "metadata": {}, + "source": [ + "All layers will be faceted unless you explicitly exclude them, which can be useful for providing additional context on each subplot:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c38be724-8564-4fa0-861c-1d96ffbbda20", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(healthexp, x=\"Year\", y=\"Life_Expectancy\")\n", + " .facet(\"Country\", wrap=3)\n", + " .add(so.Line(alpha=.3), group=\"Country\", col=None)\n", + " 
.add(so.Line(linewidth=3))\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "f97dad75-65e6-47fd-9fc4-08a8f2cb49ee", + "metadata": {}, + "source": [ + "An alternate way to produce subplots is :meth:`Plot.pair`. Like :class:`seaborn.PairGrid`, this draws all of the data on each subplot, using different variables for the x and/or y coordinates:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d6350e99-2c70-4a96-87eb-74756a0fa335", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(penguins, y=\"body_mass_g\", color=\"species\")\n", + " .pair(x=[\"bill_length_mm\", \"bill_depth_mm\"])\n", + " .add(so.Dots())\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "4deea650-b4b9-46ea-876c-2e5a3a258649", + "metadata": {}, + "source": [ + "You can combine faceting and pairing so long as the operations add subplots on opposite dimensions:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9de7948c-4c43-4116-956c-cbcb84d8652c", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(penguins, y=\"body_mass_g\", color=\"species\")\n", + " .pair(x=[\"bill_length_mm\", \"bill_depth_mm\"])\n", + " .facet(row=\"sex\")\n", + " .add(so.Dots())\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "0a0febe3-9daf-4271-aef9-9637d59aaf10", + "metadata": {}, + "source": [ + "Integrating with matplotlib\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\n", + "There may be cases where you want multiple subplots to appear in a figure with a more complex structure than what :meth:`Plot.facet` or :meth:`Plot.pair` can provide. The current solution is to delegate figure setup to matplotlib and to supply the matplotlib object that :class:`Plot` should use with the :meth:`Plot.on` method. 
This object can be either a :class:`matplotlib.axes.Axes`, :class:`matplotlib.figure.Figure`, or :class:`matplotlib.figure.SubFigure`; the latter is most useful for constructing bespoke subplot layouts:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b046466d-f6c2-43fa-9ae9-f40a292a82b7", + "metadata": {}, + "outputs": [], + "source": [ + "f = mpl.figure.Figure(figsize=(8, 4))\n", + "sf1, sf2 = f.subfigures(1, 2)\n", + "(\n", + " so.Plot(penguins, x=\"body_mass_g\", y=\"flipper_length_mm\")\n", + " .add(so.Dots())\n", + " .on(sf1)\n", + " .plot()\n", + ")\n", + "(\n", + " so.Plot(penguins, x=\"body_mass_g\")\n", + " .facet(row=\"sex\")\n", + " .add(so.Bars(), so.Hist())\n", + " .on(sf2)\n", + " .plot()\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "7074f599-8b9f-4b77-9e15-55349592c747", + "metadata": {}, + "source": [ + "Building and displaying the plot\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\n", + "An important thing to know is that :class:`Plot` methods clone the object they are called on and return that clone instead of updating the object in place. 
This means that you can define a common plot spec and then produce several variations on it.\n", + "\n", + "So, take this basic specification:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b79b2148-b867-4e96-9b84-b3fc44ad0c82", + "metadata": {}, + "outputs": [], + "source": [ + "p = so.Plot(healthexp, \"Year\", \"Spending_USD\", color=\"Country\")" + ] + }, + { + "cell_type": "raw", + "id": "135f89e5-c41e-4c6c-9865-5413787bdc62", + "metadata": {}, + "source": [ + "We could use it to draw a line plot:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10722a20-dc8c-4421-a433-8ff21fed9495", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Line())" + ] + }, + { + "cell_type": "raw", + "id": "f9db1184-f352-41b8-a45a-02ff6eb85071", + "metadata": {}, + "source": [ + "Or perhaps a stacked area plot:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ea2ad629-c718-44a9-92af-144728094cd5", + "metadata": {}, + "outputs": [], + "source": [ + "p.add(so.Area(), so.Stack())" + ] + }, + { + "cell_type": "raw", + "id": "17fb2676-6199-4a2c-9f10-3d5aebb7a285", + "metadata": {}, + "source": [ + "The :class:`Plot` methods are fully declarative. Calling them updates the plot spec, but it doesn't actually do any plotting. One consequence of this is that methods can be called in any order, and many of them can be called multiple times.\n", + "\n", + "When does the plot actually get rendered? :class:`Plot` is optimized for use in notebook environments. The rendering is automatically triggered when the :class:`Plot` gets displayed in the Jupyter REPL. That's why we didn't see anything in the example above, where we defined a :class:`Plot` but assigned it to `p` rather than letting it return out to the REPL.\n", + "\n", + "To see a plot in a notebook, either return it from the final line of a cell or call Jupyter's built-in `display` function on the object. 
The notebook integration bypasses :mod:`matplotlib.pyplot` entirely, but you can use its figure-display machinery in other contexts by calling :meth:`Plot.show`.\n", + "\n", + "You can also save the plot to a file (or buffer) by calling :meth:`Plot.save`." + ] + }, + { + "cell_type": "raw", + "id": "abfa0384-af88-4409-a119-912601a14f13", + "metadata": {}, + "source": [ + "Customizing the appearance\n", + "--------------------------\n", + "\n", + "The new interface aims to support a deep amount of customization through :class:`Plot`, reducing the need to switch gears and use matplotlib functionality directly. (But please be patient; not all of the features needed to achieve this goal have been implemented!)\n", + "\n", + "Parameterizing scales\n", + "~~~~~~~~~~~~~~~~~~~~~\n", + "\n", + "All of the data-dependent properties are controlled by the concept of a :class:`Scale` and the :meth:`Plot.scale` method. This method accepts several different types of arguments. One possibility, which is closest to the use of scales in matplotlib, is to pass the name of a function that transforms the coordinates:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5acfe6d2-144a-462d-965b-2900fb619eac", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(diamonds, x=\"carat\", y=\"price\")\n", + " .add(so.Dots())\n", + " .scale(y=\"log\")\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "ccff884b-53cb-4c15-aab2-f5d4e5551d72", + "metadata": {}, + "source": [ + ":meth:`Plot.scale` can also control the mappings for semantic properties like `color`. 
You can directly pass it any argument that you would pass to the `palette` parameter in seaborn's function interface:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4f243a31-d7da-43d2-8dc4-aad1b584ff48", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(diamonds, x=\"carat\", y=\"price\", color=\"clarity\")\n", + " .add(so.Dots())\n", + " .scale(color=\"flare\")\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "4fdf291e-a008-4a8e-8ced-a24f78d9b49f", + "metadata": {}, + "source": [ + "Another option is to provide a tuple of `(min, max)` values, controlling the range that the scale should map into. This works both for numeric properties and for colors:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4cdc12ee-83f9-4472-b198-85bfe5cf0e4f", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(diamonds, x=\"carat\", y=\"price\", color=\"clarity\", pointsize=\"carat\")\n", + " .add(so.Dots())\n", + " .scale(color=(\"#88c\", \"#555\"), pointsize=(2, 10))\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "e326bf46-a296-4997-8e91-6531a7eef304", + "metadata": {}, + "source": [ + "For additional control, you can pass a :class:`Scale` object. There are several different types of :class:`Scale`, each with appropriate parameters. 
For example, :class:`Continuous` lets you define the input domain (`norm`), the output range (`values`), and the function that maps between them (`trans`), while :class:`Nominal` allows you to specify an ordering:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "53682db4-2ba4-4dfd-80c2-1fef466cfab2", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(diamonds, x=\"carat\", y=\"price\", color=\"carat\", marker=\"cut\")\n", + " .add(so.Dots())\n", + " .scale(\n", + " color=so.Continuous(\"crest\", norm=(0, 3), trans=\"sqrt\"),\n", + " marker=so.Nominal([\"o\", \"+\", \"x\"], order=[\"Ideal\", \"Premium\", \"Good\"]),\n", + " )\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "7bf112fe-136d-4e63-a397-1e7d2ff4f543", + "metadata": {}, + "source": [ + "Customizing legends and ticks\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\n", + "The :class:`Scale` objects are also how you specify which values should appear as tick labels / in the legend, along with how they appear. 
For example, the :meth:`Continuous.tick` method lets you control the density or locations of the ticks, and the :meth:`Continuous.label` method lets you modify the format:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4f8e821f-bd19-4af1-bb66-488593b3c968", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(diamonds, x=\"carat\", y=\"price\", color=\"carat\")\n", + " .add(so.Dots())\n", + " .scale(\n", + " x=so.Continuous().tick(every=0.5),\n", + " y=so.Continuous().label(like=\"${x:.0f}\"),\n", + " color=so.Continuous().tick(at=[1, 2, 3, 4]),\n", + " )\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "4f6646c9-084b-49ae-ad6f-39c0bd12fc4e", + "metadata": {}, + "source": [ + "Customizing limits, labels, and titles\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\n", + ":class:`Plot` has a number of methods for simple customization, including :meth:`Plot.label`, :meth:`Plot.limit`, and :meth:`Plot.share`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e9586669-35ea-4784-9594-ea375a06aec0", + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " so.Plot(penguins, x=\"body_mass_g\", y=\"species\", color=\"island\")\n", + " .facet(col=\"sex\")\n", + " .add(so.Dot(), so.Jitter(.5))\n", + " .share(x=False)\n", + " .limit(y=(2.5, -.5))\n", + " .label(\n", + " x=\"Body mass (g)\", y=\"\",\n", + " color=str.capitalize,\n", + " title=\"{} penguins\".format,\n", + " )\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "3b38607a-9b41-49c0-8031-e05bc87701c8", + "metadata": {}, + "source": [ + "Theme customization\n", + "~~~~~~~~~~~~~~~~~~~\n", + "\n", + "Finally, :class:`Plot` supports data-independent theming through the :meth:`Plot.theme` method. Currently, this method accepts a dictionary of matplotlib rc parameters. 
You can set them directly and/or pass a package of parameters from seaborn's theming functions:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2df40831-fd41-4b76-90ff-042aecd694d4", + "metadata": {}, + "outputs": [], + "source": [ + "from seaborn import axes_style\n", + "theme_dict = {**axes_style(\"whitegrid\"), \"grid.linestyle\": \":\"}\n", + "so.Plot().theme(theme_dict)" + ] + }, + { + "cell_type": "raw", + "id": "475d5157-5e88-473e-991f-528219ed3744", + "metadata": {}, + "source": [ + "To change the theme for all :class:`Plot` instances, update the settings in :attr:`Plot.config`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "41ac347c-766f-495c-8a7f-43fee8cad29a", + "metadata": {}, + "outputs": [], + "source": [ + "so.Plot.config.theme.update(theme_dict)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_tutorial/properties.ipynb b/doc/_tutorial/properties.ipynb new file mode 100644 index 0000000000..913cb5ac08 --- /dev/null +++ b/doc/_tutorial/properties.ipynb @@ -0,0 +1,1127 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "6cb222bb-4781-48b6-9675-c0ba195b5efb", + "metadata": {}, + "source": [ + ".. 
_properties_tutorial:\n", + "\n", + "Properties of Mark objects\n", + "===========================" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ae9d52dc-55ad-4804-a533-f2b724d0b85b", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib as mpl\n", + "import seaborn.objects as so\n", + "from seaborn import axes_style, color_palette" + ] + }, + { + "cell_type": "raw", + "id": "dd828c60-3895-46e4-a2f4-782a6e6cd9a6", + "metadata": {}, + "source": [ + "Coordinate properties\n", + "---------------------" + ] + }, + { + "cell_type": "raw", + "id": "fa97cc40-f02f-477b-90ec-a764b7253b68", + "metadata": {}, + "source": [ + ".. _coordinate_property:\n", + "\n", + "x, y, xmin, xmax, ymin, ymax\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\n", + "Coordinate properties determine where a mark is drawn on a plot. Canonically, the `x` coordinate is the horizontal position and the `y` coordinate is the vertical position. Some marks accept a span (i.e., `min`, `max`) parameterization for one or both variables. Others may accept `x` and `y` but also use a `baseline` parameter to show a span. The layer's `orient` parameter determines how this works.\n", + "\n", + "If a variable does not contain numeric data, its scale will apply a conversion so that data can be drawn on a screen. 
For instance, :class:`Nominal` scales assign an integer index to each distinct category, and :class:`Temporal` scales represent dates as the number of days from a reference \"epoch\":" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7b418365-b99c-45d6-bf1e-e347e2b9012a", + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "(\n", + " so.Plot(y=[0, 0, 0])\n", + " .pair(x=[\n", + " [1, 2, 3],\n", + " [\"A\", \"B\", \"C\"],\n", + " np.array([\"2020-01-01\", \"2020-02-01\", \"2020-03-01\"], dtype=\"datetime64\"),\n", + " ])\n", + " .limit(\n", + " x0=(0, 10),\n", + " x1=(-.5, 2.5),\n", + " x2=(pd.Timestamp(\"2020-01-01\"), pd.Timestamp(\"2020-03-01\"))\n", + " )\n", + " .scale(y=so.Continuous().tick(count=0), x2=so.Temporal().label(concise=True))\n", + " .layout(size=(7, 1), engine=\"tight\")\n", + " .label(x0=\"Continuous\", x1=\"Nominal\", x2=\"Temporal\")\n", + " .theme({\n", + " **axes_style(\"ticks\"),\n", + " **{f\"axes.spines.{side}\": False for side in [\"left\", \"right\", \"top\"]},\n", + " })\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "0ae06665-2ce5-470d-b90a-02d990221fc5", + "metadata": {}, + "source": [ + "A :class:`Continuous` scale can also apply a nonlinear transform between data values and spatial positions:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b731a3bb-a52e-4b12-afbb-b036753adcbe", + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "(\n", + " so.Plot(y=[0, 0, 0])\n", + " .pair(x=[[1, 10, 100], [-100, 0, 100], [0, 10, 40]])\n", + " .limit(\n", + " )\n", + " .add(so.Dot(marker=\"\"))\n", + " .scale(\n", + " y=so.Continuous().tick(count=0),\n", + " x0=so.Continuous(trans=\"log\"),\n", + " x1=so.Continuous(trans=\"symlog\").tick(at=[-100, -10, 0, 10, 100]),\n", + " x2=so.Continuous(trans=\"sqrt\").tick(every=10),\n", + " )\n", + " .layout(size=(7, 1), engine=\"tight\")\n", + " .label(x0=\"trans='log'\", 
x1=\"trans='symlog'\", x2=\"trans='sqrt'\")\n", + " .theme({\n", + " **axes_style(\"ticks\"),\n", + " **{f\"axes.spines.{side}\": False for side in [\"left\", \"right\", \"top\"]},\n", + " \"axes.labelpad\": 8,\n", + " })\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e384941a-da38-4e12-997d-d750b19b1fa6", + "metadata": { + "tags": [ + "hide-input", + "hide" + ] + }, + "outputs": [], + "source": [ + "# Hiding from the page but keeping around for now\n", + "(\n", + " so.Plot()\n", + " .add(\n", + " so.Dot(edgewidth=3, stroke=3),\n", + " so.Dodge(by=[\"group\"]),\n", + " x=[\"A\", \"A\", \"A\", \"A\", \"A\"],\n", + " y=[1.75, 2.25, 2.75, 2.0, 2.5],\n", + " color=[1, 2, 3, 1, 3],\n", + " marker=[mpl.markers.MarkerStyle(x) for x in \"os^+o\"],\n", + " pointsize=(9, 9, 9, 13, 10),\n", + " fill=[True, False, True, True, False],\n", + " group=[1, 2, 3, 4, 5], width=.5, legend=False,\n", + " )\n", + " .add(\n", + " so.Bar(edgewidth=2.5, alpha=.2, width=.9),\n", + " so.Dodge(gap=.05),\n", + " x=[\"B\", \"B\", \"B\",], y=[2, 2.5, 1.75], color=[1, 2, 3],\n", + " legend=False,\n", + " )\n", + " .add(\n", + " so.Range({\"capstyle\": \"round\"}, linewidth=3),\n", + " so.Dodge(by=[\"group\"]),\n", + " x=[\"C\", \"C\", \"C\"], ymin=[1.5, 1.75, 1.25], ymax=[2.5, 2.75, 2.25],\n", + " color=[1, 2, 2], linestyle=[\"-\", \"-\", \":\"],\n", + " group=[1, 2, 3], width=.5, legend=False,\n", + " )\n", + " .layout(size=(4, 4), engine=None)\n", + " .limit(x=(-.5, 2.5), y=(0, 3))\n", + " .label(x=\"X Axis (nominal)\", y=\"Y Axis (continuous)\")\n", + " .scale(\n", + " color=\"dark:C0_r\", #None,\n", + " fill=None, marker=None,\n", + " pointsize=None, linestyle=None,\n", + " y=so.Continuous().tick(every=1, minor=1)\n", + " )\n", + " .theme({\n", + " **axes_style(\"ticks\"),\n", + " \"axes.spines.top\": False, \"axes.spines.right\": False,\n", + " \"axes.labelsize\": 14,\n", + " })\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": 
"8279d74f-0cd0-4ba8-80ed-c6051541d956", + "metadata": {}, + "source": [ + "Color properties\n", + "----------------" + ] + }, + { + "cell_type": "raw", + "id": "fca25527-6bbe-42d6-beea-a996a46d9761", + "metadata": {}, + "source": [ + ".. _color_property:\n", + "\n", + "color, fillcolor, edgecolor\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\n", + "All marks can be given a `color`, and many distinguish between the color of the mark's \"edge\" and \"fill\". Often, simply using `color` will set both, while the more-specific properties allow further control:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ff7a1e64-7b02-45b8-b1e7-d7ec2bf1e7f7", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "no_spines = {\n", + " f\"axes.spines.{side}\": False\n", + " for side in [\"left\", \"right\", \"bottom\", \"top\"]\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1dda4c42-31f4-4316-baad-f30a465d3fd9", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "color_mark = so.Dot(marker=\"s\", pointsize=20, edgewidth=2.5, alpha=.7, edgealpha=1)\n", + "color_plot = (\n", + " so.Plot()\n", + " .theme({\n", + " **axes_style(\"white\"),\n", + " **no_spines,\n", + " \"axes.titlesize\": 15,\n", + " \"figure.subplot.wspace\": .1,\n", + " \"axes.xmargin\": .1,\n", + " })\n", + " .scale(\n", + " x=so.Continuous().tick(count=0),\n", + " y=so.Continuous().tick(count=0),\n", + " color=None, edgecolor=None,\n", + " )\n", + " .layout(size=(9, .5), engine=None)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "54fc98b4-dc4c-45e1-a2a7-840a724fc746", + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "n = 6\n", + "rgb = [f\"C{i}\" for i in range(n)]\n", + "(\n", + " color_plot\n", + " .facet([\"color\"] * n + [\"edgecolor\"] * n + [\"fillcolor\"] * n)\n", + " .add(\n", + " color_mark,\n", + " x=np.tile(np.arange(n), 3),\n", + 
" y=np.zeros(n * 3),\n", + " color=rgb + [\".8\"] * n + rgb,\n", + " edgecolor=rgb + rgb + [\".3\"] * n,\n", + " legend=False,\n", + " )\n", + " .plot()\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "0dc26a01-6290-44f4-9815-5cea531207e2", + "metadata": {}, + "source": [ + "When the color property is mapped, the default palette depends on the type of scale. Nominal scales use discrete, unordered hues, while continuous scales (including temporal ones) use a sequential gradient:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6927a0d3-687b-4ca0-a425-0376b39f1b1f", + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "n = 9\n", + "rgb = color_palette(\"deep\", n) + color_palette(\"ch:\", n)\n", + "(\n", + " color_plot\n", + " .facet([\"nominal\"] * n + [\"continuous\"] * n)\n", + " .add(\n", + " color_mark,\n", + " x=list(range(n)) * 2,\n", + " y=[0] * n * 2,\n", + " color=rgb,\n", + " legend=False,\n", + " )\n", + " .plot()\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "e79d0da7-a53e-468c-9952-726eeae810d1", + "metadata": {}, + "source": [ + ".. note::\n", + " The default continuous scale is subject to change in future releases to improve discriminability.\n", + "\n", + "Color scales are parameterized by the name of a palette, such as `'viridis'`, `'rocket'`, or `'deep'`. Some palette names can include parameters, including simple gradients (e.g. `'dark:blue'`) or the cubehelix system (e.g. `'ch:start=.2,rot=-.4'`). See the :doc:`color palette tutorial </tutorial/color_palettes>` for guidance on making an appropriate selection.\n", + "\n", + "Continuous scales can also be parameterized by a tuple of colors that the scale should interpolate between. 
When using a nominal scale, it is possible to provide either the name of the palette (which will be discretely-sampled, if necessary), a list of individual color values, or a dictionary directly mapping data values to colors.\n", + "\n", + "Individual colors may be specified `in a wide range of formats `_. These include indexed references to the current color cycle (`'C0'`), single-letter shorthands (`'b'`), grayscale values (`'.4'`), RGB hex codes (`'#4c72b0'`), X11 color names (`'seagreen'`), and XKCD color survey names (`'purpleish'`):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ce7300dc-0ed2-4eb3-bd6f-2e42280f5e54", + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "color_dict = {\n", + " \"cycle\": [\"C0\", \"C1\", \"C2\"],\n", + " \"short\": [\"r\", \"y\", \"b\"],\n", + " \"gray\": [\".3\", \".7\", \".5\"],\n", + " \"hex\": [\"#825f87\", \"#05696b\", \"#de7e5d\"],\n", + " \"X11\": [\"seagreen\", \"sienna\", \"darkblue\"],\n", + " \"XKCD\": [\"xkcd:gold\", \"xkcd:steel\", \"xkcd:plum\"],\n", + "}\n", + "groups = [k for k in color_dict for _ in range(3)]\n", + "colors = [c for pal in color_dict.values() for c in pal]\n", + "(\n", + " so.Plot(\n", + " x=[0] * len(colors),\n", + " y=[f\"'{c}'\" for c in colors],\n", + " color=colors,\n", + " )\n", + " .theme({\n", + " **axes_style(\"ticks\"),\n", + " **no_spines,\n", + " \"axes.ymargin\": .2,\n", + " \"axes.titlesize\": 14,\n", + " \n", + " })\n", + " .facet(groups)\n", + " .layout(size=(8, 1.15), engine=\"constrained\")\n", + " .scale(x=so.Continuous().tick(count=0))\n", + " .add(color_mark)\n", + " .limit(x=(-.2, .5))\n", + " # .label(title=\"{} \".format)\n", + " .label(title=\"\")\n", + " .scale(color=None)\n", + " .share(y=False)\n", + " .plot()\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "4ea6ac35-2a73-4dec-8b9b-bf15ba67f01b", + "metadata": {}, + "source": [ + ".. 
_alpha_property:\n", + "\n", + "alpha, fillalpha, edgealpha\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\n", + "The `alpha` property determines the mark's opacity. Lowering the alpha can be helpful for representing density in the case of overplotting:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e73839d2-27c4-42b8-8587-9f6e99c8a464", + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "rng = np.random.default_rng(3)\n", + "n_samp = 300\n", + "x = 1 - rng.exponential(size=n_samp)\n", + "y = rng.uniform(-1, 1, size=n_samp)\n", + "keep = np.sqrt(x ** 2 + y ** 2) < 1\n", + "x, y = x[keep], y[keep]\n", + "n = keep.sum()\n", + "alpha_vals = np.linspace(.1, .9, 9).round(1)\n", + "xs = np.concatenate([x for _ in alpha_vals])\n", + "ys = np.concatenate([y for _ in alpha_vals])\n", + "alphas = np.repeat(alpha_vals, n)\n", + "(\n", + " so.Plot(x=xs, y=ys, alpha=alphas)\n", + " .facet(alphas)\n", + " .add(so.Dot(color=\".2\", pointsize=3))\n", + " .scale(\n", + " alpha=None,\n", + " x=so.Continuous().tick(count=0),\n", + " y=so.Continuous().tick(count=0)\n", + " )\n", + " .layout(size=(9, 1), engine=None)\n", + " .theme({\n", + " **axes_style(\"white\"),\n", + " **no_spines,\n", + " })\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "a551732e-e8f5-45f0-9345-7ef45248d9d7", + "metadata": {}, + "source": [ + "Mapping the `alpha` property can also be useful even when marks do not overlap because it conveys a sense of importance and can be combined with a `color` scale to represent two variables. Moreover, colors with lower alpha appear less saturated, which can improve the appearance of larger filled marks (such as bars).\n", + "\n", + "As with `color`, some marks define separate `edgealpha` and `fillalpha` properties for additional control." 
+ ] + }, + { + "cell_type": "raw", + "id": "77d168e4-0539-409f-8542-750d3981e22b", + "metadata": {}, + "source": [ + "Style properties\n", + "----------------" + ] + }, + { + "cell_type": "raw", + "id": "95e342fa-1086-4e63-81ae-dce1c628df9b", + "metadata": {}, + "source": [ + ".. _fill_property:\n", + "\n", + "fill\n", + "~~~~\n", + "\n", + "The `fill` property is relevant to marks with a distinction between the edge and interior and determines whether the interior is visible. It is a boolean state: `fill` can be set only to `True` or `False`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5fb3b839-8bae-4392-b5f0-70dfc5a33c7a", + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "nan = float(\"nan\")\n", + "x_bar = [0, 1]\n", + "y_bar = [2, 1]\n", + "f_bar = [True, False]\n", + "\n", + "x_dot = [2.2, 2.5, 2.8, 3.2, 3.5, 3.8]\n", + "y_dot = [1.2, 1.7, 1.4, 0.7, 1.2, 0.9]\n", + "f_dot = [True, True, True, False, False, False]\n", + "\n", + "xx = np.linspace(0, .8, 100)\n", + "yy = xx ** 2 * np.exp(-xx * 10)\n", + "x_area = list(4.5 + xx) + list(5.5 + xx)\n", + "y_area = list(yy / yy.max() * 2) + list(yy / yy.max())\n", + "f_area = [True] * 100 + [False] * 100\n", + "\n", + "(\n", + " so.Plot()\n", + " .add(\n", + " so.Bar(color=\".3\", edgecolor=\".2\", edgewidth=2.5),\n", + " x=x_bar + [nan for _ in x_dot + x_area],\n", + " y=y_bar + [nan for _ in y_dot + y_area],\n", + " fill=f_bar + [nan for _ in f_dot + f_area]\n", + " )\n", + " .add(\n", + " so.Dot(color=\".2\", pointsize=13, stroke=2.5),\n", + " x=[nan for _ in x_bar] + x_dot + [nan for _ in x_area],\n", + " y=[nan for _ in y_bar] + y_dot + [nan for _ in y_area],\n", + " fill=[nan for _ in f_bar] + f_dot + [nan for _ in f_area],\n", + " )\n", + " .add(\n", + " so.Area(color=\".2\", edgewidth=2.5),\n", + " x=[nan for _ in x_bar + x_dot] + x_area,\n", + " y=[nan for _ in y_bar + y_dot] + y_area,\n", + " fill=[nan for _ in f_bar + f_dot] + f_area,\n", + " 
)\n", + " .theme({\n", + " **axes_style(\"ticks\"),\n", + " \"axes.spines.left\": False,\n", + " \"axes.spines.top\": False,\n", + " \"axes.spines.right\": False,\n", + " \"xtick.labelsize\": 14,\n", + " })\n", + " .layout(size=(9, 1.25), engine=None)\n", + " .scale(\n", + " fill=None,\n", + " x=so.Continuous().tick(at=[0, 1, 2.5, 3.5, 4.8, 5.8]).label(\n", + " like={\n", + " 0: True, 1: False, 2.5: True, 3.5: False, 4.8: True, 5.8: False\n", + " }.get,\n", + " ),\n", + " y=so.Continuous().tick(count=0),\n", + " )\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "119741b0-9eca-45a1-983e-35effc49c7fa", + "metadata": {}, + "source": [ + ".. _marker_property:\n", + "\n", + "marker\n", + "~~~~~~\n", + "\n", + "The `marker` property is relevant for dot marks and some line marks. The API for specifying markers is very flexible, as detailed in the matplotlib API docs: :mod:`matplotlib.markers`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0ba9c5aa-3d9c-47c7-8aee-5851e1f3c4dd", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "marker_plot = (\n", + " so.Plot()\n", + " .scale(marker=None, y=so.Continuous().tick(count=0))\n", + " .layout(size=(10, .5), engine=None)\n", + " .theme({\n", + " **axes_style(\"ticks\"),\n", + " \"axes.spines.left\": False,\n", + " \"axes.spines.top\": False,\n", + " \"axes.spines.right\": False,\n", + " \"xtick.labelsize\":12,\n", + " \"axes.xmargin\": .02,\n", + " })\n", + "\n", + ")\n", + "marker_mark = so.Dot(pointsize=15, color=\".2\", stroke=1.5)" + ] + }, + { + "cell_type": "raw", + "id": "3c07a874-18a1-485a-8d65-70ea3f246340", + "metadata": {}, + "source": [ + "Markers can be specified using a number of simple string codes:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6a764efd-df55-412b-8a01-8eba6f897893", + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "marker_codes = [\n", + " \"o\", \"^\", \"v\", \"<\", 
\">\",\"s\", \"D\", \"d\", \"p\", \"h\", \"H\", \"8\",\n", + " \"X\", \"*\", \".\", \"P\", \"x\", \"+\", \"1\", \"2\", \"3\", \"4\", \"|\", \"_\",\n", + "]\n", + "x, y = [f\"'{m}'\" for m in marker_codes], [0] * len(marker_codes)\n", + "marker_objs = [mpl.markers.MarkerStyle(m) for m in marker_codes]\n", + "marker_plot.add(marker_mark, marker=marker_objs, x=x, y=y).plot()" + ] + }, + { + "cell_type": "raw", + "id": "1c614f08-3aa4-450d-bfe2-3295c29155d5", + "metadata": {}, + "source": [ + "They can also be programmatically generated using a `(num_sides, fill_style, angle)` tuple:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c9c1efe7-33e1-4add-9c4e-567d8dfbb821", + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "marker_codes = [\n", + " (4, 0, 0), (4, 0, 45), (8, 0, 0),\n", + " (4, 1, 0), (4, 1, 45), (8, 1, 0),\n", + " (4, 2, 0), (4, 2, 45), (8, 2, 0),\n", + "]\n", + "x, y = [f\"{m}\" for m in marker_codes], [0] * len(marker_codes)\n", + "marker_objs = [mpl.markers.MarkerStyle(m) for m in marker_codes]\n", + "marker_plot.add(marker_mark, marker=marker_objs, x=x, y=y).plot()" + ] + }, + { + "cell_type": "raw", + "id": "dc518508-cb08-4508-a7f3-5762841da6fc", + "metadata": {}, + "source": [ + "See the matplotlib docs for additional formats, including mathtext character codes (`'$...$'`) and arrays of vertices.\n", + "\n", + "A marker property is always mapped with a nominal scale; there is no inherent ordering to the different shapes. 
If no scale is provided, the plot will programmatically generate a suitably large set of unique markers:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3466dc10-07a5-470f-adac-c3c05326945d", + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "from seaborn._core.properties import Marker\n", + "n = 14\n", + "marker_objs = Marker()._default_values(n)\n", + "x, y = list(map(str, range(n))), [0] * n\n", + "marker_plot.add(marker_mark, marker=marker_objs, x=x, y=y).plot()" + ] + }, + { + "cell_type": "raw", + "id": "30916c65-6d4c-4294-a5e2-58af8b9392f3", + "metadata": {}, + "source": [ + "While this ensures that the shapes are technically distinct, bear in mind that — in most cases — it will be difficult to tell the markers apart if more than a handful are used in a single plot.\n", + "\n", + ".. note::\n", + " The default marker scale is subject to change in future releases to improve discriminability." + ] + }, + { + "cell_type": "raw", + "id": "3b1d0630-808a-4099-8bd0-768718f86f72", + "metadata": {}, + "source": [ + ".. _linestyle_property:\n", + "\n", + "linestyle, edgestyle\n", + "~~~~~~~~~~~~~~~~~~~~\n", + "\n", + "The `linestyle` property is relevant to line marks, and the `edgestyle` property is relevant to a number of marks with \"edges\". Both properties determine the \"dashing\" of a line in terms of on-off segments.\n", + "\n", + "Dashes can be specified with a small number of shorthand codes (`'-'`, `'--'`, `'-.'`, and `':'`) or programmatically using `(on, off, ...)` tuples. 
In the tuple specification, the unit is equal to the linewidth:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "33a729db-84e4-4619-bd1a-1f60c77f7073", + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "xx = np.linspace(0, 1, 100)\n", + "dashes = [\"-\", \"--\", \"-.\", \":\", (6, 2), (2, 1), (.5, .5), (4, 1, 2, 1)] \n", + "dash_data = (\n", + " pd.DataFrame({i: xx for i in range(len(dashes))})\n", + " .stack()\n", + " .reset_index(1)\n", + " .set_axis([\"y\", \"x\"], axis=1)\n", + " .reset_index(drop=True)\n", + ")\n", + "(\n", + " so.Plot(dash_data, \"x\", \"y\", linestyle=\"y\")\n", + " .add(so.Line(linewidth=1.7, color=\".2\"), legend=None)\n", + " .scale(\n", + " linestyle=dashes,\n", + " x=so.Continuous().tick(count=0),\n", + " y=so.Continuous().tick(every=1).label(like={\n", + " i: f\"'$\\mathtt{{{pat}}}$'\" if isinstance(pat, str) else pat\n", + " for i, pat in enumerate(dashes)\n", + " }.get)\n", + " )\n", + " .label(x=\"\", y=\"\")\n", + " .limit(x=(0, 1), y=(7.5, -0.5))\n", + " .layout(size=(9, 2.5), engine=None)\n", + " .theme({\n", + " **axes_style(\"white\"),\n", + " **no_spines,\n", + " \"ytick.labelsize\": 12,\n", + " })\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "41063f3b-a207-4f03-a606-78e2826be522", + "metadata": {}, + "source": [ + "Size properties\n", + "---------------" + ] + }, + { + "cell_type": "raw", + "id": "7a909d91-9d60-4e95-a855-18b2779f19ce", + "metadata": {}, + "source": [ + ".. _pointsize_property:\n", + "\n", + "pointsize\n", + "~~~~~~~~~\n", + "\n", + "The `pointsize` property is relevant to dot marks and to line marks that can show markers at individual data points. 
The units correspond to the diameter of the mark in points.\n", + "\n", + "Note that, while the parameterization corresponds to diameter, scales will be applied with a square root transform so that data values are linearly proportional to area:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b55b106d-ba14-43ec-ab9b-5d7a04fb813c", + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "x = np.arange(1, 21)\n", + "y = [0 for _ in x]\n", + "(\n", + " so.Plot(x, y)\n", + " .add(so.Dots(color=\".2\", stroke=1), pointsize=x)\n", + " .layout(size=(9, .5), engine=None)\n", + " .theme({\n", + " **axes_style(\"ticks\"),\n", + " **{f\"axes.spines.{side}\": False for side in [\"left\", \"right\", \"top\"]},\n", + " \"xtick.labelsize\": 12,\n", + " \"axes.xmargin\": .025,\n", + " })\n", + " .scale(\n", + " pointsize=None,\n", + " x=so.Continuous().tick(every=1),\n", + " y=so.Continuous().tick(count=0),\n", + " )\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "66660d74-0252-4cb1-960a-c2c4823bb0e6", + "metadata": {}, + "source": [ + ".. _linewidth_property:\n", + "\n", + "linewidth\n", + "~~~~~~~~~\n", + "\n", + "The `linewidth` property is relevant to line marks and determines their thickness. 
The value should be non-negative and has point units:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a77c60d5-0d21-43a5-ab8c-f3f4abbc70ad", + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "lw = np.arange(0.5, 5, .5)\n", + "x = [i for i in [0, 1] for _ in lw]\n", + "y = [*lw, *lw]\n", + "(\n", + " so.Plot(x=x, y=y, linewidth=y)\n", + " .add(so.Line(color=\".2\"))\n", + " .limit(y=(4.9, .1))\n", + " .layout(size=(9, 1.4), engine=None)\n", + " .theme({\n", + " **axes_style(\"ticks\"),\n", + " **{f\"axes.spines.{side}\": False for side in [\"bottom\", \"right\", \"top\"]},\n", + " \"xtick.labelsize\": 12,\n", + " \"axes.xmargin\": .015,\n", + " \"ytick.labelsize\": 12,\n", + " })\n", + " .scale(\n", + " linewidth=None,\n", + " x=so.Continuous().tick(count=0),\n", + " y=so.Continuous().tick(every=1, between=(.5, 4.5), minor=1),\n", + " )\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "dcbdfcb9-d55e-467a-8514-bdb4cc2bec90", + "metadata": {}, + "source": [ + ".. _edgewidth_property:\n", + "\n", + "edgewidth\n", + "~~~~~~~~~\n", + "\n", + "The `edgewidth` property is akin to `linewidth` but applies to marks with an edge/fill rather than to lines. It also has a different default range when used in a scale. 
The units are the same:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7a1f1d5a-a2d5-4b8e-a172-73104f5ec715", + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "x = np.arange(0, 21) / 5\n", + "y = [0 for _ in x]\n", + "edge_plot = (\n", + " so.Plot(x, y)\n", + " .layout(size=(9, .5), engine=None)\n", + " .theme({\n", + " **axes_style(\"ticks\"),\n", + " **{f\"axes.spines.{side}\": False for side in [\"left\", \"right\", \"top\"]},\n", + " \"xtick.labelsize\": 12,\n", + " \"axes.xmargin\": .02,\n", + " })\n", + " .scale(\n", + " x=so.Continuous().tick(every=1, minor=4),\n", + " y=so.Continuous().tick(count=0),\n", + " )\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ba70ed6c-d902-41b0-a043-d8f27bf65e9b", + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "(\n", + " edge_plot\n", + " .add(so.Dot(color=\".75\", edgecolor=\".2\", marker=\"o\", pointsize=14), edgewidth=x)\n", + " .scale(edgewidth=None)\n", + " .plot()\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "98a25a16-67fa-4467-a425-6a78a17c63ab", + "metadata": {}, + "source": [ + ".. _stroke_property:\n", + "\n", + "stroke\n", + "~~~~~~\n", + "\n", + "The `stroke` property is akin to `edgewidth` but applies when a dot mark is defined by its stroke rather than its fill. 
It also has a slightly different default scale range, but otherwise behaves similarly:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f73a0428-a787-4f21-8098-848eb1c816fb", + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "(\n", + " edge_plot\n", + " .add(so.Dot(color=\".2\", marker=\"x\", pointsize=11), stroke=x)\n", + " .scale(stroke=None)\n", + " .plot()\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "c2ca33db-df52-4958-889a-320b4833a0d7", + "metadata": {}, + "source": [ + "Text properties\n", + "---------------" + ] + }, + { + "cell_type": "raw", + "id": "b75af2fe-4d81-407c-9858-23362710f25f", + "metadata": {}, + "source": [ + ".. _horizontalalignment_property:\n", + "\n", + ".. _verticalalignment_property:\n", + "\n", + "halign, valign\n", + "~~~~~~~~~~~~~~\n", + "\n", + "The `halign` and `valign` properties control the *horizontal* and *vertical* alignment of text marks. The options for horizontal alignment are `'left'`, `'right'`, and `'center'`, while the options for vertical alignment are `'top'`, `'bottom'`, `'center'`, `'baseline'`, and `'center_baseline'`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e9588309-bee4-4b97-b428-eb91ea582105", + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "x = [\"left\", \"right\", \"top\", \"bottom\", \"baseline\", \"center\"]\n", + "ha = x[:2] + [\"center\"] * 4\n", + "va = [\"center_baseline\"] * 2 + x[2:]\n", + "y = np.zeros(len(x))\n", + "(\n", + " so.Plot(x=[f\"'{_x_}'\" for _x_ in x], y=y, halign=ha, valign=va)\n", + " .add(so.Dot(marker=\"+\", color=\"r\", alpha=.5, stroke=1, pointsize=24))\n", + " .add(so.Text(text=\"XyZ\", fontsize=14, offset=0))\n", + " .scale(y=so.Continuous().tick(at=[]), halign=None, valign=None)\n", + " .limit(x=(-.25, len(x) - .75))\n", + " .layout(size=(9, .6), engine=None)\n", + " .theme({\n", + " **axes_style(\"ticks\"),\n", + " **{f\"axes.spines.{side}\": False for side in [\"left\", \"right\", \"top\"]},\n", + " \"xtick.labelsize\": 12,\n", + " \"axes.xmargin\": .015,\n", + " \"ytick.labelsize\": 12,\n", + " })\n", + " .plot()\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "ea74c7e5-798b-47bc-bc18-9086902fb5c6", + "metadata": {}, + "source": [ + ".. _fontsize_property:\n", + "\n", + "fontsize\n", + "~~~~~~~~\n", + "\n", + "The `fontsize` property controls the size of textual marks. 
The value has point units:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c515b790-385d-4521-b14a-0769c1902928", + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "from string import ascii_uppercase\n", + "n = 26\n", + "s = np.arange(n) + 1\n", + "y = np.zeros(n)\n", + "t = list(ascii_uppercase[:n])\n", + "(\n", + " so.Plot(x=s, y=y, text=t, fontsize=s)\n", + " .add(so.Text())\n", + " .scale(x=so.Nominal(), y=so.Continuous().tick(at=[]))\n", + " .layout(size=(9, .5), engine=None)\n", + " .theme({\n", + " **axes_style(\"ticks\"),\n", + " **{f\"axes.spines.{side}\": False for side in [\"left\", \"right\", \"top\"]},\n", + " \"xtick.labelsize\": 12,\n", + " \"axes.xmargin\": .015,\n", + " \"ytick.labelsize\": 12,\n", + " })\n", + " .plot()\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "4b367f36-fb96-44fa-83a3-1cc66c7a3279", + "metadata": {}, + "source": [ + ".. _offset_property:\n", + "\n", + "offset\n", + "~~~~~~\n", + "\n", + "The `offset` property controls the spacing between a text mark and its anchor position. It applies when *not* using `center` alignment (i.e., when using left/right or top/bottom). The value has point units. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25a49331-9580-4578-8bdb-d0d1829dde71", + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "n = 17\n", + "x = np.linspace(0, 8, n)\n", + "y = np.full(n, .5)\n", + "(\n", + " so.Plot(x=x, y=y, offset=x)\n", + " .add(so.Bar(color=\".6\", edgecolor=\"k\"))\n", + " .add(so.Text(text=\"abc\", valign=\"bottom\"))\n", + " .scale(\n", + " x=so.Continuous().tick(every=1, minor=1),\n", + " y=so.Continuous().tick(at=[]),\n", + " offset=None,\n", + " )\n", + " .limit(y=(0, 1.5))\n", + " .layout(size=(9, .5), engine=None)\n", + " .theme({\n", + " **axes_style(\"ticks\"),\n", + " **{f\"axes.spines.{side}\": False for side in [\"left\", \"right\", \"top\"]},\n", + " \"axes.xmargin\": .015,\n", + " \"xtick.labelsize\": 12,\n", + " \"ytick.labelsize\": 12,\n", + " })\n", + " .plot()\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "77723ffd-2da3-4ece-a97a-3c00e864c743", + "metadata": {}, + "source": [ + "Other properties\n", + "----------------" + ] + }, + { + "cell_type": "raw", + "id": "287bb259-0194-4c8c-8836-5e3eb6d88e79", + "metadata": {}, + "source": [ + ".. _property_property:\n", + "\n", + "text\n", + "~~~~\n", + "\n", + "The `text` property is used to set the content of a textual mark. It is always used literally (not mapped), and cast to string when necessary.\n", + "\n", + "group\n", + "~~~~~\n", + "\n", + "The `group` property is special in that it does not change anything about the mark's appearance but defines additional data subsets that transforms should operate on independently." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f23c9251-1685-4150-b5c2-ab5b0589d8e6", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/_tutorial/regression.ipynb b/doc/_tutorial/regression.ipynb new file mode 100644 index 0000000000..d957101e07 --- /dev/null +++ b/doc/_tutorial/regression.ipynb @@ -0,0 +1,454 @@ +{ + "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + ".. _regression_tutorial:\n", + "\n", + ".. currentmodule:: seaborn" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Estimating regression fits\n", + "==========================" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Many datasets contain multiple quantitative variables, and the goal of an analysis is often to relate those variables to each other. We :ref:`previously discussed ` functions that can accomplish this by showing the joint distribution of two variables. It can be very helpful, though, to use statistical models to estimate a simple relationship between two noisy sets of observations. The functions discussed in this chapter will do so through the common framework of linear regression.\n", + "\n", + "In the spirit of Tukey, the regression plots in seaborn are primarily intended to add a visual guide that helps to emphasize patterns in a dataset during exploratory data analyses. That is to say that seaborn is not itself a package for statistical analysis. To obtain quantitative measures related to the fit of regression models, you should use `statsmodels `_. 
The goal of seaborn, however, is to make exploring a dataset through visualization quick and easy, as doing so is just as (if not more) important than exploring a dataset through tables of statistics." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "sns.set_theme(color_codes=True)\n", + "np.random.seed(sum(map(ord, \"regression\")))" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Functions for drawing linear regression models\n", + "----------------------------------------------\n", + "\n", + "The two functions that can be used to visualize a linear fit are :func:`regplot` and :func:`lmplot`.\n", + "\n", + "In the simplest invocation, both functions draw a scatterplot of two variables, ``x`` and ``y``, and then fit the regression model ``y ~ x`` and plot the resulting regression line and a 95% confidence interval for that regression:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tips = sns.load_dataset(\"tips\")\n", + "sns.regplot(x=\"total_bill\", y=\"tip\", data=tips);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.lmplot(x=\"total_bill\", y=\"tip\", data=tips);" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "These functions draw similar plots, but :func:`regplot` is an :doc:`axes-level function `, and :func:`lmplot` is a figure-level function. Additionally, :func:`regplot` accepts the ``x`` and ``y`` variables in a variety of formats including simple numpy arrays, :class:`pandas.Series` objects, or as references to variables in a :class:`pandas.DataFrame` object passed to `data`. 
In contrast, :func:`lmplot` has `data` as a required parameter and the `x` and `y` variables must be specified as strings. Finally, only :func:`lmplot` has `hue` as a parameter.\n", + "\n", + "The core functionality is otherwise similar, though, so this tutorial will focus on :func:`lmplot`.\n", + "\n", + "It's possible to fit a linear regression when one of the variables takes discrete values; however, the simple scatterplot produced by this kind of dataset is often not optimal:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.lmplot(x=\"size\", y=\"tip\", data=tips);" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "One option is to add some random noise (\"jitter\") to the discrete values to make the distribution of those values more clear. Note that jitter is applied only to the scatterplot data and does not influence the regression line fit itself:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.lmplot(x=\"size\", y=\"tip\", data=tips, x_jitter=.05);" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "A second option is to collapse over the observations in each discrete bin to plot an estimate of central tendency along with a confidence interval:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.lmplot(x=\"size\", y=\"tip\", data=tips, x_estimator=np.mean);" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Fitting different kinds of models\n", + "---------------------------------\n", + "\n", + "The simple linear regression model used above is very simple to fit; however, it is not appropriate for some kinds of datasets. 
The `Anscombe's quartet `_ dataset shows a few examples where simple linear regression provides an identical estimate of a relationship where simple visual inspection clearly shows differences. For example, in the first case, the linear regression is a good model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "anscombe = sns.load_dataset(\"anscombe\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.lmplot(x=\"x\", y=\"y\", data=anscombe.query(\"dataset == 'I'\"),\n", + " ci=None, scatter_kws={\"s\": 80});" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The linear relationship in the second dataset is the same, but the plot clearly shows that this is not a good model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.lmplot(x=\"x\", y=\"y\", data=anscombe.query(\"dataset == 'II'\"),\n", + " ci=None, scatter_kws={\"s\": 80});" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "In the presence of these kinds of higher-order relationships, :func:`lmplot` and :func:`regplot` can fit a polynomial regression model to explore simple kinds of nonlinear trends in the dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.lmplot(x=\"x\", y=\"y\", data=anscombe.query(\"dataset == 'II'\"),\n", + " order=2, ci=None, scatter_kws={\"s\": 80});" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "A different problem is posed by \"outlier\" observations that deviate for some reason other than the main relationship under study:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.lmplot(x=\"x\", y=\"y\", data=anscombe.query(\"dataset == 'III'\"),\n", + " ci=None, 
scatter_kws={\"s\": 80});" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "In the presence of outliers, it can be useful to fit a robust regression, which uses a different loss function to downweight relatively large residuals:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.lmplot(x=\"x\", y=\"y\", data=anscombe.query(\"dataset == 'III'\"),\n", + " robust=True, ci=None, scatter_kws={\"s\": 80});" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "When the ``y`` variable is binary, simple linear regression also \"works\" but provides implausible predictions:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tips[\"big_tip\"] = (tips.tip / tips.total_bill) > .15\n", + "sns.lmplot(x=\"total_bill\", y=\"big_tip\", data=tips,\n", + " y_jitter=.03);" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The solution in this case is to fit a logistic regression, such that the regression line shows the estimated probability of ``y = 1`` for a given value of ``x``:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.lmplot(x=\"total_bill\", y=\"big_tip\", data=tips,\n", + " logistic=True, y_jitter=.03);" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Note that the logistic regression estimate is considerably more computationally intensive (this is true of robust regression as well). As the confidence interval around the regression line is computed using a bootstrap procedure, you may wish to turn this off for faster iteration (using ``ci=None``).\n", + "\n", + "An altogether different approach is to fit a nonparametric regression using a `lowess smoother `_. 
This approach has the fewest assumptions, although it is computationally intensive and so currently confidence intervals are not computed at all:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.lmplot(x=\"total_bill\", y=\"tip\", data=tips,\n", + " lowess=True, line_kws={\"color\": \"C1\"});" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The :func:`residplot` function can be a useful tool for checking whether the simple regression model is appropriate for a dataset. It fits and removes a simple linear regression and then plots the residual values for each observation. Ideally, these values should be randomly scattered around ``y = 0``:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.residplot(x=\"x\", y=\"y\", data=anscombe.query(\"dataset == 'I'\"),\n", + " scatter_kws={\"s\": 80});" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "If there is structure in the residuals, it suggests that simple linear regression is not appropriate:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.residplot(x=\"x\", y=\"y\", data=anscombe.query(\"dataset == 'II'\"),\n", + " scatter_kws={\"s\": 80});" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Conditioning on other variables\n", + "-------------------------------\n", + "\n", + "The plots above show many ways to explore the relationship between a pair of variables. Often, however, a more interesting question is \"how does the relationship between these two variables change as a function of a third variable?\" This is where the main differences between :func:`regplot` and :func:`lmplot` appear. 
While :func:`regplot` always shows a single relationship, :func:`lmplot` combines :func:`regplot` with :class:`FacetGrid` to show multiple fits using `hue` mapping or faceting.\n", + "\n", + "The best way to separate out a relationship is to plot both levels on the same axes and to use color to distinguish them:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.lmplot(x=\"total_bill\", y=\"tip\", hue=\"smoker\", data=tips);" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Unlike :func:`relplot`, it's not possible to map a distinct variable to the style properties of the scatter plot, but you can redundantly code the `hue` variable with marker shape:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.lmplot(x=\"total_bill\", y=\"tip\", hue=\"smoker\", data=tips,\n", + " markers=[\"o\", \"x\"], palette=\"Set1\");" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "To add another variable, you can draw multiple \"facets\" with each level of the variable appearing in the rows or columns of the grid:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.lmplot(x=\"total_bill\", y=\"tip\", hue=\"smoker\", col=\"time\", data=tips);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.lmplot(x=\"total_bill\", y=\"tip\", hue=\"smoker\",\n", + " col=\"time\", row=\"sex\", data=tips, height=3);" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Plotting a regression in other contexts\n", + "---------------------------------------\n", + "\n", + "A few other seaborn functions use :func:`regplot` in the context of a larger, more complex plot. The first is the :func:`jointplot` function that we introduced in the :ref:`distributions tutorial `. 
In addition to the plot styles previously discussed, :func:`jointplot` can use :func:`regplot` to show the linear regression fit on the joint axes by passing ``kind=\"reg\"``:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.jointplot(x=\"total_bill\", y=\"tip\", data=tips, kind=\"reg\");" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Using the :func:`pairplot` function with ``kind=\"reg\"`` combines :func:`regplot` and :class:`PairGrid` to show the linear relationship between variables in a dataset. Take care to note how this is different from :func:`lmplot`. In the figure below, the two axes don't show the same relationship conditioned on two levels of a third variable; rather, :class:`PairGrid` is used to show multiple relationships between different pairings of the variables in a dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.pairplot(tips, x_vars=[\"total_bill\", \"size\"], y_vars=[\"tip\"],\n", + " height=5, aspect=.8, kind=\"reg\");" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Conditioning on an additional categorical variable is built into both of these functions using the ``hue`` parameter:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.pairplot(tips, x_vars=[\"total_bill\", \"size\"], y_vars=[\"tip\"],\n", + " hue=\"smoker\", height=5, aspect=.8, kind=\"reg\");" + ] + } + ], + "metadata": { + "celltoolbar": "Tags", + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + 
"nbformat_minor": 4 +} diff --git a/doc/_tutorial/relational.ipynb b/doc/_tutorial/relational.ipynb new file mode 100644 index 0000000000..f96ed638df --- /dev/null +++ b/doc/_tutorial/relational.ipynb @@ -0,0 +1,685 @@ +{ + "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + ".. _relational_tutorial:\n", + "\n", + ".. currentmodule:: seaborn" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Visualizing statistical relationships\n", + "=====================================\n", + "\n", + "Statistical analysis is a process of understanding how variables in a dataset relate to each other and how those relationships depend on other variables. Visualization can be a core component of this process because, when data are visualized properly, the human visual system can see trends and patterns that indicate a relationship.\n", + "\n", + "We will discuss three seaborn functions in this tutorial. The one we will use most is :func:`relplot`. This is a :doc:`figure-level function ` for visualizing statistical relationships using two common approaches: scatter plots and line plots. :func:`relplot` combines a :class:`FacetGrid` with one of two axes-level functions:\n", + "\n", + "- :func:`scatterplot` (with ``kind=\"scatter\"``; the default)\n", + "- :func:`lineplot` (with ``kind=\"line\"``)\n", + "\n", + "As we will see, these functions can be quite illuminating because they use simple and easily-understood representations of data that can nevertheless represent complex dataset structures. They can do so because they plot two-dimensional graphics that can be enhanced by mapping up to three additional variables using the semantics of hue, size, and style." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "sns.set_theme(style=\"darkgrid\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "np.random.seed(sum(map(ord, \"relational\")))" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + ".. _scatterplot_tutorial:\n", + "\n", + "Relating variables with scatter plots\n", + "-------------------------------------\n", + "\n", + "The scatter plot is a mainstay of statistical visualization. It depicts the joint distribution of two variables using a cloud of points, where each point represents an observation in the dataset. This depiction allows the eye to infer a substantial amount of information about whether there is any meaningful relationship between them.\n", + "\n", + "There are several ways to draw a scatter plot in seaborn. The most basic, which should be used when both variables are numeric, is the :func:`scatterplot` function. In the :ref:`categorical visualization tutorial `, we will see specialized tools for using scatterplots to visualize categorical data. The :func:`scatterplot` is the default ``kind`` in :func:`relplot` (it can also be forced by setting ``kind=\"scatter\"``):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tips = sns.load_dataset(\"tips\")\n", + "sns.relplot(data=tips, x=\"total_bill\", y=\"tip\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "While the points are plotted in two dimensions, another dimension can be added to the plot by coloring the points according to a third variable. 
In seaborn, this is referred to as using a \"hue semantic\", because the color of the point gains meaning:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.relplot(data=tips, x=\"total_bill\", y=\"tip\", hue=\"smoker\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "To emphasize the difference between the classes, and to improve accessibility, you can use a different marker style for each class:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.relplot(\n", + " data=tips,\n", + " x=\"total_bill\", y=\"tip\", hue=\"smoker\", style=\"smoker\"\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "It's also possible to represent four variables by changing the hue and style of each point independently. But this should be done carefully, because the eye is much less sensitive to shape than to color:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.relplot(\n", + " data=tips,\n", + " x=\"total_bill\", y=\"tip\", hue=\"smoker\", style=\"time\",\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "In the examples above, the hue semantic was categorical, so the default :ref:`qualitative palette ` was applied. If the hue semantic is numeric (specifically, if it can be cast to float), the default coloring switches to a sequential palette:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.relplot(\n", + " data=tips, x=\"total_bill\", y=\"tip\", hue=\"size\",\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "In both cases, you can customize the color palette. There are many options for doing so. 
Here, we customize a sequential palette using the string interface to :func:`cubehelix_palette`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.relplot(\n", + " data=tips, \n", + " x=\"total_bill\", y=\"tip\",\n", + " hue=\"size\", palette=\"ch:r=-.5,l=.75\"\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The third kind of semantic variable changes the size of each point:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.relplot(data=tips, x=\"total_bill\", y=\"tip\", size=\"size\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Unlike with :func:`matplotlib.pyplot.scatter`, the literal value of the variable is not used to pick the area of the point. Instead, the range of values in data units is normalized into a range in area units. This range can be customized:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.relplot(\n", + " data=tips, x=\"total_bill\", y=\"tip\",\n", + " size=\"size\", sizes=(15, 200)\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "More examples for customizing how the different semantics are used to show statistical relationships are shown in the :func:`scatterplot` API examples." + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + ".. _lineplot_tutorial:\n", + "\n", + "Emphasizing continuity with line plots\n", + "--------------------------------------\n", + "\n", + "Scatter plots are highly effective, but there is no universally optimal type of visualisation. 
Instead, the visual representation should be adapted for the specifics of the dataset and to the question you are trying to answer with the plot.\n", + "\n", + "With some datasets, you may want to understand changes in one variable as a function of time, or a similarly continuous variable. In this situation, a good choice is to draw a line plot. In seaborn, this can be accomplished by the :func:`lineplot` function, either directly or with :func:`relplot` by setting ``kind=\"line\"``:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dowjones = sns.load_dataset(\"dowjones\")\n", + "sns.relplot(data=dowjones, x=\"Date\", y=\"Price\", kind=\"line\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Aggregation and representing uncertainty\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\n", + "More complex datasets will have multiple measurements for the same value of the ``x`` variable. The default behavior in seaborn is to aggregate the multiple measurements at each ``x`` value by plotting the mean and the 95% confidence interval around the mean:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fmri = sns.load_dataset(\"fmri\")\n", + "sns.relplot(data=fmri, x=\"timepoint\", y=\"signal\", kind=\"line\")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The confidence intervals are computed using bootstrapping, which can be time-intensive for larger datasets. 
It's therefore possible to disable them:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.relplot(\n", + " data=fmri, kind=\"line\",\n", + " x=\"timepoint\", y=\"signal\", errorbar=None,\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Another good option, especially with larger data, is to represent the spread of the distribution at each timepoint by plotting the standard deviation instead of a confidence interval:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.relplot(\n", + " data=fmri, kind=\"line\",\n", + " x=\"timepoint\", y=\"signal\", errorbar=\"sd\",\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "To turn off aggregation altogether, set the ``estimator`` parameter to ``None``. This might produce a strange effect when the data have multiple observations at each point." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.relplot(\n", + " data=fmri, kind=\"line\",\n", + " x=\"timepoint\", y=\"signal\",\n", + " estimator=None,\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Plotting subsets of data with semantic mappings\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\n", + "The :func:`lineplot` function has the same flexibility as :func:`scatterplot`: it can show up to three additional variables by modifying the hue, size, and style of the plot elements. It does so using the same API as :func:`scatterplot`, meaning that we don't need to stop and think about the parameters that control the look of lines vs. points in matplotlib.\n", + "\n", + "Using semantics in :func:`lineplot` will also determine how the data get aggregated.
For example, adding a hue semantic with two levels splits the plot into two lines and error bands, coloring each to indicate which subset of the data they correspond to." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.relplot(\n", + " data=fmri, kind=\"line\",\n", + " x=\"timepoint\", y=\"signal\", hue=\"event\",\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Adding a style semantic to a line plot changes the pattern of dashes in the line by default:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.relplot(\n", + " data=fmri, kind=\"line\",\n", + " x=\"timepoint\", y=\"signal\",\n", + " hue=\"region\", style=\"event\",\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "But you can identify subsets by the markers used at each observation, either together with the dashes or instead of them:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.relplot(\n", + " data=fmri, kind=\"line\",\n", + " x=\"timepoint\", y=\"signal\", hue=\"region\", style=\"event\",\n", + " dashes=False, markers=True,\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "As with scatter plots, be cautious about making line plots using multiple semantics. While sometimes informative, they can also be difficult to parse and interpret. But even when you are only examining changes across one additional variable, it can be useful to alter both the color and style of the lines. 
This can make the plot more accessible when printed to black-and-white or viewed by someone with color blindness:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.relplot(\n", + " data=fmri, kind=\"line\",\n", + " x=\"timepoint\", y=\"signal\", hue=\"event\", style=\"event\",\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "When you are working with repeated measures data (that is, you have units that were sampled multiple times), you can also plot each sampling unit separately without distinguishing them through semantics. This avoids cluttering the legend:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.relplot(\n", + " data=fmri.query(\"event == 'stim'\"), kind=\"line\",\n", + " x=\"timepoint\", y=\"signal\", hue=\"region\",\n", + " units=\"subject\", estimator=None,\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The default colormap and handling of the legend in :func:`lineplot` also depend on whether the hue semantic is categorical or numeric:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dots = sns.load_dataset(\"dots\").query(\"align == 'dots'\")\n", + "sns.relplot(\n", + " data=dots, kind=\"line\",\n", + " x=\"time\", y=\"firing_rate\",\n", + " hue=\"coherence\", style=\"choice\",\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "It may happen that, even though the ``hue`` variable is numeric, it is poorly represented by a linear color scale. That's the case here, where the levels of the ``hue`` variable are logarithmically scaled.
You can provide specific color values for each line by passing a list or dictionary:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "palette = sns.cubehelix_palette(light=.8, n_colors=6)\n", + "sns.relplot(\n", + " data=dots, kind=\"line\", \n", + " x=\"time\", y=\"firing_rate\",\n", + " hue=\"coherence\", style=\"choice\", palette=palette,\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Or you can alter how the colormap is normalized:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from matplotlib.colors import LogNorm\n", + "palette = sns.cubehelix_palette(light=.7, n_colors=6)\n", + "sns.relplot(\n", + " data=dots.query(\"coherence > 0\"), kind=\"line\",\n", + " x=\"time\", y=\"firing_rate\",\n", + " hue=\"coherence\", style=\"choice\",\n", + " hue_norm=LogNorm(),\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "The third semantic, size, changes the width of the lines:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.relplot(\n", + " data=dots, kind=\"line\",\n", + " x=\"time\", y=\"firing_rate\",\n", + " size=\"coherence\", style=\"choice\",\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "While the ``size`` variable will typically be numeric, it's also possible to map a categorical variable with the width of the lines. Be cautious when doing so, because it will be difficult to distinguish much more than \"thick\" vs \"thin\" lines. 
However, dashes can be hard to perceive when lines have high-frequency variability, so using different widths may be more effective in that case:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.relplot(\n", + " data=dots, kind=\"line\",\n", + " x=\"time\", y=\"firing_rate\",\n", + " hue=\"coherence\", size=\"choice\", palette=palette,\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Controlling sorting and orientation\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "\n", + "Because :func:`lineplot` assumes that you are most often trying to draw ``y`` as a function of ``x``, the default behavior is to sort the data by the ``x`` values before plotting. However, this can be disabled:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "healthexp = sns.load_dataset(\"healthexp\").sort_values(\"Year\")\n", + "sns.relplot(\n", + " data=healthexp, kind=\"line\",\n", + " x=\"Spending_USD\", y=\"Life_Expectancy\", hue=\"Country\",\n", + " sort=False\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "It's also possible to sort (and aggregate) along the y axis:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.relplot(\n", + " data=fmri, kind=\"line\",\n", + " x=\"signal\", y=\"timepoint\", hue=\"event\",\n", + " orient=\"y\",\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Showing multiple relationships with facets\n", + "------------------------------------------\n", + "\n", + "We've emphasized in this tutorial that, while these functions *can* show several semantic variables at once, it's not always effective to do so. 
But what about when you do want to understand how a relationship between two variables depends on more than one other variable?\n", + "\n", + "The best approach may be to make more than one plot. Because :func:`relplot` is based on the :class:`FacetGrid`, this is easy to do. To show the influence of an additional variable, instead of assigning it to one of the semantic roles in the plot, use it to \"facet\" the visualization. This means that you make multiple axes and plot subsets of the data on each of them:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.relplot(\n", + " data=tips,\n", + " x=\"total_bill\", y=\"tip\", hue=\"smoker\", col=\"time\",\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "You can also show the influence of two variables this way: one by faceting on the columns and one by faceting on the rows. As you start adding more variables to the grid, you may want to decrease the figure size. 
Remember that the size of a :class:`FacetGrid` is parameterized by the height and aspect ratio of *each facet*:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide" + ] + }, + "outputs": [], + "source": [ + "subject_number = fmri[\"subject\"].str[1:].astype(int)\n", + "fmri= fmri.iloc[subject_number.argsort()]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.relplot(\n", + " data=fmri, kind=\"line\",\n", + " x=\"timepoint\", y=\"signal\", hue=\"subject\",\n", + " col=\"region\", row=\"event\", height=3,\n", + " estimator=None\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "When you want to examine effects across many levels of a variable, it can be a good idea to facet that variable on the columns and then \"wrap\" the facets into the rows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.relplot(\n", + " data=fmri.query(\"region == 'frontal'\"), kind=\"line\",\n", + " x=\"timepoint\", y=\"signal\", hue=\"event\", style=\"event\",\n", + " col=\"subject\", col_wrap=5,\n", + " height=3, aspect=.75, linewidth=2.5,\n", + ")" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "These visualizations, which are sometimes called \"lattice\" plots or \"small-multiples\", are very effective because they present the data in a format that makes it easy for the eye to detect both overall patterns and deviations from those patterns. While you should make use of the flexibility afforded by :func:`scatterplot` and :func:`relplot`, always try to keep in mind that several simple plots are usually more effective than one complex plot."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "celltoolbar": "Tags", + "kernelspec": { + "display_name": "py310", + "language": "python", + "name": "py310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/doc/api.rst b/doc/api.rst index bf3abacee8..189e790467 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -1,91 +1,259 @@ .. _api_ref: -.. currentmodule:: seaborn - API reference ============= -Regression plots ----------------- +.. currentmodule:: seaborn.objects + +.. _objects_api: + +Objects interface +----------------- + +Plot object +~~~~~~~~~~~ .. autosummary:: :toctree: generated/ + :template: plot + :nosignatures: - lmplot - regplot - pairplot - interactplot - residplot - coefplot - corrplot + Plot + +Mark objects +~~~~~~~~~~~~ + +.. rubric:: Dot marks + +.. autosummary:: + :toctree: generated/ + :template: object + :nosignatures: + + Dot + Dots + +.. rubric:: Line marks + +.. autosummary:: + :toctree: generated/ + :template: object + :nosignatures: + + Line + Lines + Path + Paths + Dash + Range + +.. rubric:: Bar marks + +.. autosummary:: + :toctree: generated/ + :template: object + :nosignatures: + + Bar + Bars + +.. rubric:: Fill marks + +.. autosummary:: + :toctree: generated/ + :template: object + :nosignatures: + + Area + Band + +.. rubric:: Text marks + +.. autosummary:: + :toctree: generated/ + :template: object + :nosignatures: + + Text + +Stat objects +~~~~~~~~~~~~ + +.. autosummary:: + :toctree: generated/ + :template: object + :nosignatures: + + Agg + Est + Count + Hist + KDE + Perc + PolyFit + +Move objects +~~~~~~~~~~~~ + +.. 
autosummary:: + :toctree: generated/ + :template: object + :nosignatures: + + Dodge + Jitter + Norm + Stack + Shift + +Scale objects +~~~~~~~~~~~~~ + +.. autosummary:: + :toctree: generated/ + :template: scale + :nosignatures: + + Boolean + Continuous + Nominal + Temporal + +Base classes +~~~~~~~~~~~~ + +.. autosummary:: + :toctree: generated/ + :template: object + :nosignatures: + + Mark + Stat + Move + Scale + +.. currentmodule:: seaborn + +Function interface +------------------ + +.. _relational_api: + +Relational plots +~~~~~~~~~~~~~~~~ + +.. autosummary:: + :toctree: generated/ + :nosignatures: + + relplot + scatterplot + lineplot + +.. _distribution_api: + +Distribution plots +~~~~~~~~~~~~~~~~~~ + +.. autosummary:: + :toctree: generated/ + :nosignatures: + + displot + histplot + kdeplot + ecdfplot + rugplot + distplot + +.. _categorical_api: Categorical plots ------------------ +~~~~~~~~~~~~~~~~~ .. autosummary:: :toctree: generated/ + :nosignatures: - factorplot + catplot + stripplot + swarmplot + boxplot + violinplot + boxenplot pointplot barplot countplot - boxplot - violinplot - stripplot -Distribution plots ------------------- +.. _regression_api: + +Regression plots +~~~~~~~~~~~~~~~~ .. autosummary:: :toctree: generated/ + :nosignatures: - jointplot - distplot - kdeplot - rugplot + lmplot + regplot + residplot + +.. _matrix_api: Matrix plots ------------- +~~~~~~~~~~~~ .. autosummary:: - :toctree: generated/ + :toctree: generated/ + :nosignatures: heatmap clustermap -Timeseries plots +.. _grid_api: + +Multi-plot grids ---------------- +Facet grids +~~~~~~~~~~~ + .. autosummary:: :toctree: generated/ + :nosignatures: - tsplot + FacetGrid -Miscellaneous plots -------------------- +Pair grids +~~~~~~~~~~ .. autosummary:: :toctree: generated/ + :nosignatures: - palplot + pairplot + PairGrid -Axis grids ----------- +Joint grids +~~~~~~~~~~~ .. 
autosummary:: - :toctree: generated/ + :toctree: generated/ + :nosignatures: - FacetGrid - PairGrid + jointplot JointGrid -Style frontend --------------- +.. _style_api: + +Themeing +-------- .. autosummary:: :toctree: generated/ + :nosignatures: - set + set_theme axes_style set_style plotting_context @@ -93,12 +261,16 @@ Style frontend set_color_codes reset_defaults reset_orig + set + +.. _palette_api: Color palettes -------------- .. autosummary:: :toctree: generated/ + :nosignatures: set_palette color_palette @@ -114,10 +286,11 @@ Color palettes mpl_palette Palette widgets ---------------- +~~~~~~~~~~~~~~~ .. autosummary:: :toctree: generated/ + :nosignatures: choose_colorbrewer_palette choose_cubehelix_palette @@ -131,10 +304,13 @@ Utility functions .. autosummary:: :toctree: generated/ + :nosignatures: despine - desaturate + move_legend saturate + desaturate set_hls_values - ci_to_errsize - axlabel + load_dataset + get_dataset_names + get_data_home diff --git a/doc/citing.rst b/doc/citing.rst new file mode 100644 index 0000000000..5de2e0f075 --- /dev/null +++ b/doc/citing.rst @@ -0,0 +1,60 @@ +.. _citing: + +Citing and logo +=============== + +Citing seaborn +-------------- + +If seaborn is integral to a scientific publication, please cite it. +A paper describing seaborn has been published in the `Journal of Open Source Software `_: + + Waskom, M. L., (2021). seaborn: statistical data visualization. Journal of Open Source Software, 6(60), 3021, https://doi.org/10.21105/joss.03021. + +Here is a ready-made BibTeX entry: + +.. highlight:: none + +:: + + @article{Waskom2021, + doi = {10.21105/joss.03021}, + url = {https://doi.org/10.21105/joss.03021}, + year = {2021}, + publisher = {The Open Journal}, + volume = {6}, + number = {60}, + pages = {3021}, + author = {Michael L. 
Waskom}, + title = {seaborn: statistical data visualization}, + journal = {Journal of Open Source Software} + } + +In most situations where seaborn is cited, a citation to `matplotlib `_ would also be appropriate. + +Logo files +---------- + +Additional logo files, including hi-res PNGs and images suitable for use over a dark background, are available +`on GitHub `_. + +Wide logo +~~~~~~~~~ + +.. image:: _static/logo-wide-lightbg.svg + :width: 400px + +Tall logo +~~~~~~~~~ + +.. image:: _static/logo-tall-lightbg.svg + :width: 150px + +Logo mark +~~~~~~~~~ + +.. image:: _static/logo-mark-lightbg.svg + :width: 150px + +Credit to `Matthias Bussonnier `_ for the initial design +and implementation of the logo. diff --git a/doc/conf.py b/doc/conf.py index cac6a776b7..467527f3c4 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -1,282 +1,179 @@ -# -*- coding: utf-8 -*- +# Configuration file for the Sphinx documentation builder. # -# seaborn documentation build configuration file, created by -# sphinx-quickstart on Mon Jul 29 23:25:46 2013. -# -# This file is execfile()d with the current directory set to its containing dir. -# -# Note that not all possible configuration values are present in this -# autogenerated file. -# -# All configuration values have a default; values that are commented out -# serve to show the default. +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html -import sys, os -import sphinx_bootstrap_theme -import matplotlib as mpl -mpl.use("Agg") +# -- Path setup -------------------------------------------------------------- # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. 
-#sys.path.insert(0, os.path.abspath('.')) - -# -- General configuration ----------------------------------------------------- - -# If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' +# +import os +import sys +import time +import seaborn +from seaborn._core.properties import PROPERTIES -# Add any Sphinx extension module names here, as strings. They can be extensions -# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. sys.path.insert(0, os.path.abspath('sphinxext')) -extensions = ['sphinx.ext.autodoc', - 'sphinx.ext.doctest', - 'sphinx.ext.coverage', - 'sphinx.ext.mathjax', - 'sphinx.ext.autosummary', - 'plot_generator', - 'plot_directive', - 'numpydoc', - 'ipython_directive', - 'ipython_console_highlighting', - ] -# Generate the API documentation when building -autosummary_generate = True -numpydoc_show_class_members = False -# Include the example source for plots in API docs -plot_include_source = True -plot_formats = [("png", 90)] -plot_html_show_formats = False -plot_html_show_source_link = False +# -- Project information ----------------------------------------------------- + +project = 'seaborn' +copyright = f'2012-{time.strftime("%Y")}' +author = 'Michael Waskom' +version = release = seaborn.__version__ + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.doctest', + 'sphinx.ext.coverage', + 'sphinx.ext.mathjax', + 'sphinx.ext.autosummary', + 'sphinx.ext.intersphinx', + 'matplotlib.sphinxext.plot_directive', + 'gallery_generator', + 'tutorial_builder', + 'numpydoc', + 'sphinx_copybutton', + 'sphinx_issues', + 'sphinx_design', +] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] -# The suffix of source filenames.
-source_suffix = '.rst' - -# The encoding of source files. -#source_encoding = 'utf-8-sig' - -# The master toctree document. -master_doc = 'index' - -# General information about the project. -project = u'seaborn' -copyright = u'2012-2015, Michael Waskom' - -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. -# -# The short X.Y version. -sys.path.insert(0, os.path.abspath(os.path.pardir)) -import seaborn -version = seaborn.__version__ -# The full version, including alpha/beta/rc tags. -release = seaborn.__version__ - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -#language = None - -# There are two options for replacing |today|: either, you set today to some -# non-false value, then it is used: -#today = '' -# Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' +# The root document. +root_doc = 'index' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ['_build'] +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = ['_build', 'docstrings', 'nextgen', 'Thumbs.db', '.DS_Store'] # The reST default role (used for this markup: `text`) to use for all documents. -#default_role = None - -# If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True +default_role = 'literal' -# If true, the current module name will be prepended to all description -# unit titles (such as .. function::). -#add_module_names = True +# Generate the API documentation when building +autosummary_generate = True +numpydoc_show_class_members = False -# If true, sectionauthor and moduleauthor directives will be shown in the -# output. They are ignored by default. 
-#show_authors = False +# Sphinx-issues configuration +issues_github_path = 'mwaskom/seaborn' -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +# Include the example source for plots in API docs +plot_include_source = True +plot_formats = [('png', 90)] +plot_html_show_formats = False +plot_html_show_source_link = False -# A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] +# Don't add a source link in the sidebar +html_show_sourcelink = False +# Control the appearance of type hints +autodoc_typehints = "none" +autodoc_typehints_format = "short" -# -- Options for HTML output --------------------------------------------------- +# Allow shorthand references for main function interface +rst_prolog = """ +.. currentmodule:: seaborn +""" -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -html_theme = 'bootstrap' +# Define replacements (used in whatsnew bullets) +rst_epilog = r""" -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -html_theme_options = { - 'source_link_position': "footer", - 'bootswatch_theme': "flatly", - 'navbar_sidebarrel': False, - 'bootstrap_version': "3", - 'navbar_links': [("Tutorial", "tutorial"), - ("Gallery", "examples/index")], +.. role:: raw-html(raw) + :format: html - } +.. role:: raw-latex(raw) + :format: latex -# Add any paths that contain custom themes here, relative to this directory. -html_theme_path = sphinx_bootstrap_theme.get_html_theme_path() +.. |API| replace:: :raw-html:`API` :raw-latex:`{\small\sc [API]}` +.. |Defaults| replace:: :raw-html:`Defaults` :raw-latex:`{\small\sc [Defaults]}` +.. |Docs| replace:: :raw-html:`Docs` :raw-latex:`{\small\sc [Docs]}` +.. |Feature| replace:: :raw-html:`Feature` :raw-latex:`{\small\sc [Feature]}` +.. 
|Enhancement| replace:: :raw-html:`Enhancement` :raw-latex:`{\small\sc [Enhancement]}` +.. |Fix| replace:: :raw-html:`Fix` :raw-latex:`{\small\sc [Fix]}` +.. |Build| replace:: :raw-html:`Build` :raw-latex:`{\small\sc [Deps]}` -# The name for this set of Sphinx documents. If None, it defaults to -# " v documentation". -#html_title = None +""" # noqa -# A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None +rst_epilog += "\n".join([ + f".. |{key}| replace:: :ref:`{key} <{val.__class__.__name__.lower()}_property>`" + for key, val in PROPERTIES.items() +]) -# The name of an image file (relative to this directory) to place at the top -# of the sidebar. -#html_logo = None +# -- Options for HTML output ------------------------------------------------- -# The name of an image file (within the static path) to use as favicon of the -# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 -# pixels large. -#html_favicon = None +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'pydata_sphinx_theme' # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". +# so a file named 'default.css' will overwrite the builtin 'default.css'. html_static_path = ['_static', 'example_thumbs'] +for path in html_static_path: + if not os.path.exists(path): + os.makedirs(path) -# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, -# using the given strftime format. -#html_last_updated_fmt = '%b %d, %Y' +html_css_files = [f'css/custom.css?v={seaborn.__version__}'] -# If true, SmartyPants will be used to convert quotes and dashes to -# typographically correct entities. 
-#html_use_smartypants = True +html_logo = "_static/logo-wide-lightbg.svg" +html_favicon = "_static/favicon.ico" -# Custom sidebar templates, maps document names to template names. -#html_sidebars = {} - -# Additional templates that should be rendered to pages, maps page names to -# template names. -#html_additional_pages = {} - -# If false, no module index is generated. -#html_domain_indices = True - -# If false, no index is generated. -#html_use_index = True - -# If true, the index is split into individual pages for each letter. -#html_split_index = False - -# If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True - -# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -#html_show_sphinx = True - -# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -#html_show_copyright = True - -# If true, an OpenSearch description file will be output, and all pages will -# contain a tag referring to it. The value of this option must be the -# base URL from which the finished HTML is served. -#html_use_opensearch = '' - -# This is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = None - -# Output file base name for HTML help builder. -htmlhelp_basename = 'seaborndoc' - - -# -- Options for LaTeX output -------------------------------------------------- - -latex_elements = { -# The paper size ('letterpaper' or 'a4paper'). -#'papersize': 'letterpaper', - -# The font size ('10pt', '11pt' or '12pt'). -#'pointsize': '10pt', - -# Additional stuff for the LaTeX preamble. 
-#'preamble': '', +html_theme_options = { + "icon_links": [ + { + "name": "GitHub", + "url": "https://github.com/mwaskom/seaborn", + "icon": "fab fa-github", + "type": "fontawesome", + }, + { + "name": "StackOverflow", + "url": "https://stackoverflow.com/tags/seaborn", + "icon": "fab fa-stack-overflow", + "type": "fontawesome", + }, + { + "name": "Twitter", + "url": "https://twitter.com/michaelwaskom", + "icon": "fab fa-twitter", + "type": "fontawesome", + }, + ], + "show_prev_next": False, + "navbar_start": ["navbar-logo"], + "navbar_end": ["navbar-icon-links"], + "header_links_before_dropdown": 8, } -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, author, documentclass [howto/manual]). -latex_documents = [ - ('index', 'seaborn.tex', u'seaborn Documentation', - u'Michael Waskom', 'manual'), -] - -# The name of an image file (relative to this directory) to place at the top of -# the title page. -#latex_logo = None - -# For "manual" documents, if this is true, then toplevel headings are parts, -# not chapters. -#latex_use_parts = False - -# If true, show page references after internal links. -#latex_show_pagerefs = False - -# If true, show URL addresses after external links. -#latex_show_urls = False - -# Documents to append as an appendix to all manuals. -#latex_appendices = [] - -# If false, no module index is generated. -#latex_domain_indices = True - - -# -- Options for manual page output -------------------------------------------- - -# One entry per manual page. List of tuples -# (source start file, name, description, authors, manual section). -man_pages = [ - ('index', 'seaborn', u'seaborn Documentation', - [u'Michael Waskom'], 1) -] - -# If true, show URL addresses after external links. -#man_show_urls = False - - -# -- Options for Texinfo output ------------------------------------------------ - -# Grouping the document tree into Texinfo files. 
List of tuples -# (source start file, target name, title, author, -# dir menu entry, description, category) -texinfo_documents = [ - ('index', 'seaborn', u'seaborn Documentation', - u'Michael Waskom', 'seaborn', 'One line description of project.', - 'Miscellaneous'), -] - -# Documents to append as an appendix to all manuals. -#texinfo_appendices = [] +html_context = { + "default_mode": "light", +} -# If false, no module index is generated. -#texinfo_domain_indices = True +html_sidebars = { + "index": [], + "examples/index": [], + "**": ["sidebar-nav-bs.html"], +} -# How to display URL addresses: 'footnote', 'no', or 'inline'. -#texinfo_show_urls = 'footnote' +# -- Intersphinx ------------------------------------------------ -# Add the 'copybutton' javascript, to hide/show the prompt in code -# examples, originally taken from scikit-learn's doc/conf.py -def setup(app): - app.add_javascript('copybutton.js') - app.add_stylesheet('style.css') +intersphinx_mapping = { + 'numpy': ('https://numpy.org/doc/stable/', None), + 'scipy': ('https://docs.scipy.org/doc/scipy/', None), + 'matplotlib': ('https://matplotlib.org/stable', None), + 'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None), + 'statsmodels': ('https://www.statsmodels.org/stable/', None) +} diff --git a/seaborn/tests/__init__.py b/doc/example_thumbs/.gitkeep similarity index 100% rename from seaborn/tests/__init__.py rename to doc/example_thumbs/.gitkeep diff --git a/doc/faq.rst b/doc/faq.rst new file mode 100644 index 0000000000..ae3995aab1 --- /dev/null +++ b/doc/faq.rst @@ -0,0 +1,381 @@ +.. currentmodule:: seaborn + +Frequently asked questions +========================== + +This is a collection of answers to questions that are commonly raised about seaborn. + +Getting started +--------------- + +.. _faq_cant_import: + +I've installed seaborn, why can't I import it? 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +*It looks like you successfully installed seaborn by doing* `pip install seaborn` *but it cannot be imported. You get an error like "ModuleNotFoundError: No module named 'seaborn'" when you try.* + +This is probably not a `seaborn` problem, *per se*. If you have multiple Python environments on your computer, it is possible that you did `pip install` in one environment and tried to import the library in another. On a unix system, you could check whether the terminal commands `which pip`, `which python`, and (if applicable) `which jupyter` point to the same `bin/` directory. If not, you'll need to sort out the definition of your `$PATH` variable. + +Two alternate patterns for installing with `pip` may also be more robust to this problem: + +- Invoke `pip` on the command line with `python -m pip install ` rather than `pip install ` +- Use `%pip install ` in a Jupyter notebook to install it in the same place as the kernel + +.. _faq_import_fails: + +I can't import seaborn, even though it's definitely installed! +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +*You've definitely installed seaborn in the right place, but importing it produces a long traceback and a confusing error message, perhaps something like* `ImportError: DLL load failed: The specified module could not be found`. + +Such errors usually indicate a problem with the way Python libraries are using compiled resources. Because seaborn is pure Python, it won't directly encounter these problems, but its dependencies (numpy, scipy, matplotlib, and pandas) might. To fix the issue, you'll first need to read through the traceback and figure out which dependency was being imported at the time of the error. Then consult the installation documentation for the relevant package, which might have advice for getting an installation working on your specific system. 
+ +The most common culprit of these issues is scipy, which has many compiled components. Starting in seaborn version 0.12, scipy is an optional dependency, which should help to reduce the frequency of these issues. + +.. _faq_no_plots: + +Why aren't my plots showing up? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +*You're calling seaborn functions — maybe in a terminal or IDE with an integrated IPython console — but not seeing any plots.* + +In matplotlib, there is a distinction between *creating* a figure and *showing* it, and in some cases it's necessary to explicitly call :func:`matplotlib.pyplot.show` at the point when you want to see the plot. Because that command blocks by default and is not always desired (for instance, you may be executing a script that saves files to disk) seaborn does not deviate from standard matplotlib practice here. + +Yet most of the examples in the seaborn docs do not have this line, because there are multiple ways to avoid needing it. In a Jupyter notebook with the `"inline" `_ (default) or `"widget" `_ backends, :func:`matplotlib.pyplot.show` is automatically called after executing a cell, so any figures will appear in the cell's outputs. You can also activate a more interactive experience by executing `%matplotlib` in any Jupyter or IPython interface or by calling :func:`matplotlib.pyplot.ion` anywhere in Python. Both methods will configure matplotlib to show or update the figure after every plotting command. + +.. _faq_repl_output: + +Why is something printed after every notebook cell? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +*You're using seaborn in a Jupyter notebook, and every cell prints something like* `<AxesSubplot:>` *or* `<seaborn.axisgrid.FacetGrid at 0x...>` *before showing the plot.* + +Jupyter notebooks will show the result of the final statement in the cell as part of its output, and each of seaborn's plotting functions returns a reference to the matplotlib or seaborn object that contains the plot.
If this is bothersome, you can suppress this output in a few ways: + +- Always assign the result of the final statement to a variable (e.g. `ax = sns.histplot(...)`) +- Add a semicolon to the end of the final statement (e.g. `sns.histplot(...);`) +- End every cell with a function that has no return value (e.g. `plt.show()`, which isn't needed but also causes no problems) +- Add `cell metadata tags `_, if you're converting the notebook to a different representation + +.. _faq_inline_dpi: + +Why do the plots look fuzzy in a Jupyter notebook? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The default "inline" backend (defined by `IPython `_) uses an unusually low dpi (`"dots per inch" `_) for figure output. This is a space-saving measure: lower dpi figures take up less disk space. (Also, lower dpi inline graphics appear *physically* smaller because they are represented as `PNGs `_, which do not exactly have a concept of resolution.) So one faces an economy/quality tradeoff. + +You can increase the DPI by resetting the rc parameters through the matplotlib API, using + +:: + + plt.rcParams.update({"figure.dpi": 96}) + +Or do it as you activate the seaborn theme:: + + sns.set_theme(rc={"figure.dpi": 96}) + +If you have a high pixel-density monitor, you can make your plots sharper using "retina mode":: + + %config InlineBackend.figure_format = "retina" + +This won't change the apparent size of your plots in a Jupyter interface, but they might appear very large in other contexts (i.e. on GitHub). And they will take up 4x the disk space. Alternatively, you can make SVG plots:: + + %config InlineBackend.figure_format = "svg" + +This will configure matplotlib to emit `vector graphics `_ with "infinite resolution". The downside is that file size will now scale with the number and complexity of the artists in your plot, and in some cases (e.g., a large scatterplot matrix) the load will impact browser responsiveness. + +Tricky concepts +--------------- + +.. 
_faq_function_levels: + +What do "figure-level" and "axes-level" mean? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +*You've encountered the term "figure-level" or "axes-level", maybe in the seaborn docs, a StackOverflow answer, or a GitHub thread, but you don't understand what it means.* + +In brief, all plotting functions in seaborn fall into one of two categories: + +- "axes-level" functions, which plot onto a single subplot that may or may not exist at the time the function is called +- "figure-level" functions, which internally create a matplotlib figure, potentially including multiple subplots + +This design is intended to satisfy two objectives: + +- seaborn should offer functions that are "drop-in" replacements for matplotlib methods +- seaborn should be able to produce figures that show "facets" or marginal distributions on distinct subplots + +The figure-level functions always combine one or more axes-level functions with an object that manages the layout. So, for example, :func:`relplot` is a figure-level function that combines either :func:`scatterplot` or :func:`lineplot` with a :class:`FacetGrid`. In contrast, :func:`jointplot` is a figure-level function that can combine multiple different axes-level functions — :func:`scatterplot` and :func:`histplot` by default — with a :class:`JointGrid`. + +If all you're doing is creating a plot with a single seaborn function call, this is not something you need to worry too much about. But it becomes relevant when you want to customize at a level beyond what the API of each function offers. It is also the source of various other points of confusion, so it is an important distinction to understand (at least broadly) and keep in mind. + +This is explained in more detail in the :doc:`tutorial ` and in `this blog post `_. + +.. _faq_categorical_plots: + +What is a "categorical plot" or "categorical function"?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Next to the figure-level/axes-level distinction, this concept is probably the second biggest source of confusing behavior. + +Several :ref:`seaborn functions ` are referred to as "categorical" because they are designed to support a use-case where either the x or y variable in a plot is categorical (that is, the variable takes a finite number of potentially non-numeric values). + +At the time these functions were written, matplotlib did not have any direct support for non-numeric data types. So seaborn internally builds a mapping from unique values in the data to 0-based integer indexes, which is what it passes to matplotlib. If your data are strings, that's great, and it more-or-less matches how `matplotlib now handles `_ string-typed data. + +But a potential gotcha is that these functions *always do this by default*, even if both the x and y variables are numeric. This gives rise to a number of confusing behaviors, especially when mixing categorical and non-categorical plots (e.g., a combo bar-and-line plot). + +The v0.13 release added a `native_scale` parameter which provides control over this behavior. It is `False` by default, but setting it to `True` will preserve the original properties of the data used for categorical grouping. + +Specifying data +--------------- + +.. _faq_data_format: + +How does my data need to be organized? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To get the most out of seaborn, your data should have a "long-form" or "tidy" representation. In a dataframe, `this means that `_ each variable has its own column, each observation has its own row, and each value has its own cell. With long-form data, you can succinctly and exactly specify a visualization by assigning variables in the dataset (columns) to roles in the plot. + +Data organization is a common stumbling block for beginners, in part because data are often not collected or stored in a long-form representation. 
Therefore, it is often necessary to `reshape `_ the data using pandas before plotting. Data reshaping can be a complex undertaking, requiring both a solid grasp of dataframe structure and knowledge of the pandas API. Investing some time in developing this skill can pay large dividends. + +But while seaborn is *most* powerful when provided with long-form data, nearly every seaborn function will accept and plot "wide-form" data too. You can trigger this by passing an object to seaborn's `data=` parameter without specifying other plot variables (`x`, `y`, ...). You'll be limited when using wide-form data: each function can make only one kind of wide-form plot. In most cases, seaborn tries to match what matplotlib or pandas would do with a dataset of the same structure. Reshaping your data into long-form will give you substantially more flexibility, but it can be helpful to take a quick look at your data very early in the process, and seaborn tries to make this possible. + +Understanding how your data should be represented — and how to get it that way if it starts out messy — is very important for making efficient and complete use of seaborn, and it is elaborated on at length in the :doc:`user-guide `. + +.. _faq_pandas_requirement: + +Does seaborn only work with pandas? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Generally speaking, no: seaborn is `quite flexible `_ about how your dataset needs to be represented. + +In most cases, :ref:`long-form data ` represented by multiple vector-like types can be passed directly to `x`, `y`, or other plotting parameters. Or you can pass a dictionary of vector types to `data` rather than a DataFrame. And when plotting with wide-form data, you can use a 2D numpy array or even nested lists to plot in wide-form mode. + +There are a couple older functions (namely, :func:`catplot` and :func:`lmplot`) that do require you to pass a :class:`pandas.DataFrame`. 
But at this point, they are the exception, and they will gain more flexibility over the next few release cycles. + +Layout problems +--------------- + +.. _faq_figure_size: + +How do I change the figure size? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is going to be more complicated than you might hope, in part because there are multiple ways to change the figure size in matplotlib, and in part because of the :ref:`figure-level/axes-level ` distinction in seaborn. + +In matplotlib, you can usually set the default size for all figures through the `rc parameters `_, specifically `figure.figsize`. And you can set the size of an individual figure when you create it (e.g. `plt.subplots(figsize=(w, h))`). If you're using an axes-level seaborn function, both of these will work as expected. + +Figure-level functions both ignore the default figure size and :ref:`parameterize the figure size differently `. When calling a figure-level function, you can pass values to `height=` and `aspect=` to set (roughly) the size of each *subplot*. The advantage here is that the size of the figure automatically adapts when you add faceting variables. But it can be confusing. + +Fortunately, there's a consistent way to set the exact figure size in a function-independent manner. Instead of setting the figure size when the figure is created, modify it after you plot by calling `obj.figure.set_size_inches(...)`, where `obj` is either a matplotlib axes (usually assigned to `ax`) or a seaborn `FacetGrid` (usually assigned to `g`). + +Note that :attr:`FacetGrid.figure` exists only on seaborn >= 0.11.2; before that you'll have to access :attr:`FacetGrid.fig`. + +Also, if you're making pngs (or in a Jupyter notebook), you can — perhaps surprisingly — scale all your plots up or down by :ref:`changing the dpi `. + +.. _faq_plot_misplaced: + +Why isn't seaborn drawing the plot where I tell it to? 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +*You've explicitly created a matplotlib figure with one or more subplots and tried to draw a seaborn plot on it, but you end up with an extra figure and a blank subplot. Perhaps your code looks something like* + +:: + + f, ax = plt.subplots() + sns.catplot(..., ax=ax) + +This is a :ref:`figure-level/axes-level ` gotcha. Figure-level functions always create their own figure, so you can't direct them towards an existing axes the way you can with axes-level functions. Most functions will warn you when this happens, suggest the appropriate axes-level function, and ignore the `ax=` parameter. A few older functions might put the plot where you want it (because they internally pass `ax` to their axes-level function) while still creating an extra figure. This latter behavior should be considered a bug, and it is not to be relied on. + +The way things currently work, you can either set up the matplotlib figure yourself, or you can use a figure-level function, but you can't do both at the same time. + +.. _faq_categorical_line: + +Why can't I draw a line over a bar/box/strip/violin plot? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +*You're trying to create a single plot using multiple seaborn functions, perhaps by drawing a lineplot or regplot over a barplot or violinplot. You expect the line to go through the mean value for each box (etc.), but it looks to be misaligned, or maybe it's all the way off to the side.* + +You are trying to combine a :ref:`"categorical plot" ` with another plot type. If your `x` variable has numeric values, it seems like this should work. But recall: seaborn's categorical plots map unique values on the categorical axis to integer indexes. So if your data have unique `x` values of 1, 6, 20, 94, the corresponding plot elements will get drawn at 0, 1, 2, 3 (and the tick labels will be changed to represent the actual value).
+ +The line or regression plot doesn't know that this has happened, so it will use the actual numeric values, and the plots won't line up at all. + +As of now, there are two ways to work around this. In situations where you want to draw a line, you could use the (somewhat misleadingly named) :func:`pointplot` function, which is also a "categorical" function and will use the same rules for drawing the plot. If this doesn't solve the problem (for one, it's not as visually flexible as :func:`lineplot`), you could implement the mapping from actual values to integer indexes yourself and draw the plot that way:: + + unique_xs = sorted(df["x"].unique()) + sns.violinplot(data=df, x="x", y="y") + sns.lineplot(data=df, x=df["x"].map(unique_xs.index), y="y") + +This is something that will be easier in a planned future release, as it will become possible to make the categorical functions treat numeric data as numeric. (As of v0.12, it's possible only in :func:`stripplot` and :func:`swarmplot`, using `native_scale=True`). + +How do I move the legend? +~~~~~~~~~~~~~~~~~~~~~~~~~ + +*When applying a semantic mapping to a plot, seaborn will automatically create a legend and add it to the figure. But the automatic choice of legend position is not always ideal.* + +With seaborn v0.11.2 or later, use the :func:`move_legend` function. + +On older versions, a common pattern was to call `ax.legend(loc=...)` after plotting. While this appears to move the legend, it actually *replaces* it with a new one, using any labeled artists that happen to be attached to the axes. This does `not consistently work `_ across plot types. And it does not propagate the legend title or positioning tweaks that are used to format a multi-variable legend. + +The :func:`move_legend` function is actually more powerful than its name suggests, and it can also be used to modify other `legend parameters `_ (font size, handle length, etc.) after plotting. + +Other customizations +-------------------- + +..
_faq_figure_customization: + +How can I change something about the figure? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +*You want to make a very specific plot, and seaborn's defaults aren't doing it for you.* + +There's basically a four-layer hierarchy to customizing a seaborn figure: + +1. Explicit seaborn function parameters +2. Passed-through matplotlib keyword arguments +3. Matplotlib axes methods +4. Matplotlib artist methods + +First, read through the API docs for the relevant seaborn function. Each has a lot of parameters (probably too many), and you may be able to accomplish your desired customization using seaborn's own API. + +But seaborn does delegate a lot of customization to matplotlib. Most functions have `**kwargs` in their signature, which will catch extra keyword arguments and pass them through to the underlying matplotlib function. For example, :func:`scatterplot` has a number of parameters, but you can also use any valid keyword argument for :meth:`matplotlib.axes.Axes.scatter`, which it calls internally. + +Passing through keyword arguments lets you customize the artists that represent data, but often you will want to customize other aspects of the figure, such as labels, ticks, and titles. You can do this by calling methods on the object that seaborn's plotting functions return. Depending on whether you're calling an :ref:`axes-level or figure-level function `, this may be a :class:`matplotlib.axes.Axes` object or a seaborn wrapper (such as :class:`seaborn.FacetGrid`). Both kinds of objects have numerous methods that you can call to customize nearly anything about the figure. The easiest thing is usually to call :meth:`matplotlib.axes.Axes.set` or :meth:`seaborn.FacetGrid.set`, which let you modify multiple attributes at once, e.g.:: + + ax = sns.scatterplot(...)
+ ax.set( + xlabel="The x label", + ylabel="The y label", + title="The title", + xlim=(xmin, xmax), + xticks=[...], + xticklabels=[...], + ) + +Finally, the deepest customization may require you to reach "into" the matplotlib axes and tweak the artists that are stored on it. These will be in artist lists, such as `ax.lines`, `ax.collections`, `ax.patches`, etc. + +*Warning:* Neither matplotlib nor seaborn consider the specific artists produced by their plotting functions to be part of a stable API. Because it's not possible to gracefully warn about upcoming changes to the artist types or the order in which they are stored, code that interacts with these attributes could break unexpectedly. With that said, seaborn does try hard to avoid making this kind of change. + +.. _faq_matplotlib_requirement: + +Wait, I need to learn how to use matplotlib too? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +It really depends on how much customization you need. You can certainly perform a lot of exploratory data analysis while primarily or exclusively interacting with the seaborn API. But, if you're polishing a figure for a presentation or publication, you'll likely find yourself needing to understand at least a little bit about how matplotlib works. Matplotlib is extremely flexible, and it lets you control literally everything about a figure if you drill down far enough. + +Seaborn was originally designed with the idea that it would handle a specific set of well-defined operations through a very high-level API, while letting users "drop down" to matplotlib when they desired additional customization. This can be a pretty powerful combination, and it works reasonably well if you already know how to use matplotlib. But as seaborn has gained more features, it has become more feasible to learn seaborn *first*. In that situation, the need to switch APIs tends to be a bit more confusing / frustrating.
This has motivated the development of seaborn's new :doc:`objects interface `, which aims to provide a more cohesive API for both high-level and low-level figure specification. Hopefully, it will alleviate the "two-library problem" as it matures. + +With that said, the level of deep control that matplotlib affords really can't be beat, so if you care about doing very specific things, it really is worth learning. + +.. _faq_object_oriented: + +How do I use seaborn with matplotlib's object-oriented interface? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +*You prefer to use matplotlib's explicit or* `"object-oriented" `_ *interface, because it makes your code easier to reason about and maintain. But the object-oriented interface consists of methods on matplotlib objects, whereas seaborn offers you independent functions.* + +This is another case where it will be helpful to keep the :ref:`figure-level/axes-level ` distinction in mind. + +Axes-level functions can be used like any matplotlib axes method, but instead of calling `ax.func(...)`, you call `func(..., ax=ax)`. They also return the axes object (which they may have created, if no figure was currently active in matplotlib's global state). You can use the methods on that object to further customize the plot even if you didn't start with :func:`matplotlib.pyplot.figure` or :func:`matplotlib.pyplot.subplots`:: + + ax = sns.histplot(...) + ax.set(...) + +Figure-level functions :ref:`can't be directed towards an existing figure `, but they do store the matplotlib objects on the :class:`FacetGrid` object that they return (which seaborn docs always assign to a variable named `g`). + +If your figure-level function created only one subplot, you can access it directly:: + + g = sns.displot(...) + g.ax.set(...)
+ +For multiple subplots, you can either use :attr:`FacetGrid.axes` (which is always a 2D array of axes) or :attr:`FacetGrid.axes_dict` (which maps the row/col keys to the corresponding matplotlib object):: + + g = sns.displot(..., col=...) + for col, ax in g.axes_dict.items(): + ax.set(...) + +But if you're batch-setting attributes on all subplots, use the :meth:`FacetGrid.set` method rather than iterating over the individual axes:: + + g = sns.displot(...) + g.set(...) + +To access the underlying matplotlib *figure*, use :attr:`FacetGrid.figure` on seaborn >= 0.11.2 (or :attr:`FacetGrid.fig` on any other version). + +.. _faq_bar_annotations: + +Can I annotate bar plots with the bar values? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Nothing like this is built into seaborn, but matplotlib v3.4.0 added a convenience function (:meth:`matplotlib.axes.Axes.bar_label`) that makes it relatively easy. Here are a couple of recipes; note that you'll need to use a different approach depending on whether your bars come from a :ref:`figure-level or axes-level function `:: + + # Axes-level + ax = sns.histplot(df, x="x_var") + for bars in ax.containers: + ax.bar_label(bars) + + # Figure-level, one subplot + g = sns.displot(df, x="x_var") + for bars in g.ax.containers: + g.ax.bar_label(bars) + + # Figure-level, multiple subplots + g = sns.displot(df, x="x_var", col="col_var") + for ax in g.axes.flat: + for bars in ax.containers: + ax.bar_label(bars) + +.. _faq_dar_mode: + +Can I use seaborn in dark mode? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +There's no direct support for this in seaborn, but matplotlib has a `"dark_background" `_ style-sheet that you could use, e.g.:: + + sns.set_theme(style="ticks", rc=plt.style.library["dark_background"]) + +Note that "dark_background" changes the default color palette to "Set2", and that will override any palette you define in :func:`set_theme`.
If you'd rather use a different color palette, you'll have to call :func:`sns.set_palette` separately. The default :doc:`seaborn palette ` ("deep") has poor contrast against a dark background, so you'd be better off using "muted", "bright", or "pastel". + +Statistical inquiries +--------------------- + +.. _faq_stat_results: + +Can I access the results of seaborn's statistical transformations? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Because seaborn performs some statistical operations as it builds plots (aggregating, bootstrapping, fitting regression models), some users would like access to the statistics that it computes. This is not possible: it's explicitly considered out of scope for seaborn (a visualization library) to offer an API for interrogating statistical models. + +If you simply want to be diligent and verify that seaborn is doing things correctly (or that it matches your own code), it's open-source, so feel free to read the code. Or, because it's Python, you can call into the private methods that calculate the stats (just don't do this in production code). But don't expect seaborn to offer features that are more at home in `scipy `_ or `statsmodels `_. + +.. _faq_standard_error: + +Can I show standard error instead of a confidence interval? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +As of v0.12, this is possible in most places, using the new `errorbar` API (see the :doc:`tutorial ` for more details). + +.. _faq_kde_value: + +Why does the y axis for a KDE plot go above 1? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +*You've estimated a probability distribution for your data using* :func:`kdeplot`, *but the y axis goes above 1. Aren't probabilities bounded by 1? Is this a bug?* + +This is not a bug, but it is a common confusion (about kernel density plots and probability distributions more broadly). 
A continuous probability distribution is defined by a `probability density function `_, which :func:`kdeplot` estimates. The probability density function does **not** output *a probability*: a continuous random variable can take an infinite number of values, so the probability of observing any *specific* value is infinitely small. You can only talk meaningfully about the probability of observing a value that falls within some *range*. The probability of observing a value that falls within the complete range of possible values is 1. Likewise, the probability density function is normalized so that the area under it (that is, the integral of the function across its domain) equals 1. If the range of likely values is small, the curve will have to go above 1 to make this possible. + +Common curiosities +------------------ + +.. _faq_import_convention: + +Why is seaborn imported as `sns`? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is an obscure reference to the `namesake `_ of the library, but you can also think of it as "seaborn name space". + +.. _faq_seaborn_sucks: + +Why is ggplot so much better than seaborn? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Good question. Probably because you get to use the word "geom" a lot, and it's fun to say. "Geom". "Geeeeeooom". diff --git a/doc/index.rst b/doc/index.rst index 22ec8d0603..995fc04d1f 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -1,130 +1,90 @@ -.. raw:: html - - - -Seaborn: statistical data visualization +:html_theme.sidebar_secondary.remove: + +seaborn: statistical data visualization ======================================= -.. raw:: html - - -
- -
- - -Seaborn is a Python visualization library based on matplotlib. It provides a high-level interface for drawing attractive statistical graphics. - -For a brief introduction to the ideas behind the package, you can read the :ref:`introductory notes `. - -Much more detail can be found in the seaborn :ref:`tutorial `. You can also browse the :ref:`example gallery ` or :ref:`API reference ` to see the kind of tools that are available. - -To check out the code, report a bug, or contribute a new feature, please visit -the `github repository `_. You can also get -in touch on `twitter `_. - - -.. raw:: html - -
-
-
-

Documentation

-
-
-

Tutorial

-
-
-
-
- -.. toctree:: - :maxdepth: 1 - - introduction - whatsnew - installing - examples/index - api - -.. raw:: html - -
-
- -.. toctree:: - :maxdepth: 1 - - tutorial/aesthetics - tutorial/color_palettes - tutorial/plotting_distributions - tutorial/quantitative_linear_models - -.. raw:: html - -
-
- -.. toctree:: - :maxdepth: 1 - - tutorial/categorical_linear_models - tutorial/dataset_exploration - tutorial/timeseries_plots - tutorial/axis_grids - -.. raw:: html - -
-
-
+.. grid:: 6 + :gutter: 1 + + .. grid-item:: + + .. image:: example_thumbs/scatterplot_matrix_thumb.png + :target: ./examples/scatterplot_matrix.html + + .. grid-item:: + + .. image:: example_thumbs/errorband_lineplots_thumb.png + :target: examples/errorband_lineplots.html + + .. grid-item:: + + .. image:: example_thumbs/scatterplot_sizes_thumb.png + :target: examples/scatterplot_sizes.html + + .. grid-item:: + + .. image:: example_thumbs/timeseries_facets_thumb.png + :target: examples/timeseries_facets.html + + .. grid-item:: + + .. image:: example_thumbs/horizontal_boxplot_thumb.png + :target: examples/horizontal_boxplot.html + + .. grid-item:: + + .. image:: example_thumbs/regression_marginals_thumb.png + :target: examples/regression_marginals.html + +.. grid:: 1 1 3 3 + + .. grid-item:: + :columns: 12 12 6 6 + + Seaborn is a Python data visualization library based on `matplotlib + `_. It provides a high-level interface for drawing + attractive and informative statistical graphics. + + For a brief introduction to the ideas behind the library, you can read the + :doc:`introductory notes ` or the `paper + `_. Visit the + :doc:`installation page ` to see how you can download the package + and get started with it. You can browse the :doc:`example gallery + ` to see some of the things that you can do with seaborn, + and then check out the :doc:`tutorials ` or :doc:`API reference ` + to find out how. + + To see the code or report a bug, please visit the `GitHub repository + `_. General support questions are most at home + on `stackoverflow `_, which + has a dedicated channel for seaborn. + + .. grid-item-card:: Contents + :columns: 12 12 2 2 + :class-title: sd-fs-5 + :class-body: sd-pl-4 + + .. toctree:: + :maxdepth: 1 + + Installing + Gallery + Tutorial + API + Releases + Citing + FAQ + + .. 
grid-item-card:: Features + :columns: 12 12 4 4 + :class-title: sd-fs-5 + :class-body: sd-pl-3 + + * :bdg-secondary:`New` Objects: :ref:`API <objects_api>` | :doc:`Tutorial <tutorial/objects_interface>` + * Relational plots: :ref:`API <relational_api>` | :doc:`Tutorial <tutorial/relational>` + * Distribution plots: :ref:`API <distribution_api>` | :doc:`Tutorial <tutorial/distributions>` + * Categorical plots: :ref:`API <categorical_api>` | :doc:`Tutorial <tutorial/categorical>` + * Regression plots: :ref:`API <regression_api>` | :doc:`Tutorial <tutorial/regression>` + * Multi-plot grids: :ref:`API <grid_api>` | :doc:`Tutorial <tutorial/axis_grids>` + * Figure theming: :ref:`API <style_api>` | :doc:`Tutorial <tutorial/aesthetics>` + * Color palettes: :ref:`API <palette_api>` | :doc:`Tutorial <tutorial/color_palettes>` diff --git a/doc/installing.rst b/doc/installing.rst index 49606d9e5c..d28a65ee67 100644 --- a/doc/installing.rst +++ b/doc/installing.rst @@ -1,65 +1,138 @@ .. _installing: +.. currentmodule:: seaborn + Installing and getting started ------------------------------ -To install the released version of seaborn, you can use ``pip`` (i.e. ``pip install seaborn``). -Alternatively, you can use ``pip`` to install the development version, with the command ``pip install -git+git://github.com/mwaskom/seaborn.git#egg=seaborn``. Another option would be -to to clone the `github repository `_ and -install with ``pip install .`` from the source directory. Seaborn itself is pure -Python, so installation should be reasonably straightforward. +Official releases of seaborn can be installed from `PyPI <https://pypi.org/project/seaborn/>`_:: + + pip install seaborn + +The basic invocation of `pip` will install seaborn and, if necessary, its mandatory dependencies. +It is possible to include optional dependencies that give access to a few advanced features:: + + pip install seaborn[stats] + +The library is also included as part of the `Anaconda <https://repo.anaconda.com/>`_ distribution, +and it can be installed with `conda`:: + + conda install seaborn + +As the main Anaconda repository can be slow to add new releases, you may prefer using the +`conda-forge <https://conda-forge.org/>`_ channel:: + + conda install seaborn -c conda-forge -Dependencies +Dependencies ~~~~~~~~~~~~ -We recommend using seaborn with the `Anaconda distribution `_.
+Supported Python versions +^^^^^^^^^^^^^^^^^^^^^^^^^ -- Python 2.7 or 3.3+ +- Python 3.8+ Mandatory dependencies ^^^^^^^^^^^^^^^^^^^^^^ -- `numpy `__ +- `numpy `__ -- `scipy `__ +- `pandas `__ -- `matplotlib `__ +- `matplotlib `__ -- `pandas `__ +Optional dependencies +^^^^^^^^^^^^^^^^^^^^^ -Recommended dependencies -^^^^^^^^^^^^^^^^^^^^^^^^ +- `statsmodels `__, for advanced regression plots -- `statsmodels `__ +- `scipy `__, for clustering matrices and some advanced options -- `patsy `__ +- `fastcluster `__, faster clustering of large matrices -Version-wise, we make an attempt to keep seaborn working on the stable Debian -channels. There may be cases where some more advanced features only work with -newer versions of these dependencies, although these should be rare. There are -also some known bugs on older versions of matplotlib, so you should in general -try to use a modern version, but for many cases older matplotlibs will work -fine. Seaborn is tested on the most recent versions offered through ``conda``. +Quickstart +~~~~~~~~~~ -Import conventions -~~~~~~~~~~~~~~~~~~ +Once you have seaborn installed, you're ready to get started. +To test it out, you could load and plot one of the example datasets:: -By convention, ``seaborn`` is abbreviated to ``sns`` on imports. + import seaborn as sns + df = sns.load_dataset("penguins") + sns.pairplot(df, hue="species") -Testing -~~~~~~~ +If you're working in a Jupyter notebook or an IPython terminal with +`matplotlib mode `_ +enabled, you should immediately see :ref:`the plot `. +Otherwise, you may need to explicitly call :func:`matplotlib.pyplot.show`:: -To test seaborn, run ``make test`` in the root directory of the source -distribution. This runs the unit test suite (which can also be exercised -separately by running ``nosetests``). It also runs the code in the example -notebooks to smoke-test a broader and more realistic range of example usage. 
+ import matplotlib.pyplot as plt + plt.show() -Bugs -~~~~ +While you can get pretty far with only seaborn imported, having access to +matplotlib functions is often useful. The tutorials and API documentation +typically assume the following imports:: -Please report any bugs you encounter through the github `issue tracker -`_. It will be most helpful to -upload an IPython notebook that can reproduce the error in a `gist -`_ and link to that gist in the bug report. + import numpy as np + import pandas as pd + + import matplotlib as mpl + import matplotlib.pyplot as plt + + import seaborn as sns + import seaborn.objects as so + +Debugging install issues +~~~~~~~~~~~~~~~~~~~~~~~~ + +The seaborn codebase is pure Python, and the library should generally install +without issue. Occasionally, difficulties will arise because the dependencies +include compiled code and link to system libraries. These difficulties +typically manifest as errors on import with messages such as ``"DLL load +failed"``. To debug such problems, read through the exception trace to +figure out which specific library failed to import, and then consult the +installation docs for that package to see if they have tips for your particular +system. + +In some cases, an installation of seaborn will appear to succeed, but trying +to import it will raise an error with the message ``"No module named +seaborn"``. This usually means that you have multiple Python installations on +your system and that your ``pip`` or ``conda`` points towards a different +installation than where your interpreter lives. Resolving this issue +will involve sorting out the paths on your system, but it can sometimes be +avoided by invoking ``pip`` with ``python -m pip install seaborn``. + +Getting help +~~~~~~~~~~~~ +If you think you've encountered a bug in seaborn, please report it on the +`GitHub issue tracker `_. 
+To be useful, bug reports must include the following information: + +- A reproducible code example that demonstrates the problem +- The output that you are seeing (an image of a plot, or the error message) +- A clear explanation of why you think something is wrong +- The specific versions of seaborn and matplotlib that you are working with + +Bug reports are easiest to address if they can be demonstrated using one of the +example datasets from the seaborn docs (i.e. with :func:`load_dataset`). +Otherwise, it is preferable that your example generate synthetic data to +reproduce the problem. If you can only demonstrate the issue with your +actual dataset, you will need to share it, ideally as a csv. + +If you've encountered an error, searching the specific text of the message +before opening a new issue can often help you solve the problem quickly and +avoid making a duplicate report. + +Because matplotlib handles the actual rendering, errors or incorrect outputs +may be due to a problem in matplotlib rather than one in seaborn. It can save time +if you try to reproduce the issue in an example that uses only matplotlib, +so that you can report it in the right place. But it is alright to skip this +step if it's not obvious how to do it. + +General support questions are more at home on `stackoverflow +<https://stackoverflow.com/questions/tagged/seaborn/>`_, where there is a +larger audience of people who will see your post and may be able to offer +assistance. Your chance of getting a quick answer will be higher if you include +`runnable code <https://stackoverflow.com/help/minimal-reproducible-example>`_, +a precise statement of what you are hoping to achieve, and a clear explanation +of the problems that you have encountered. diff --git a/doc/introduction.rst b/doc/introduction.rst deleted file mode 100644 index 58878f8796..0000000000 --- a/doc/introduction.rst +++ /dev/null @@ -1,25 +0,0 @@ -.. _introduction: - -An introduction to seaborn -========================== - -Seaborn is a library for making attractive and informative statistical graphics in Python.
It is built on top of `matplotlib `_ and tightly integrated with the `PyData `_ stack, including support for `numpy `_ and `pandas `_ data structures and statistical routines from `scipy `_ and `statsmodels `_. - -Some of the features that seaborn offers are - -- Several :ref:`built-in themes ` that improve on the default matplotlib aesthetics -- Tools for choosing :ref:`color palettes ` to make beautiful plots that reveal patterns in your data -- Functions for visualizing :ref:`univariate ` and :ref:`bivariate ` distributions or for :ref:`comparing ` them between subsets of data -- Tools that fit and visualize :ref:`linear regression ` models for different kinds of :ref:`independent ` and :ref:`dependent ` variables -- Functions that visualize :ref:`matrices of data ` and use clustering algorithms to :ref:`discover structure ` in those matrices -- A function to plot :ref:`statistical timeseries ` data with flexible estimation and :ref:`representation ` of uncertainty around the estimate -- High-level abstractions for structuring :ref:`grids of plots ` that let you easily build :ref:`complex ` visualizations - -Seaborn aims to make visualization a central part of exploring and understanding data. The plotting functions operate on dataframes and arrays containing a whole dataset and internally perform the necessary aggregation and statistical model-fitting to produce informative plots. Seaborn's goals are similar to those of R's `ggplot `_, but it takes a different approach with an imperative and object-oriented style that tries to make it straightforward to construct sophisticated plots. If matplotlib "tries to make easy things easy and hard things possible", seaborn aims to make a well-defined set of hard things easy too. - -The plotting functions try to do something useful when called with a minimal set of arguments, and they expose a number of customizable options through additional parameters. 
Some of the functions plot directly into a matplotlib axes object, while others operate on an entire figure and produce plots with several panels. In the latter case, the plot is drawn using a Grid object that links the structure of the figure to the structure of the dataset in an abstract way. - -Because seaborn uses matplotlib, the graphics can be further tweaked using matplotlib tools and rendered with any of the matplotlib backends to generate publication-quality figures. Seaborn can also be used to target web-based graphics through the `mpld3 `_ and `Bokeh `_ libraries. - -For more detailed information and copious examples of the syntax and resulting plots, you can check out the :ref:`example gallery `, :ref:`tutorial ` or :ref:`API reference `. - diff --git a/doc/make.bat b/doc/make.bat new file mode 100644 index 0000000000..32bb24529f --- /dev/null +++ b/doc/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. 
+ echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/doc/matplotlibrc b/doc/matplotlibrc new file mode 100644 index 0000000000..67a95bbfd0 --- /dev/null +++ b/doc/matplotlibrc @@ -0,0 +1 @@ +savefig.bbox : tight diff --git a/doc/sphinxext/plot_generator.py b/doc/sphinxext/gallery_generator.py similarity index 80% rename from doc/sphinxext/plot_generator.py rename to doc/sphinxext/gallery_generator.py index 46fbdf01d9..fa8e08b014 100644 --- a/doc/sphinxext/plot_generator.py +++ b/doc/sphinxext/gallery_generator.py @@ -4,7 +4,6 @@ Lightly modified from the mpld3 project. """ -from __future__ import division import os import os.path as op import re @@ -12,43 +11,52 @@ import token import tokenize import shutil -import json +import warnings import matplotlib matplotlib.use('Agg') -import matplotlib.pyplot as plt +import matplotlib.pyplot as plt # noqa: E402 -from matplotlib import image + +# Python 3 has no execfile +def execfile(filename, globals=None, locals=None): + with open(filename, "rb") as fp: + exec(compile(fp.read(), filename, 'exec'), globals, locals) RST_TEMPLATE = """ + +.. currentmodule:: seaborn + .. _{sphinx_tag}: {docstring} .. image:: {img_file} -**Python source code:** :download:`[download source: {fname}]<{fname}>` +**seaborn components used:** {components} .. literalinclude:: {fname} :lines: {end_line}- + """ INDEX_TEMPLATE = """ +:html_theme.sidebar_secondary.remove: .. raw:: html