diff --git a/.travis.yml b/.travis.yml
index 2d089026f..1ac6cb6a5 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,12 +1,21 @@
-# Travis Continuos Integration
-# Currently only tests notebook files
-# Based on https://conda.pydata.org/docs/travis.html
-sudo: false # use container based build
+# Travis Continuous Integration
+# Mamba links:
+# https://mamba.readthedocs.io/en/latest/installation.html
+# Sample travis file:
+# https://conda.pydata.org/docs/travis.html
+# Avoiding duplicate branch-pr triggers:
+# https://github.com/travis-ci/travis-ci/issues/1147#issuecomment-160820262
+dist: xenial
+sudo: false  # use container based build
 language: python
-dist: focal
+
 notifications:
   email: false
 
+branches:
+  only:
+    - master
+
 python:
   - "3.7"
 
@@ -18,25 +27,22 @@ before_install:
           echo "Only doc files were updated, not running the CI."
           exit
         fi
-  - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
-  - bash miniconda.sh -b -p $HOME/miniconda
-  - export PATH="$HOME/miniconda/bin:$PATH"
-  - hash -r
-  - conda config --set always_yes yes --set changeps1 no --set show_channel_urls true
-  - conda update -q conda
-  - conda info -a
+  - curl micro.mamba.pm/install.sh | bash
+  - export PATH="$HOME/micromamba/bin:$PATH"
+  - micromamba config set always_yes true
+  - micromamba config set changeps1 false
+  - micromamba config set show_channel_urls true
+  - micromamba shell init --shell=bash --prefix=~/micromamba
 
 install:
-  - conda env create --file ci/environment.yml
-  - source activate proplot-dev
-  - conda list
-  - which conda
+  - . ~/.bashrc
   - which python
+  - which micromamba
+  - micromamba env create --file ci/environment.yml
+  - micromamba activate proplot-dev
   - python setup.py sdist bdist_wheel
   - pip install --user ./dist/*.whl
+  - micromamba list
 
 script:
   - ci/run-linter.sh
-  - pushd docs
-  - make html
-  - popd
diff --git a/ci/environment.yml b/ci/environment.yml
index f316c156d..dbfd85855 100644
--- a/ci/environment.yml
+++ b/ci/environment.yml
@@ -7,12 +7,12 @@ channels:
 dependencies:
   - python==3.8
   - numpy==1.19.5
-  - pandas
-  - xarray
   - matplotlib==3.2.2
   - cartopy==0.20.2
-  - ipykernel
-  - pandoc
+  - seaborn
+  - pandas
+  - xarray
+  - pint
   - python-build
   - setuptools
   - setuptools_scm
@@ -26,14 +26,6 @@ dependencies:
     - black
     - doc8
     - pytest
-    - pytest-sugar
     - pyqt5
-    - docutils==0.16
-    - sphinx>=3.0
-    - sphinx-copybutton
-    - sphinx-rtd-light-dark
-    - jinja2==2.11.3
-    - markupsafe==2.0.1
-    - nbsphinx==0.8.1
     - jupytext
-    - git+https://github.com/proplot-dev/sphinx-automodapi@proplot-mods
+    - git+https://github.com/proplot-dev/pytest-mpl@proplot-mods
diff --git a/ci/run-linter.sh b/ci/run-linter.sh
index c59d7cabc..057a0f9bb 100755
--- a/ci/run-linter.sh
+++ b/ci/run-linter.sh
@@ -12,6 +12,9 @@ flake8 proplot docs --exclude .ipynb_checkpoints --max-line-length=88 --ignore=W
 echo '[isort]'
 isort --recursive --check-only --line-width=88 --skip __init__.py --multi-line=3 --force-grid-wrap=0 --trailing-comma proplot
 
+echo '[pytest]'
+pytest proplot
+
 # echo '[black]'
 # black --check -S proplot
 
diff --git a/proplot/tests/test_1dplots.py b/proplot/tests/test_1dplots.py
new file mode 100644
index 000000000..aa7ab327d
--- /dev/null
+++ b/proplot/tests/test_1dplots.py
@@ -0,0 +1,344 @@
+#!/usr/bin/env python3
+"""
+Test 1D plotting overrides.
+"""
+import numpy as np
+import numpy.ma as ma
+import pandas as pd
+
+import proplot as pplt
+
+state = np.random.RandomState(51423)
+
+
+@pplt.tests.image_compare
+def test_auto_reverse():
+    """
+    Test enabled and disabled auto reverse.
+    """
+    x = np.arange(10)[::-1]
+    y = np.arange(10)
+    z = state.rand(10, 10)
+    fig, axs = pplt.subplots(ncols=2, nrows=3, share=0)
+    # axs[0].format(xreverse=False)  # should fail
+    axs[0].plot(x, y)
+    axs[1].format(xlim=(0, 9))  # manual override
+    axs[1].plot(x, y)
+    axs[2].plotx(x, y)
+    axs[3].format(ylim=(0, 9))  # manual override
+    axs[3].plotx(x, y)
+    axs[4].pcolor(x, y[::-1], z)
+    axs[5].format(xlim=(0, 9), ylim=(0, 9))  # manual override!
+    axs[5].pcolor(x, y[::-1], z)
+    fig.format(suptitle='Auto-reverse test', collabels=['reverse', 'fixed'])
+
+
+@pplt.tests.image_compare
+def test_cmap_cycles():
+    """
+    Test sampling of multiple continuous colormaps.
+    """
+    cycle = pplt.Cycle(
+        'Boreal', 'Grays', 'Fire', 'Glacial', 'yellow',
+        left=[0.4] * 5, right=[0.6] * 5,
+        samples=[3, 4, 5, 2, 1],
+    )
+    fig, ax = pplt.subplots()
+    data = state.rand(10, len(cycle)).cumsum(axis=1)
+    data = pd.DataFrame(data, columns=list('abcdefghijklmno'))
+    ax.plot(data, cycle=cycle, linewidth=2, legend='b')
+
+
+@pplt.tests.image_compare
+def test_column_iteration():
+    """
+    Test scatter column iteration.
+    """
+    fig, axs = pplt.subplots(ncols=2)
+    axs[0].plot(state.rand(5, 5), state.rand(5, 5), lw=5)
+    axs[1].scatter(
+        state.rand(5, 5), state.rand(5, 5), state.rand(5, 5), state.rand(5, 5)
+    )
+
+
+@pplt.tests.image_compare
+def test_bar_stack():
+    """
+    Test bar and area stacking.
+    """
+    # TODO: Add test here
+
+
+@pplt.tests.image_compare
+def test_bar_width():
+    """
+    Test relative and absolute widths.
+    """
+    fig, axs = pplt.subplots(ncols=3)
+    x = np.arange(10)
+    y = state.rand(10, 2)
+    for i, ax in enumerate(axs):
+        ax.bar(x * (2 * i + 1), y, width=0.8, absolute_width=i == 1)
+
+
+@pplt.tests.image_compare
+def test_bar_vectors():
+    """
+    Test vector arguments to bar plots.
+    """
+    fig, ax = pplt.subplots(refwidth=3, facecolor='orange0')
+    ax.bar(
+        np.arange(10),
+        np.arange(1, 11),
+        linewidth=3,
+        # facecolor=(np.repeat(0.1, 3) * np.arange(1, 11)[:, None]).tolist(),
+        edgecolor=[f'gray{i}' for i in range(9, -1, -1)],
+        facecolor=[f'gray{i}' for i in range(10)],
+        alpha=np.linspace(0.1, 1, 10),
+        hatch=[None, '//'] * 5,
+    )
+
+
+@pplt.tests.image_compare
+def test_boxplot_colors():
+    """
+    Test box colors and cycle colors.
+    """
+    fig = pplt.figure(share=False)
+    ax = fig.subplot(221)
+    box_data = state.uniform(-3, 3, size=(1000, 5))
+    violin_data = state.normal(0, 1, size=(1000, 5))
+    ax.box(box_data, fillcolor=['red', 'blue', 'green', 'orange', 'yellow'])
+    ax = fig.subplot(222)
+    ax.violin(violin_data, fillcolor=['gray1', 'gray7'], hatches=[None, '//'], means=True, barstds=2)  # noqa: E501
+    ax = fig.subplot(223)
+    ax.boxh(box_data, cycle='pastel2')
+    ax = fig.subplot(224)
+    ax.violinh(violin_data, cycle='pastel1')
+
+
+@pplt.tests.image_compare
+def test_boxplot_vectors():
+    """
+    Test vector property arguments.
+    """
+    coords = (0.5, 1, 2)
+    counts = (10, 20, 100)
+    labels = ['foo', 'bar', 'baz']
+    datas = []
+    for count in counts:
+        data = state.rand(count)
+        datas.append(data)
+    datas = np.array(datas, dtype=object)
+    fig, ax = pplt.subplot(refwidth=3)
+    ax.boxplot(
+        coords,
+        datas,
+        lw=2,
+        notch=False,
+        whis=(10, 90),
+        cycle='538',
+        fillalpha=[0.5, 0.5, 1],
+        hatch=[None, '//', '**'],
+        boxlw=[2, 1, 1],
+    )
+    ax.format(xticklabels=labels)
+
+
+@pplt.tests.image_compare
+def test_histogram_types():
+    """
+    Test the different histogram types using basic keywords.
+    """
+    fig, axs = pplt.subplots(ncols=2, nrows=2, share=False)
+    data = state.normal(size=(100, 5))
+    data += np.arange(5)
+    kws = ({'stack': 0}, {'stack': 1}, {'fill': 0}, {'fill': 1, 'alpha': 0.5})
+    for ax, kw in zip(axs, kws):
+        ax.hist(data, ec='k', **kw)
+
+
+@pplt.tests.image_compare
+def test_invalid_plot():
+    """
+    Test lines with missing or invalid values.
+    """
+    fig, axs = pplt.subplots(ncols=2)
+    data = state.normal(size=(100, 5))
+    for j in range(5):
+        data[:, j] = np.sort(data[:, j])
+        data[:19 * (j + 1), j] = np.nan
+        # data[:20, :] = np.nan
+    data_masked = ma.masked_invalid(data)  # should be same result
+    for ax, dat in zip(axs, (data, data_masked)):
+        ax.plot(dat, means=True, shade=True)
+
+
+@pplt.tests.image_compare
+def test_invalid_dist():
+    """
+    Test distributions with missing or invalid data.
+    """
+    fig, axs = pplt.subplots(ncols=2, nrows=2)
+    data = state.normal(size=(100, 5))
+    for i in range(5):  # test uneven numbers of invalid values
+        data[:10 * (i + 1), :] = np.nan
+    data_masked = ma.masked_invalid(data)  # should be same result
+    for ax, dat in zip(axs[:2], (data, data_masked)):
+        ax.violin(dat, means=True)
+    for ax, dat in zip(axs[2:], (data, data_masked)):
+        ax.box(dat, fill=True, means=True)
+
+
+@pplt.tests.image_compare
+def test_pie_charts():
+    """
+    Test basic pie plots. No examples in user guide right now.
+    """
+    pplt.rc.inlinefmt = 'svg'
+    labels = ['foo', 'bar', 'baz', 'biff', 'buzz']
+    array = np.arange(1, 6)
+    data = pd.Series(array, index=labels)
+    fig = pplt.figure()
+    ax = fig.subplot(121)
+    ax.pie(array, edgefix=True, labels=labels, ec='k', cycle='reds')
+    ax = fig.subplot(122)
+    ax.pie(data, ec='k', cycle='blues')
+
+
+@pplt.tests.image_compare
+def test_parametric_labels():
+    """
+    Test passing strings as parametric 'color values'. This is likely
+    a common use case.
+    """
+    pplt.rc.inlinefmt = 'svg'
+    fig, ax = pplt.subplots()
+    ax.parametric(
+        state.rand(5), c=list('abcde'), lw=20, colorbar='b', cmap_kw={'left': 0.2}
+    )
+
+
+@pplt.tests.image_compare
+def test_parametric_colors():
+    """
+    Test color input arguments. Should be able to make monochromatic
+    plots for case where we want `line` without sticky x/y edges.
+    """
+    fig, axs = pplt.subplots(ncols=2, nrows=2)
+    colors = (
+        [(0, 1, 1), (0, 1, 0), (1, 0, 0), (0, 0, 1), (1, 1, 0)],
+        ['b', 'r', 'g', 'm', 'c', 'y'],
+        'black',
+        (0.5, 0.5, 0.5),
+    )
+    for ax, color in zip(axs, colors):
+        ax.parametric(
+            state.rand(5), state.rand(5),
+            linewidth=2, label='label', color=color, colorbar='b', legend='b'
+        )
+
+
+@pplt.tests.image_compare
+def test_scatter_args():
+    """
+    Test diverse scatter keyword parsing and RGB scaling.
+    """
+    x, y = state.randn(50), state.randn(50)
+    data = state.rand(50, 3)
+    fig, axs = pplt.subplots(ncols=4, share=0)
+    ax = axs[0]
+    ax.scatter(x, y, s=80, fc='none', edgecolors='r')
+    ax = axs[1]
+    ax.scatter(data, c=data, cmap='reds')  # column iteration
+    ax = axs[2]
+    with pplt.tests.warns(pplt.internals.ProplotWarning) as record:
+        ax.scatter(data[:, 0], c=data, cmap='reds')  # actual colors
+    assert len(record) == 1
+    ax = axs[3]
+    ax.scatter(data, mean=True, shadestd=1, barstd=0.5)  # distribution
+    ax.format(xlim=(-0.1, 2.1))
+
+
+@pplt.tests.image_compare
+def test_scatter_inbounds():
+    """
+    Test in-bounds scatter plots.
+    """
+    fig, axs = pplt.subplots(ncols=2, share=False)
+    N = 100
+    fig.format(xlim=(0, 20))
+    for i, ax in enumerate(axs):
+        c = ax.scatter(np.arange(N), np.arange(N), c=np.arange(N), inbounds=bool(i))
+        ax.colorbar(c, loc='b')
+
+
+@pplt.tests.image_compare
+def test_scatter_alpha():
+    """
+    Test behavior with multiple alpha values.
+    """
+    fig, ax = pplt.subplots()
+    data = state.rand(10)
+    alpha = np.linspace(0.1, 1, data.size)
+    with pplt.tests.warns(pplt.internals.ProplotWarning) as record:
+        ax.scatter(data, alpha=alpha)
+    assert len(record) == 1
+    with pplt.tests.warns(pplt.internals.ProplotWarning) as record:
+        ax.scatter(data + 1, c=np.arange(data.size), cmap='BuRd', alpha=alpha)
+    assert len(record) == 1
+    ax.scatter(data + 2, color='k', alpha=alpha)
+    ax.scatter(data + 3, color=[f'red{i}' for i in range(data.size)], alpha=alpha)
+
+
+@pplt.tests.image_compare
+def test_scatter_cycle():
+    """
+    Test scatter property cycling.
+    """
+    fig, ax = pplt.subplots()
+    cycle = pplt.Cycle(
+        '538',
+        marker=['X', 'o', 's', 'd'],
+        sizes=[20, 100],
+        edgecolors=['r', 'k']
+    )
+    ax.scatter(
+        state.rand(10, 4),
+        state.rand(10, 4),
+        cycle=cycle,
+        area_size=False,
+    )
+
+
+@pplt.tests.image_compare
+def test_scatter_sizes():
+    """
+    Test marker size scaling.
+    """
+    # Compare setting size to input size
+    size = 20
+    with pplt.rc.context({'lines.markersize': size}):
+        fig = pplt.figure()
+        ax = fig.subplot(121, margin=0.15)
+        for i in range(3):
+            kw = {'absolute_size': i == 2}
+            if i == 1:
+                kw['smin'] = 0
+                kw['smax'] = size ** 2  # should be same as relying on lines.markersize
+            ax.scatter(np.arange(5), [0.25 * (1 + i)] * 5, size ** 2, **kw)
+    # Test various size arguments
+    ax = fig.subplot(122, margin=0.15)
+    data = state.rand(5) * 500
+    ax.scatter(
+        np.arange(5), [0.25] * 5,
+        c='blue7', sizes=[5, 10, 15, 20, 25], area_size=False, absolute_size=True,
+    )
+    ax.scatter(
+        np.arange(5), [0.50] * 5, c='red7', sizes=data, absolute_size=True
+    )
+    ax.scatter(
+        np.arange(5), [0.75] * 5, c='red7', sizes=data, absolute_size=False
+    )
+    for i, d in enumerate(data):
+        ax.text(i, 0.5, format(d, '.0f'), va='center', ha='center')
diff --git a/proplot/tests/test_2dplots.py b/proplot/tests/test_2dplots.py
new file mode 100644
index 000000000..8e8fee55b
--- /dev/null
+++ b/proplot/tests/test_2dplots.py
@@ -0,0 +1,322 @@
+#!/usr/bin/env python3
+"""
+Test 2D plotting overrides.
+"""
+import numpy as np
+import pytest
+import xarray as xr
+
+import proplot as pplt
+
+state = np.random.RandomState(51423)
+
+
+@pplt.tests.image_compare
+def test_colormap_vcenter():
+    """
+    Test colormap vcenter.
+    """
+    fig, axs = pplt.subplots(ncols=3)
+    data = 10 * state.rand(10, 10) - 3
+    axs[0].pcolor(data, vcenter=0)
+    axs[1].pcolor(data, vcenter=1)
+    axs[2].pcolor(data, vcenter=2)
+
+
+@pplt.tests.image_compare
+def test_auto_diverging():
+    """
+    Test that auto diverging works.
+    """
+    # Test with basic data
+    fig = pplt.figure()
+    # fig.format(collabels=('Auto sequential', 'Auto diverging'), suptitle='Default')
+    ax = fig.subplot(121)
+    ax.pcolor(state.rand(10, 10) * 5, colorbar='b')
+    ax = fig.subplot(122)
+    ax.pcolor(state.rand(10, 10) * 5 - 3.5, colorbar='b')
+    fig.format(toplabels=('Sequential', 'Diverging'))
+
+    # Test with explicit vcenter
+    fig, axs = pplt.subplots(ncols=3)
+    data = 5 * state.rand(10, 10)
+    axs[0].pcolor(data, vcenter=0, colorbar='b')  # otherwise should be disabled
+    axs[1].pcolor(data, vcenter=1.5, colorbar='b')
+    axs[2].pcolor(data, vcenter=4, colorbar='b', symmetric=True)
+
+    # Test when cmap input disables auto diverging.
+    fig, axs = pplt.subplots(ncols=2, nrows=2, refwidth=2)
+    cmap = pplt.Colormap(('red7', 'red3', 'red1', 'blue1', 'blue3', 'blue7'), listmode='discrete')  # noqa: E501
+    data1 = 10 * state.rand(10, 10)
+    data2 = data1 - 2
+    for i, cmap in enumerate(('RdBu_r', cmap)):
+        for j, data in enumerate((data1, data2)):
+            cmap = pplt.Colormap(pplt.Colormap(cmap))
+            axs[i, j].pcolormesh(data, cmap=cmap, colorbar='b')
+
+    fig, axs = pplt.subplots(ncols=3)
+    data = state.rand(5, 5) * 10 - 5
+    for i, ax in enumerate(axs[:2]):
+        ax.pcolor(data, sequential=bool(i), colorbar='b')
+    axs[2].pcolor(data, diverging=False, colorbar='b')  # should have same effect
+
+    fig, axs = pplt.subplots(ncols=2)
+    data = state.rand(5, 5) * 10 + 2
+    for ax, norm in zip(axs, (None, 'div')):
+        ax.pcolor(data, norm=norm, colorbar='b')
+
+
+@pplt.tests.image_compare
+def test_colormap_mode():
+    """
+    Test auto extending, auto discrete. Should issue warnings.
+    """
+    fig, axs = pplt.subplots(ncols=2, nrows=2, share=False)
+    axs[0].pcolor(state.rand(5, 5) % 0.3, extend='both', cyclic=True, colorbar='b')
+    axs[1].pcolor(state.rand(5, 5), sequential=True, diverging=True, colorbar='b')
+    axs[2].pcolor(state.rand(5, 5), discrete=False, qualitative=True, colorbar='b')
+    pplt.rc['cmap.discrete'] = False  # should be ignored below
+    axs[3].contourf(state.rand(5, 5), colorbar='b')
+
+
+@pplt.tests.image_compare
+def test_contour_labels():
+    """
+    Test contour labels. We use a separate `contour` object when adding labels to
+    filled contours or else weird stuff happens (see below). We could just modify
+    filled contour edges when not adding labels but that would be inconsistent with
+    behavior when labels are active.
+    """
+    data = state.rand(5, 5) * 10 - 5
+    fig, axs = pplt.subplots(ncols=2)
+    ax = axs[0]
+    ax.contourf(
+        data, edgecolor='k', linewidth=1.5,
+        labels=True, labels_kw={'color': 'k', 'size': 'large'}
+    )
+    ax = axs[1]
+    m = ax.contourf(data)
+    ax.clabel(m, colors='black', fontsize='large')  # looks fine without this
+    for o in m.collections:
+        o.set_linewidth(1.5)
+        o.set_edgecolor('k')
+
+
+@pplt.tests.image_compare
+def test_contour_negative():
+    """
+    Ensure `cmap.monochrome` properly assigned.
+    """
+    fig = pplt.figure(share=False)
+    ax = fig.subplot(131)
+    data = state.rand(10, 10) * 10 - 5
+    ax.contour(data, color='k')
+    ax = fig.subplot(132)
+    ax.tricontour(*(state.rand(3, 20) * 10 - 5), color='k')
+    ax = fig.subplot(133)
+    ax.contour(data, cmap=['black'])  # fails but that's ok
+
+
+@pplt.tests.image_compare
+def test_contour_single():
+    """
+    Test whether single contour works.
+    """
+    da = xr.DataArray(
+        np.array(
+            [
+                [0, 1, 2],
+                [3, 4, 5],
+                [6, 7, 8],
+                [9, 10, 11]
+            ]
+        ),
+        dims=['y', 'x']
+    )
+    fig, ax = pplt.subplots()
+    ax.contour(da, levels=[5.0], color='r')
+
+
+@pplt.tests.image_compare
+def test_edge_fix():
+    """
+    Test edge fix applied to 1D plotting utilities.
+    """
+    # Test basic application
+    # TODO: This should make no difference for PNG plots?
+    pplt.rc.edgefix = 1
+    fig, axs = pplt.subplots(ncols=2, share=False)
+    axs.format(grid=False)
+    axs[0].bar(state.rand(10,) * 10 - 5, width=1, negpos=True)
+    axs[1].area(state.rand(5, 3), stack=True)
+
+    # Test whether ignored for transparent colorbars
+    data = state.rand(10, 10)
+    cmap = 'magma'
+    fig, axs = pplt.subplots(nrows=3, ncols=2, refwidth=2.5, share=False)
+    for i, iaxs in enumerate((axs[:2], axs[2:4])):
+        if i == 0:
+            cmap = pplt.Colormap('magma', alpha=0.5)
+            alpha = None
+            iaxs.format(title='Colormap alpha')
+        else:
+            cmap = 'magma'
+            alpha = 0.5
+            iaxs.format(title='Single alpha')
+        iaxs[0].contourf(data, cmap=cmap, colorbar='b', alpha=alpha)
+        iaxs[1].pcolormesh(data, cmap=cmap, colorbar='b', alpha=alpha)
+    axs[4].bar(data[:3, :3], alpha=0.5)
+    axs[5].area(data[:3, :3], alpha=0.5, stack=True)
+
+
+@pplt.tests.image_compare
+def test_flow_functions():
+    """
+    These are seldom used and missing from documentation. Be careful
+    not to break anything basic.
+    """
+    fig, ax = pplt.subplots()
+    for _ in range(2):
+        ax.streamplot(state.rand(10, 10), 5 * state.rand(10, 10), label='label')
+
+    fig, axs = pplt.subplots(ncols=2)
+    ax = axs[0]
+    ax.quiver(
+        state.rand(10, 10), 5 * state.rand(10, 10), c=state.rand(10, 10),
+        label='label'
+    )
+    ax = axs[1]
+    ax.quiver(state.rand(10), state.rand(10), label='single')
+
+
+@pplt.tests.image_compare
+def test_gray_adjustment():
+    """
+    Test gray adjustments when creating segmented colormaps.
+    """
+    fig, ax = pplt.subplots()
+    data = state.rand(5, 5) * 10 - 5
+    cmap = pplt.Colormap(['blue', 'grey3', 'red'])
+    ax.pcolor(data, cmap=cmap, colorbar='b')
+
+
+@pplt.tests.image_compare
+def test_ignore_message():
+    """
+    Test various ignored argument warnings.
+    """
+    warning = pplt.internals.ProplotWarning
+    fig, axs = pplt.subplots(ncols=2, nrows=2)
+    with pytest.warns(warning):
+        axs[0].contour(
+            state.rand(5, 5) * 10, levels=pplt.arange(10), symmetric=True
+        )
+    with pytest.warns(warning):
+        axs[1].contourf(
+            state.rand(10, 10), levels=np.linspace(0, 1, 10),
+            locator=5, locator_kw={}
+        )
+    with pytest.warns(warning):
+        axs[2].contourf(
+            state.rand(10, 10),
+            levels=pplt.arange(0, 1, 0.2),
+            vmin=0,
+            vmax=2,
+            locator=3,
+            colorbar='b'
+        )
+    with pytest.warns(warning):
+        axs[3].hexbin(
+            state.rand(1000),
+            state.rand(1000),
+            levels=pplt.arange(0, 20),
+            gridsize=10,
+            locator=2,
+            colorbar='b',
+            cmap='blues',
+        )
+
+
+@pplt.tests.image_compare
+def test_levels_with_vmin_vmax():
+    """
+    Make sure `vmin` and `vmax` go into level generation algorithm.
+    """
+    # Sample data
+    state = np.random.RandomState(51423)
+    x = y = np.array([-10, -5, 0, 5, 10])
+    data = state.rand(y.size, x.size)
+
+    # Figure
+    fig = pplt.figure(refwidth=2.3, share=False)
+    axs = fig.subplots()
+    m = axs.pcolormesh(x, y, data, vmax=1.35123)
+    axs.colorbar([m], loc='r')
+
+
+@pplt.tests.image_compare
+def test_level_restriction():
+    """
+    Test `negative`, `positive`, and `symmetric` with and without discrete.
+    """
+    fig, axs = pplt.subplots(ncols=3, nrows=2)
+    data = 20 * state.rand(10, 10) - 5
+    keys = ('negative', 'positive', 'symmetric')
+    for i, grp in enumerate((axs[:3], axs[3:])):
+        for j, ax in enumerate(grp):
+            kw = {keys[j]: True, 'discrete': bool(1 - i)}
+            ax.pcolor(data, **kw, colorbar='b')
+
+
+@pplt.tests.image_compare
+def test_qualitative_colormaps():
+    """
+    Test both `colors` and `cmap` input and ensure extend setting is used for
+    extreme only if unset.
+    """
+    fig, axs = pplt.subplots(ncols=2)
+    data = state.rand(5, 5)
+    colors = pplt.get_colors('set3')
+    for ax, extend in zip(axs, ('both', 'neither')):
+        ax.pcolor(
+            data, extend=extend,
+            colors=colors, colorbar='b'
+        )
+
+    fig, axs = pplt.subplots(ncols=2)
+    data = state.rand(5, 5)
+    cmap = pplt.Colormap('set3')
+    cmap.set_under('black')  # does not overwrite
+    for ax, extend in zip(axs, ('both', 'neither')):
+        ax.pcolor(
+            data, extend=extend, cmap=cmap, colorbar='b'
+        )
+
+
+@pplt.tests.image_compare
+def test_segmented_norm():
+    """
+    Test segmented norm with non-discrete levels.
+    """
+    fig, ax = pplt.subplots()
+    ax.pcolor(
+        state.rand(5, 5) * 10,
+        discrete=False,
+        norm='segmented',
+        norm_kw={'levels': [0, 2, 10]},
+        colorbar='b'
+    )
+
+
+@pplt.tests.image_compare
+def test_triangular_functions():
+    """
+    Test triangular functions. Here there is no remotely sensible way to infer
+    coordinates so we skip standardize function.
+    """
+    fig, ax = pplt.subplots()
+    N = 30
+    y = state.rand(N) * 20
+    x = state.rand(N) * 50
+    da = xr.DataArray(state.rand(N), dims=('x',), coords={'x': x, 'y': ('x', y)})
+    ax.tricontour(da.x, da.y, da, labels=True)
diff --git a/proplot/tests/test_axes.py b/proplot/tests/test_axes.py
new file mode 100644
index 000000000..6252d9c1c
--- /dev/null
+++ b/proplot/tests/test_axes.py
@@ -0,0 +1,128 @@
+#!/usr/bin/env python3
+"""
+Test twin, inset, and panel axes.
+"""
+import numpy as np
+
+import proplot as pplt
+
+state = np.random.RandomState(51423)
+
+
+@pplt.tests.image_compare
+def test_inset_colors():
+    """
+    Test color application for zoom boxes.
+    """
+    fig, ax = pplt.subplots()
+    ax.format(xlim=(0, 100), ylim=(0, 100))
+    ix = ax.inset_axes(
+        (0.5, 0.5, 0.3, 0.3), zoom=True, zoom_kw={'color': 'r', 'fc': 'r', 'ec': 'b'}
+    )  # zoom_kw={'alpha': 1})
+    # ix = ax.inset_axes((40, 40, 20, 20), zoom=True, transform='data')
+    ix.format(xlim=(10, 20), ylim=(10, 20), grid=False)
+
+    fig, ax = pplt.subplots()
+    ax.format(xlim=(0, 100), ylim=(0, 100))
+    ix = ax.inset_axes(
+        (0.3, 0.5, 0.5, 0.3), zoom=True,
+        zoom_kw={'lw': 3, 'ec': 'red9', 'a': 1, 'c': pplt.set_alpha('red4', 0.5)}
+    )
+    ix.format(xlim=(10, 20), ylim=(10, 20))
+
+
+@pplt.tests.image_compare
+def test_inset_zoom_update():
+    """
+    Test automatic limit adjusment with successive changes. Without the extra
+    lines in `draw()` and `get_tight_bbox()` this fails.
+    """
+    fig, ax = pplt.subplots()
+    ax.format(xlim=(0, 100), ylim=(0, 100))
+    ix = ax.inset_axes((40, 40, 20, 20), zoom=True, transform='data')
+    ix.format(xlim=(10, 20), ylim=(10, 20), grid=False)
+
+    ix.format(xlim=(10, 20), ylim=(10, 30))
+    fig.show()
+
+    ax.format(ylim=(0, 300))
+    fig.show()
+
+
+@pplt.tests.image_compare
+def test_panels_with_sharing():
+    """
+    Previously the below text would hide the second y label.
+    """
+    fig, axs = pplt.subplots(ncols=2, share=False, refwidth=1.5)
+    axs.panel('left')
+    fig.format(ylabel='ylabel', xlabel='xlabel')
+
+
+@pplt.tests.image_compare
+def test_panels_without_sharing():
+    """
+    What should happen if `share=False` but figure-wide sharing enabled?
+    Strange use case but behavior appears "correct."
+    """
+    fig, axs = pplt.subplots(ncols=2, share=True, refwidth=1.5, includepanels=False)
+    axs.panel('left', share=False)
+    fig.format(ylabel='ylabel', xlabel='xlabel')
+
+    fig, axs = pplt.subplots(ncols=2, refwidth=1.5, includepanels=True)
+    for _ in range(3):
+        p = axs[0].panel('l', space=0)
+        p.format(xlabel='label')
+    fig.format(xlabel='xlabel')
+
+
+@pplt.tests.image_compare
+@pplt.tests.image_compare
+def test_panels_suplabels():
+    """
+    Test label sharing for `includepanels=True`.
+    """
+    for b in range(3):
+        for ncols in range(1, 3):
+            fig, axs = pplt.subplots(ncols=ncols, refwidth=1.5, includepanels=(b == 2))
+            if b:
+                for _ in range(3):
+                    axs[0].panel('l')
+            axs.format(xlabel='xlabel\nxlabel\nxlabel', ylabel='ylabel', suptitle='sup')
+
+
+def test_twin_axes():
+    """
+    Adjust twin axis positions. Should allow easily switching the location.
+    """
+    # Test basic twin creation and tick, spine, label location changes
+    fig = pplt.figure()
+    ax = fig.subplot()
+    ax.format(
+        ycolor='blue', ylabel='orig', ylabelcolor='blue9',
+        yspineloc='l', labelweight='bold', xlabel='xlabel',
+        xtickloc='t', xlabelloc='b',
+    )
+    ax.alty(
+        loc='r', color='r', labelcolor='red9', label='other', labelweight='bold'
+    )
+
+    # Simple example but doesn't quite work. Figure out how to specify left vs. right
+    # spines for 'offset' locations... maybe needs another keyword.
+    fig, ax = pplt.subplots()
+    ax.format(ymax=10, ylabel='Reference')
+    ax.alty(color='green', label='Green', max=8)
+    ax.alty(color='red', label='Red', max=15, loc=('axes', -0.2))
+    ax.alty(color='blue', label='Blue', max=5, loc=('axes', 1.2), ticklabeldir='out')
+
+    # A worked example from Riley Brady
+    # Uses auto-adjusting limits
+    fig, ax = pplt.subplots()
+    axs = [ax, ax.twinx(), ax.twinx()]
+    axs[-1].spines['right'].set_position(('axes', 1.2))
+    colors = ('Green', 'Red', 'Blue')
+    for ax, color in zip(axs, colors):
+        data = state.random(1) * state.random(10)
+        ax.plot(data, marker='o', linestyle='none', color=color)
+        ax.format(ylabel='%s Thing' % color, ycolor=color)
+    axs[0].format(xlabel='xlabel')
diff --git a/proplot/tests/test_colorbar.py b/proplot/tests/test_colorbar.py
new file mode 100644
index 000000000..0476d2888
--- /dev/null
+++ b/proplot/tests/test_colorbar.py
@@ -0,0 +1,228 @@
+#!/usr/bin/env python3
+"""
+Test colorbars.
+"""
+import numpy as np
+
+import proplot as pplt
+
+state = np.random.RandomState(51423)
+
+
+@pplt.tests.image_compare
+def test_outer_align():
+    """
+    Test various align options.
+    """
+    fig, ax = pplt.subplots()
+    ax.plot(np.empty((0, 4)), labels=list('abcd'))
+    ax.legend(loc='bottom', align='right', ncol=2)
+    ax.legend(loc='left', align='bottom', ncol=1)
+    ax.colorbar('magma', loc='r', align='top', shrink=0.5, label='label', extend='both')
+    ax.colorbar(
+        'magma', loc='top', ticklen=0, tickloc='bottom', align='left',
+        shrink=0.5, label='Title', extend='both', labelloc='top', labelweight='bold'
+    )
+    # ax.colorbar(
+    #     'magma', loc='top', align='left', shrink=0.5, label='label', extend='both'
+    # )
+    ax.colorbar('magma', loc='right', extend='both', label='test extensions')
+    fig.suptitle('Align demo')
+
+
+@pplt.tests.image_compare
+def test_colorbar_ticks():
+    """
+    Test ticks modification.
+    """
+    fig, axs = pplt.subplots(ncols=2)
+    ax = axs[0]
+    ax.colorbar(
+        'magma', loc='bottom', ticklen=10, linewidth=3, tickminor=True
+    )
+    ax = axs[1]
+    ax.colorbar(
+        'magma', loc='bottom', ticklen=10, linewidth=3, tickwidth=1.5, tickminor=True
+    )
+
+
+@pplt.tests.image_compare
+def test_discrete_ticks():
+    """
+    Test `DiscreteLocator`.
+    """
+    levels = pplt.arange(0, 2, 0.1)
+    data = state.rand(5, 5) * 2
+    for j, width in enumerate((0.8, 1.2, 2)):
+        fig, axs = pplt.subplots(share=False, ncols=2, nrows=2, refwidth=width)
+        for i, ax in enumerate(axs):
+            cmd = ax.contourf if i // 2 == 0 else ax.pcolormesh
+            m = cmd(data, levels=levels, extend='both')
+            ax.colorbar(m, loc='t' if i // 2 == 0 else 'b')
+            ax.colorbar(m, loc='l' if i % 2 == 0 else 'r')
+        fig.save(f'~/Downloads/tmp{j}.png', dpi=300)
+
+
+@pplt.tests.image_compare
+def test_discrete_vs_fixed():
+    """
+    Test `DiscreteLocator` for numeric on-the-fly
+    mappable ticks and `FixedLocator` otherwise.
+    """
+    fig, axs = pplt.subplots(ncols=2, nrows=3, refwidth=1.3, share=False)
+    axs[0].plot(state.rand(10, 5), labels=list('xyzpq'), colorbar='b')  # fixed
+    axs[1].plot(state.rand(10, 5), labels=np.arange(5), colorbar='b')  # discrete
+    axs[2].contourf(
+        state.rand(10, 10),
+        colorbar='b', colorbar_kw={'ticklabels': list('xyzpq')}  # fixed
+    )
+    axs[3].contourf(state.rand(10, 10), colorbar='b')  # discrete
+    axs[4].pcolormesh(
+        state.rand(10, 10) * 20, colorbar='b', levels=[0, 2, 4, 6, 8, 10, 15, 20]
+    )  # fixed
+    axs[5].pcolormesh(
+        state.rand(10, 10) * 20, colorbar='b', levels=pplt.arange(0, 20, 2)
+    )  # discrete
+
+
+@pplt.tests.image_compare
+def test_uneven_levels():
+    """
+    Test even and uneven levels with discrete cmap. Ensure minor ticks are disabled.
+    """
+    N = 20
+    state = np.random.RandomState(51423)
+    data = np.cumsum(state.rand(N, N), axis=1) * 12
+    colors = [
+        'white',
+        'indigo1', 'indigo3', 'indigo5', 'indigo7', 'indigo9',
+        'yellow1', 'yellow3', 'yellow5', 'yellow7', 'yellow9',
+        'violet1', 'violet3'
+    ]
+    levels_even = pplt.arange(1, 12, 1)
+    levels_uneven = [1., 1.25, 1.5, 2., 2.5, 3., 3.75, 4.5, 6., 7.5, 9., 12.]
+    fig, axs = pplt.subplots(ncols=2, refwidth=3.)
+    axs[0].pcolor(
+        data, levels=levels_uneven, colors=colors, colorbar='r', extend='both'
+    )
+    axs[1].pcolor(
+        data, levels=levels_even, colors=colors, colorbar='r', extend='both'
+    )
+
+
+@pplt.tests.image_compare
+def test_on_the_fly_mappable():
+    """
+    Test on-the-fly mappable generation.
+    """
+    fig, axs = pplt.subplots(ncols=2, nrows=3, space=3)
+    axs.format(aspect=0.5)
+    axs[0].colorbar('magma', vmin=None, vmax=100, values=[0, 1, 2, 3, 4], loc='bottom')
+    axs[1].colorbar('magma', vmin=None, vmax=100, loc='bottom')
+    axs[2].colorbar('colorblind', vmin=None, vmax=None, values=[0, 1, 2], loc='bottom')
+    axs[3].colorbar('colorblind', vmin=None, vmax=None, loc='bottom')
+    axs[4].colorbar(['r', 'b', 'g', 'k', 'w'], values=[0, 1, 2], loc='b')
+    axs[5].colorbar(['r', 'b', 'g', 'k', 'w'], loc='bottom')
+
+    # Passing labels to plot function.
+    fig, ax = pplt.subplots()
+    ax.scatter(state.rand(10, 4), labels=['foo', 'bar', 'baz', 'xyz'], colorbar='b')
+
+    # Passing string value lists. This helps complete the analogy with legend 'labels'.
+    fig, ax = pplt.subplots()
+    hs = ax.line(state.rand(20, 5))
+    ax.colorbar(hs, loc='b', values=['abc', 'def', 'ghi', 'pqr', 'xyz'])
+
+
+@pplt.tests.image_compare
+def test_inset_colorbars():
+    """
+    Test basic functionality.
+    """
+    # Simple example
+    fig, ax = pplt.subplots()
+    ax.colorbar('magma', loc='ul')
+
+    # Colorbars from lines
+    fig = pplt.figure(share=False, refwidth=2)
+    ax = fig.subplot(121)
+    state = np.random.RandomState(51423)
+    data = 1 + (state.rand(12, 10) - 0.45).cumsum(axis=0)
+    cycle = pplt.Cycle('algae')
+    hs = ax.line(
+        data, lw=4, cycle=cycle, colorbar='lr',
+        colorbar_kw={'length': '8em', 'label': 'line colorbar'}
+    )
+    ax.colorbar(
+        hs, loc='t', values=np.arange(0, 10),
+        label='line colorbar', ticks=2
+    )
+
+    # Colorbars from a mappable
+    ax = fig.subplot(122)
+    m = ax.contourf(
+        data.T, extend='both', cmap='algae',
+        levels=pplt.arange(0, 3, 0.5)
+    )
+    fig.colorbar(
+        m, loc='r', length=1,  # length is relative
+        label='interior ticks', tickloc='left'
+    )
+    ax.colorbar(
+        m, loc='ul', length=6,  # length is em widths
+        label='inset colorbar', tickminor=True, alpha=0.5,
+    )
+    fig.format(
+        suptitle='Colorbar formatting demo',
+        xlabel='xlabel', ylabel='ylabel', titleabove=False
+    )
+
+
+@pplt.tests.image_compare
+def test_segmented_norm_center():
+    """
+    Test various align options.
+    """
+    fig, ax = pplt.subplots()
+    cmap = pplt.Colormap('NegPos', cut=0.1)
+    data = state.rand(10, 10) * 10 - 2
+    levels = [-4, -3, -2, -1, 0, 1, 2, 4, 8, 16, 32, 64, 128]
+    norm = pplt.SegmentedNorm(levels, vcenter=0, fair=1)
+    ax.pcolormesh(data, levels=levels, norm=norm, cmap=cmap, colorbar='b')
+
+
+@pplt.tests.image_compare
+def test_segmented_norm_ticks():
+    """
+    Ensure segmented norm ticks show up in center when `values` are passed.
+    """
+    fig, ax = pplt.subplots()
+    data = state.rand(10, 10) * 10
+    values = (1, 5, 5.5, 6, 10)
+    ax.contourf(
+        data,
+        values=values,
+        colorbar='ll',
+        colorbar_kw={'tickminor': True, 'minorlocator': np.arange(-20, 20, 0.5)},
+    )
+
+
+@pplt.tests.image_compare
+def test_reversed_levels():
+    """
+    Test negative levels with a discrete norm and segmented norm.
+    """
+    # %%
+    fig, axs = pplt.subplots(ncols=4, nrows=2, refwidth=1.8)
+    data = state.rand(20, 20).cumsum(axis=0)
+    i = 0
+    for stride in (1, -1):
+        for key in ('levels', 'values'):
+            for levels in (
+                np.arange(0, 15, 1),  # with Normalizer
+                [0, 1, 2, 5, 10, 15],  # with LinearSegmentedNorm
+            ):
+                ax = axs[i]
+                kw = {key: levels[::stride]}
+                ax.pcolormesh(data, colorbar='b', **kw)
+                i += 1
diff --git a/proplot/tests/test_docs.py b/proplot/tests/test_docs.py
new file mode 100644
index 000000000..cd92b7fec
--- /dev/null
+++ b/proplot/tests/test_docs.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+"""
+Automatically build pytests from jupytext py:percent documentation files.
+"""
+import glob
+import os
+
+import jupytext
+import pytest
+
+from proplot.config import rc
+from proplot.tests import SAVEFIG_KWARGS, TOLERANCE, VERSION_STRING
+
+
+def _init_tests():
+    """
+    Construct tests from the jupytext docs examples.
+    """
+    # WARNING: This will only work if all jupytext examples consist of single code
+    # cells or adjacent code cells without intervening markdown or ReST cells.
+    # Alternative would be to define the entire file as a single test but that
+    # would make testing and debugging more difficult.
+    base = os.path.dirname(__file__)
+    paths = glob.glob(f'{base}/../../docs/*.py')
+    for path in sorted(paths):
+        if os.path.basename(path) == 'conf.py':
+            continue
+        baseline, _ = os.path.splitext(os.path.basename(path))
+        result = jupytext.read(path, fmt='py:percent')
+        cells = result['cells']
+        source = ''
+        num = 1
+        for i, cell in enumerate(cells):
+            type_ = cell['cell_type']
+            if type_ == 'code':
+                source += '\n' + cell['source']
+            if not source:
+                continue
+            if i == len(cells) - 1 or type_ != 'code':  # end of example definition
+                _make_cell_test(num, baseline, source)
+                num += 1
+        print(f'\nMade {num} tests from file: {path}', end='')
+
+
+def _make_cell_test(num, baseline, cell_source):
+    """
+    Add a test using the jupytext docs cell.
+    """
+    # WARNING: Ugly kludge to replace e.g. backend='basemap' with backend='cartopy'
+    # for matplotlib versions incompatible with basemap. Generally these examples
+    # test basemap and cartopy side-by-side so this will effectively duplicate the
+    # cartopy tests but keep us from having to dump them.
+    if baseline == 'test_projections' and VERSION_STRING == 'mpl32':
+        cell_source = cell_source.replace("'basemap'", "'cartopy'")
+        if 'pplt.Proj' in cell_source or 'Table' in cell_source:
+            return  # examples that cannot be naively converted
+
+    def run_test():
+        rc.reset()
+        exec(cell_source)
+
+    name = f'{baseline}_cell_{num:02d}'
+    decorator = pytest.mark.mpl_image_compare(
+        run_test,
+        filename=f'{name}_{VERSION_STRING}.png',
+        baseline_dir='images_docs',
+        savefig_kwargs=SAVEFIG_KWARGS,
+        tolerance=TOLERANCE,
+        style={},  # no mpl style
+    )
+    test = decorator(run_test)
+    name = f'test_{name}'
+    test.__name__ = test.__qualname__ = name
+    globals()[name] = test  # for pytest detection
+
+
+# Initialize functions
+_init_tests()
diff --git a/proplot/tests/test_format.py b/proplot/tests/test_format.py
new file mode 100644
index 000000000..4d274e7df
--- /dev/null
+++ b/proplot/tests/test_format.py
@@ -0,0 +1,271 @@
+#!/usr/bin/env python3
+"""
+Test format and rc behavior.
+"""
+import locale
+
+import numpy as np
+
+import proplot as pplt
+
+state = np.random.RandomState(51423)
+
+
+def test_colormap_assign():
+    """
+    Test below line is possible and naming schemes.
+    """
+    pplt.rc['image.cmap'] = pplt.Colormap('phase', shift=180, left=0.2)
+    assert pplt.rc['cmap'] == pplt.rc['cmap.sequential'] == '_Phase_copy_s'
+    pplt.rc['image.cmap'] = pplt.Colormap('magma', reverse=True, right=0.8)
+    assert pplt.rc['image.cmap'] == pplt.rc['cmap.sequential'] == '_magma_copy_r'
+
+
+def test_ignored_keywords():
+    """
+    Test ignored keywords and functions.
+    """
+    with pplt.tests.warns(pplt.internals.ProplotWarning) as record:
+        fig, ax = pplt.subplots(
+            gridspec_kw={'left': 3},
+            subplot_kw={'proj': 'cart'},
+            subplotpars={'left': 0.2},
+        )
+    assert len(record) == 3
+    with pplt.tests.warns(pplt.internals.ProplotWarning) as record:
+        fig.subplots_adjust(left=0.2)
+    assert len(record) == 1
+
+
+@pplt.tests.image_compare
+def test_init_format():
+    """
+    Test application of format args on initialization.
+    """
+    fig, axs = pplt.subplots(
+        ncols=2, xlim=(0, 10), xlabel='xlabel',
+        abc=True, title='Subplot title',
+        collabels=['Column 1', 'Column 2'], suptitle='Figure title',
+    )
+    axs[0].format(hatch='xxx', hatchcolor='k', facecolor='blue3')
+
+
+@pplt.tests.image_compare
+def test_patch_format():
+    """
+    Test application of patch args on initialization.
+    """
+    fig = pplt.figure(suptitle='Super title')
+    fig.subplot(
+        121, proj='cyl', labels=True, land=True, latlines=20,
+        abcloc='l', abc='[A]'
+    )
+    fig.subplot(
+        122, facecolor='gray1', color='red', titleloc='l', title='Hello',
+        abcloc='l', abc='[A]', xticks=0.1, yformatter='scalar'
+    )
+
+
+@pplt.tests.image_compare
+def test_multi_formatting():
+    """
+    Support formatting in multiple projections.
+    """
+    fig, axs = pplt.subplots(ncols=2, proj=('cart', 'cyl'))
+    axs[0].pcolormesh(state.rand(5, 5))
+    fig.format(
+        land=1, labels=1, lonlim=(0, 90), latlim=(0, 90), xlim=(0, 10), ylim=(0, 10),
+    )
+    axs[:1].format(
+        land=1, labels=1, lonlim=(0, 90), latlim=(0, 90), xlim=(0, 10), ylim=(0, 10),
+    )
+
+
+@pplt.tests.image_compare
+def test_inner_title_zorder():
+    """
+    Test prominence of contour labels and whatnot.
+    """
+    fig, ax = pplt.subplots()
+    ax.format(
+        title='TITLE', titleloc='upper center', titleweight='bold', titlesize='xx-large'
+    )
+    ax.format(xlim=(0, 1), ylim=(0, 1))
+    ax.text(
+        0.5, 0.95, 'text', ha='center', va='top', color='red',
+        weight='bold', size='xx-large'
+    )
+    x = [[0.4, 0.6]] * 2
+    y = z = [[0.9, 0.9], [1.0, 1.0]]
+    ax.contour(
+        x, y, z, color='k', labels=True,
+        levels=None, labels_kw={'color': 'blue', 'weight': 'bold', 'size': 'xx-large'}
+    )
+
+
+@pplt.tests.image_compare
+def test_font_adjustments():
+    """
+    Test font name application. Somewhat hard to do.
+    """
+    fig, axs = pplt.subplots(ncols=2)
+    axs.format(
+        abc='A.',
+        fontsize=15,
+        fontname='Fira Math',
+        xlabel='xlabel',
+        ylabel='ylabel',
+        title='Title',
+        figtitle='Figure title',
+        collabels=['Column 1', 'Column 2'],
+    )
+
+
+@pplt.tests.image_compare
+def test_axes_colors():
+    """
+    Test behavior of passing color to format.
+    """
+    fig, axs = pplt.subplots(
+        ncols=3, nrows=2, share=False,
+        proj=('cyl', 'cart', 'polar', 'cyl', 'cart', 'polar'), wratios=(2, 2, 1)
+    )
+    axs[:, 0].format(labels=True)
+    axs[:3].format(edgecolor='red', gridlabelsize='med-large', gridlabelweight='bold')
+    axs[:3].format(color='red')  # without this just colors the edge
+    axs[1].format(xticklabelcolor='gray')
+    # axs[2].format(ticklabelcolor='red')
+    axs[1].format(tickcolor='blue')
+    axs[3:].format(color='red')  # ensure propagates
+    # axs[-1].format(gridlabelcolor='green')  # should work
+
+
+@pplt.tests.image_compare
+def test_locale_formatting():
+    """
+    Ensure locale formatting works. Also zerotrim should account
+    for non-period decimal separators.
+    """
+    locale.setlocale(locale.LC_ALL, 'de_DE')
+    pplt.rc['formatter.use_locale'] = False
+    pplt.rc['formatter.zerotrim'] = True
+    with pplt.rc.context({'formatter.use_locale': True}):
+        fig, ax = pplt.subplots()
+        ticks = pplt.arange(-1, 1, 0.1)
+        ax.format(ylim=(min(ticks), max(ticks)), yticks=ticks)
+
+
+@pplt.tests.image_compare
+def test_bounds_ticks():
+    """
+    Test spine bounds and location. Previously applied `fixticks`
+    automatically but no longer the case.
+    """
+    fig, ax = pplt.subplots()
+    # ax.format(xlim=(-10, 10))
+    ax.format(xloc='top')
+    ax.format(xlim=(-10, 15), xbounds=(0, 10))
+
+
+@pplt.tests.image_compare
+def test_cutoff_ticks():
+    """
+    Test spine cutoff ticks.
+    """
+    fig, ax = pplt.subplots()
+    # ax.format(xlim=(-10, 10))
+    ax.format(xlim=(-10, 10), xscale=('cutoff', 0, 2), xloc='top', fixticks=True)
+    ax.axvspan(0, 100, facecolor='k', alpha=0.1)
+
+
+@pplt.tests.image_compare
+def test_spine_side():
+    """
+    Test automatic spine selection when passing `xspineloc` or `yspineloc`.
+    """
+    fig, ax = pplt.subplots()
+    ax.plot(pplt.arange(-5, 5), (10 * state.rand(11, 5) - 5).cumsum(axis=0))
+    ax.format(xloc='bottom', yloc='zero')
+    ax.alty(loc='right')
+
+
+@pplt.tests.image_compare
+def test_spine_offset():
+    """
+    Test offset axes.
+    """
+    fig, ax = pplt.subplots()
+    ax.format(xloc='none')  # test none instead of neither
+    ax.alty(loc=('axes', -0.2), color='red')
+    # ax.alty(loc=('axes', 1.2), color='blue')
+    ax.alty(loc=('axes', -0.4), color='blue')
+    ax.alty(loc=('axes', 1.1), color='green')
+
+
+@pplt.tests.image_compare
+def test_tick_direction():
+    """
+    Test tick direction arguments.
+    """
+    fig, axs = pplt.subplots(ncols=2)
+    axs[0].format(tickdir='in')
+    axs[1].format(xtickdirection='inout', ytickdir='out')  # rc setting should be used?
+
+
+@pplt.tests.image_compare
+def test_tick_length():
+    """
+    Test tick length args. Ensure ratios can be applied successively.
+    """
+    fig, ax = pplt.subplots()
+    ax.format(yticklen=100)
+    ax.format(xticklen=50, yticklenratio=0.1)
+
+
+@pplt.tests.image_compare
+def test_tick_width():
+    """
+    Test tick width args. Ensure ratios can be applied successively, setting
+    width to `zero` adjusts length for label padding, and ticks can appear
+    without spines if requested.
+    """
+    fig, axs = pplt.subplots(ncols=2, nrows=2, share=False)
+    ax = axs[0]
+    ax.format(linewidth=2, ticklen=20, xtickwidthratio=1)
+    ax.format(ytickwidthratio=0.3)
+    ax = axs[1]
+    ax.format(axeslinewidth=0, ticklen=20, tickwidth=2)  # should permit ticks
+    ax = axs[2]
+    ax.format(tickwidth=0, ticklen=50)  # should set length to zero
+    ax = axs[3]
+    ax.format(linewidth=0, ticklen=20, tickwidth='5em')  # should override linewidth
+
+
+@pplt.tests.image_compare
+def test_tick_labels():
+    """
+    Test default and overwriting properties of auto tick labels.
+    """
+    import pandas as pd
+    data = state.rand(5, 3)
+    data = pd.DataFrame(data, index=['foo', 'bar', 'baz', 'bat', 'bot'])
+    fig, axs = pplt.subplots(abc='A.', abcloc='ul', ncols=2, refwidth=3, span=False)
+    for i, ax in enumerate(axs):
+        data.index.name = 'label'
+        if i == 1:
+            ax.format(xformatter='null')  # overrides result
+        ax.bar(data, autoformat=True)
+        if i == 0:
+            data.index = ['abc', 'def', 'ghi', 'jkl', 'mno']
+            data.index.name = 'foobar'  # label should be updated
+        ax.bar(-data, autoformat=True)
+
+
+@pplt.tests.image_compare
+def test_label_settings():
+    """
+    Test label colors and ensure color change does not erase labels.
+    """
+    fig, ax = pplt.subplots()
+    ax.format(xlabel='xlabel', ylabel='ylabel')
+    ax.format(labelcolor='red')
diff --git a/proplot/tests/test_integration.py b/proplot/tests/test_integration.py
new file mode 100644
index 000000000..add57a8e4
--- /dev/null
+++ b/proplot/tests/test_integration.py
@@ -0,0 +1,139 @@
+#!/usr/bin/env python3
+"""
+Test xarray, pandas, pint, seaborn integration.
+"""
+import numpy as np
+import pandas as pd
+import pint
+import seaborn as sns
+import xarray as xr
+
+import proplot as pplt
+
+state = np.random.RandomState(51423)
+
+
+@pplt.tests.image_compare
+def test_pint_quantities():
+    """
+    Ensure auto-formatting and column iteration both work.
+    """
+    pplt.rc.unitformat = '~H'
+    ureg = pint.UnitRegistry()
+    fig, ax = pplt.subplots()
+    ax.plot(
+        np.arange(10),
+        state.rand(10) * ureg.km,
+        'C0',
+        np.arange(10),
+        state.rand(10) * ureg.m * 1e2,
+        'C1'
+    )
+
+
+@pplt.tests.image_compare
+def test_data_keyword():
+    """
+    Make sure `data` keywords work properly.
+    """
+    N = 10
+    M = 20
+    ds = xr.Dataset(
+        {'z': (('x', 'y'), state.rand(N, M))},
+        coords={
+            'x': ('x', np.arange(N) * 10, {'long_name': 'longitude'}),
+            'y': ('y', np.arange(M) * 5, {'long_name': 'latitude'})
+        }
+    )
+    fig, ax = pplt.subplots()
+    # ax.pcolor('z', data=ds, order='F')
+    ax.pcolor(z='z', data=ds, transpose=True)
+    ax.format(xformatter='deglat', yformatter='deglon')
+
+
+@pplt.tests.image_compare
+def test_keep_guide_labels():
+    """
+    Preserve metadata when passing mappables and handles to colorbar and
+    legend subsequently.
+    """
+    fig, ax = pplt.subplots()
+    df = pd.DataFrame(state.rand(5, 5))
+    df.name = 'variable'
+    m = ax.pcolor(df)
+    ax.colorbar(m)
+
+    fig, ax = pplt.subplots()
+    for k in ('foo', 'bar', 'baz'):
+        s = pd.Series(state.rand(5), index=list('abcde'), name=k)
+        ax.plot(
+            s, legend='ul',
+            legend_kw={
+                'lw': 5,
+                'ew': 2,
+                'ec': 'r',
+                'fc': 'w',
+                'handle_kw': {'marker': 'd'}
+            }
+        )
+
+
+@pplt.tests.image_compare
+def test_seaborn_swarmplot():
+    """
+    Test seaborn swarm plots.
+    """
+    tips = sns.load_dataset('tips')
+    fig = pplt.figure(refwidth=3)
+    ax = fig.subplot()
+    sns.swarmplot(ax=ax, x='day', y='total_bill', data=tips, palette='cubehelix')
+    # fig, ax = pplt.subplots()
+    # sns.swarmplot(y=state.normal(size=100), ax=ax)
+    return fig
+
+
+@pplt.tests.image_compare
+def test_seaborn_hist():
+    """
+    Test seaborn histograms.
+    """
+    fig, axs = pplt.subplots(ncols=2, nrows=2)
+    sns.histplot(state.normal(size=100), ax=axs[0])
+    sns.kdeplot(state.rand(100), state.rand(100), ax=axs[1])
+    penguins = sns.load_dataset('penguins')
+    sns.histplot(
+        data=penguins, x='flipper_length_mm', hue='species', multiple='stack', ax=axs[2]
+    )
+    sns.kdeplot(
+        data=penguins, x='flipper_length_mm', hue='species', multiple='stack', ax=axs[3]
+    )
+
+
+@pplt.tests.image_compare
+def test_seaborn_relational():
+    """
+    Test scatter plots. Disabling seaborn detection creates mismatch between marker
+    sizes and legend.
+    """
+    fig = pplt.figure()
+    ax = fig.subplot()
+    sns.set_theme(style='white')
+    # Load the example mpg dataset
+    mpg = sns.load_dataset('mpg')
+    # Plot miles per gallon against horsepower with other semantics
+    sns.scatterplot(
+        x='horsepower', y='mpg', hue='origin', size='weight',
+        sizes=(40, 400), alpha=.5, palette='muted',
+        # legend='bottom',
+        # height=6,
+        data=mpg, ax=ax
+    )
+
+
+@pplt.tests.image_compare
+def test_seaborn_heatmap():
+    """
+    Test seaborn heatmaps. This should work thanks to backwards compatibility support.
+    """
+    fig, ax = pplt.subplots()
+    sns.heatmap(state.normal(size=(50, 50)), ax=ax[0])
diff --git a/proplot/tests/test_journals.py b/proplot/tests/test_journals.py
deleted file mode 100644
index 5fe002027..000000000
--- a/proplot/tests/test_journals.py
+++ /dev/null
@@ -1,11 +0,0 @@
-import pytest
-
-import proplot as pplt
-from proplot.subplots import JOURNAL_SPECS
-
-
-# Loop through all available journals.
-@pytest.mark.parametrize('journal', JOURNAL_SPECS.keys())
-def test_journal_subplots(journal):
-    """Tests that subplots can be generated with journal specifications."""
-    f, axs = pplt.subplots(journal=journal)
diff --git a/proplot/tests/test_legend.py b/proplot/tests/test_legend.py
new file mode 100644
index 000000000..7ff27ec43
--- /dev/null
+++ b/proplot/tests/test_legend.py
@@ -0,0 +1,160 @@
+#!/usr/bin/env python3
+"""
+Test legends.
+"""
+import numpy as np
+import pandas as pd
+
+import proplot as pplt
+
+state = np.random.RandomState(51423)
+
+
+@pplt.tests.image_compare
+def test_auto_legend():
+    """
+    Test retrieval of legends from panels, insets, etc.
+    """
+    fig, ax = pplt.subplots()
+    ax.line(state.rand(5, 3), labels=list('abc'))
+    px = ax.panel_axes('right', share=False)
+    px.linex(state.rand(5, 3), labels=list('xyz'))
+    # px.legend(loc='r')
+    ix = ax.inset_axes((-0.2, 0.8, 0.5, 0.5), zoom=False)
+    ix.line(state.rand(5, 2), labels=list('pq'))
+    ax.legend(loc='b', order='F', edgecolor='red9', edgewidth=3)
+
+
+@pplt.tests.image_compare
+def test_singleton_legend():
+    """
+    Test behavior when singleton lists are passed.
+    Ensure this does not trigger centered-row legends.
+    """
+    fig, ax = pplt.subplots()
+    h1 = ax.plot([0, 1, 2], label='a')
+    h2 = ax.plot([0, 1, 1], label='b')
+    ax.legend(loc='best')
+    ax.legend([h1, h2], loc='bottom')
+
+
+@pplt.tests.image_compare
+def test_centered_legends():
+    """
+    Test success of algorithm.
+    """
+    # Basic centered legends
+    fig, axs = pplt.subplots(ncols=2, nrows=2, axwidth=2)
+    hs = axs[0].plot(state.rand(10, 6))
+    locs = ['l', 't', 'r', 'uc', 'ul', 'll']
+    locs = ['l', 't', 'uc', 'll']
+    labels = ['a', 'bb', 'ccc', 'ddddd', 'eeeeeeee', 'fffffffff']
+    for ax, loc in zip(axs, locs):
+        ax.legend(hs, loc=loc, ncol=3, labels=labels, center=True)
+
+    # Pass centered legends with keywords or list-of-list input.
+    fig, ax = pplt.subplots()
+    hs = ax.plot(state.rand(10, 5), labels=list('abcde'))
+    ax.legend(hs, center=True, loc='b')
+    ax.legend(hs + hs[:1], loc='r', ncol=1)
+    ax.legend([hs[:2], hs[2:], hs[0]], loc='t')
+
+
+@pplt.tests.image_compare
+def test_manual_labels():
+    """
+    Test mixed auto and manual labels. Passing labels but no handles does nothing
+    This is breaking change but probably best. We should not be "guessing" the
+    order objects were drawn in then assigning labels to them. Similar to using
+    OO interface and rejecting pyplot "current axes" and "current figure".
+    """
+    fig, ax = pplt.subplots()
+    h1, = ax.plot([0, 1, 2], label='label1')
+    h2, = ax.plot([0, 1, 1], label='label2')
+    for loc in ('best', 'bottom'):
+        ax.legend([h1, h2], loc=loc, labels=[None, 'override'])
+    fig, ax = pplt.subplots()
+    ax.plot([0, 1, 2])
+    ax.plot([0, 1, 1])
+    for loc in ('best', 'bottom'):
+        # ax.legend(loc=loc, labels=['a', 'b'])
+        ax.legend(['a', 'b'], loc=loc)  # same as above
+
+
+@pplt.tests.image_compare
+def test_contour_legend():
+    """
+    Support contour element labels. If has no label should trigger warning.
+    """
+    for label in ('label', None):
+        fig, axs = pplt.subplots(ncols=2)
+        ax = axs[0]
+        ax.contour(state.rand(5, 5), color='k', label=label, legend='b')
+        ax = axs[1]
+        ax.pcolor(state.rand(5, 5), label=label, legend='b')
+
+
+@pplt.tests.image_compare
+def test_histogram_legend():
+    """
+    Support complex histogram legends.
+    """
+    pplt.rc.inlinefmt = 'svg'
+    fig, ax = pplt.subplots()
+    res = ax.hist(
+        state.rand(500, 2), 4, labels=('label', 'other'), edgefix=True, legend='b'
+    )
+    ax.legend(res, loc='r', ncol=1)  # should issue warning after ignoring numpy arrays
+    df = pd.DataFrame(
+        {
+            'length': [1.5, 0.5, 1.2, 0.9, 3],
+            'width': [0.7, 0.2, 0.15, 0.2, 1.1]
+        },
+        index=['pig', 'rabbit', 'duck', 'chicken', 'horse']
+    )
+    fig, axs = pplt.subplots(ncols=3)
+    ax = axs[0]
+    res = ax.hist(df, bins=3, legend=True, lw=3)
+    ax.legend(loc='b')
+    for ax, meth in zip(axs[1:], ('bar', 'area')):
+        hs = getattr(ax, meth)(df, legend='ul', lw=3)
+        ax.legend(hs, loc='b')
+
+
+@pplt.tests.image_compare
+def test_multiple_calls():
+    """
+    Test successive plotting additions to guides.
+    """
+    fig, ax = pplt.subplots()
+    ax.pcolor(state.rand(10, 10), colorbar='b')
+    ax.pcolor(state.rand(10, 5), cmap='grays', colorbar='b')
+    ax.pcolor(state.rand(10, 5), cmap='grays', colorbar='b')
+
+    fig, ax = pplt.subplots()
+    data = state.rand(10, 5)
+    for i in range(data.shape[1]):
+        ax.plot(data[:, i], colorbar='b', label=f'x{i}', colorbar_kw={'label': 'hello'})
+
+
+@pplt.tests.image_compare
+def test_tuple_handles():
+    """
+    Test tuple legend handles.
+    """
+    from matplotlib import legend_handler
+    fig, ax = pplt.subplots(refwidth=3, abc='A.', abcloc='ul', span=False)
+    patches = ax.fill_between(state.rand(10, 3), stack=True)
+    lines = ax.line(1 + 0.5 * (state.rand(10, 3) - 0.5).cumsum(axis=0))
+    # ax.legend([(handles[0], lines[1])], ['joint label'], loc='bottom', queue=True)
+    for hs in (lines, patches):
+        ax.legend(
+            [tuple(hs[:3]) if len(hs) == 3 else hs],
+            ['joint label'],
+            loc='bottom',
+            queue=True,
+            ncol=1,
+            handlelength=4.5,
+            handleheight=1.5,
+            handler_map={tuple: legend_handler.HandlerTuple(pad=0, ndivide=3)}
+        )
diff --git a/proplot/tests/test_projections.py b/proplot/tests/test_projections.py
new file mode 100644
index 000000000..4f9ddf789
--- /dev/null
+++ b/proplot/tests/test_projections.py
@@ -0,0 +1,139 @@
+#!/usr/bin/env python3
+"""
+Test projection features.
+"""
+import cartopy.crs as ccrs
+import matplotlib.pyplot as plt
+import numpy as np
+
+import proplot as pplt
+
+state = np.random.RandomState(51423)
+
+
+@pplt.tests.image_compare
+def test_aspect_ratios():
+    """
+    Test aspect ratio adjustments.
+    """
+    fig, axs = pplt.subplots(ncols=2)
+    axs[0].format(aspect=1.5)
+
+    fig, axs = pplt.subplots(ncols=2, proj=('cart', 'cyl'), aspect=2)
+    axs[0].set_aspect(1)
+
+
+if pplt.internals._version_mpl <= '3.2':
+    @pplt.tests.image_compare
+    def test_basemap_labels():
+        """
+        Add basemap labels.
+        """
+        fig, axs = pplt.subplots(ncols=2, proj='robin', refwidth=3, basemap=True)
+        axs.format(coast=True, labels='rt')
+
+
+@pplt.tests.image_compare
+def test_cartopy_labels():
+    """
+    Add cartopy labels.
+    """
+    fig, axs = pplt.subplots(ncols=2, proj='robin', refwidth=3)
+    axs.format(coast=True, labels=True)
+    axs[0].format(inlinelabels=True)
+    axs[1].format(rotatelabels=True)
+
+
+@pplt.tests.image_compare
+def test_cartopy_contours():
+    """
+    Test bug with cartopy contours. Sometimes results in global coverage
+    with single color sometimes not.
+    """
+    N = 10
+    fig = plt.figure(figsize=(5, 2.5))
+    ax = fig.add_subplot(projection=ccrs.Mollweide())
+    ax.coastlines()
+    x = np.linspace(-180, 180, N)
+    y = np.linspace(-90, 90, N)
+    z = state.rand(N, N) * 10 - 5
+    m = ax.contourf(
+        x, y, z,
+        transform=ccrs.PlateCarree(),
+        cmap='RdBu_r',
+        vmin=-5,
+        vmax=5,
+    )
+    fig.colorbar(m, ax=ax)
+
+    fig = pplt.figure()
+    ax = fig.add_subplot(
+        projection=pplt.Mollweide(),
+        extent='auto'
+    )
+    ax.coastlines()
+    N = 10
+    m = ax.contourf(
+        np.linspace(0, 180, N),
+        np.linspace(0, 90, N)[1::2],
+        state.rand(N // 2, N) * 10 + 5,
+        cmap='BuRd',
+        transform=pplt.PlateCarree(),
+        edgefix=False
+    )
+    fig.colorbar(m, ax=ax)
+
+
+@pplt.tests.image_compare
+def test_cartopy_manual():
+    """
+    Test alternative workflow without classes.
+    """
+    fig = pplt.figure()
+    proj = pplt.Proj('npstere')
+    # fig.add_axes([0.1, 0.1, 0.9, 0.9], proj='geo', map_projection=proj)
+    fig.add_subplot(111, proj='geo', land=True, map_projection=proj)
+
+
+@pplt.tests.image_compare
+def test_three_axes():
+    """
+    Test basic 3D axes here.
+    """
+    pplt.rc['tick.minor'] = False
+    fig, ax = pplt.subplots(proj='3d', outerpad=3)
+
+
+@pplt.tests.image_compare
+def test_projection_dicts():
+    """
+    Test projection dictionaries.
+    """
+    fig = pplt.figure(refnum=1)
+    a = [
+        [1, 0],
+        [1, 4],
+        [2, 4],
+        [2, 4],
+        [3, 4],
+        [3, 0]
+    ]
+    fig.subplots(a, proj={1: 'cyl', 2: 'cart', 3: 'cart', 4: 'cart'})
+
+
+@pplt.tests.image_compare
+def test_polar_projections():
+    """
+    Rigorously test polar features here.
+    """
+    fig, ax = pplt.subplots(proj='polar')
+    ax.format(
+        rlabelpos=45,
+        thetadir=-1,
+        thetalines=90,
+        thetalim=(0, 270),
+        theta0='N',
+        r0=0,
+        rlim=(0.5, 1),
+        rlines=0.25,
+    )
diff --git a/proplot/tests/test_subplots.py b/proplot/tests/test_subplots.py
new file mode 100644
index 000000000..15b160265
--- /dev/null
+++ b/proplot/tests/test_subplots.py
@@ -0,0 +1,168 @@
+#!/usr/bin/env python3
+"""
+Test subplot layout.
+"""
+import numpy as np
+
+import proplot as pplt
+
+
+@pplt.tests.image_compare
+def test_align_labels():
+    """
+    Test spanning and aligned labels.
+    """
+    fig, axs = pplt.subplots(
+        [[2, 1, 4], [2, 3, 5]], refnum=2, refwidth=1.5, align=1, span=0
+    )
+    fig.format(xlabel='xlabel', ylabel='ylabel', abc='A.', abcloc='ul')
+    axs[0].format(ylim=(10000, 20000))
+    axs[-1].panel_axes('bottom', share=False)
+
+
+@pplt.tests.image_compare
+def test_share_all_basic():
+    """
+    Test sharing level all.
+    """
+    # Simple example
+    N = 10
+    fig, axs = pplt.subplots(nrows=1, ncols=2, refwidth=1.5, share='all')
+    axs[0].plot(np.arange(N) * 1e2, np.arange(N) * 1e4)
+    # Complex example
+    fig, axs = pplt.subplots(nrows=2, ncols=2, refwidth=1.5, share='all')
+    axs[0].panel('b')
+    pax = axs[0].panel('r')
+    pax.format(ylabel='label')
+    axs[0].plot(np.arange(N) * 1e2, np.arange(N) * 1e4)
+
+
+@pplt.tests.image_compare
+def test_span_labels():
+    """
+    Rigorous tests of spanning and aligned labels feature.
+    """
+    fig, axs = pplt.subplots([[1, 2, 4], [1, 3, 5]], refwidth=1.5, share=0, span=1)
+    fig.format(xlabel='xlabel', ylabel='ylabel', abc='A.', abcloc='ul')
+    axs[1].format()  # xlabel='xlabel')
+    axs[2].format()
+
+
+@pplt.tests.image_compare
+def test_title_deflection():
+    """
+    Test the deflection of titles above and below panels.
+    """
+    fig, ax = pplt.subplots()
+    # ax.format(abc='A.', title='Title', titleloc='left', titlepad=30)
+    tax = ax.panel_axes('top')
+    ax.format(titleabove=False)  # redirects to bottom
+    ax.format(abc='A.', title='Title', titleloc='left', titlepad=50)
+    ax.format(xlabel='xlabel', ylabel='ylabel', ylabelpad=50)
+    tax.format(title='Fear Me', title_kw={'size': 'x-large'})
+    tax.format(ultitle='Inner', titlebbox=True, title_kw={'size': 'med-large'})
+
+
+@pplt.tests.image_compare
+def test_complex_ticks():
+    """
+    Normally title offset with these different tick arrangements is tricky
+    but `_update_title_position` accounts for edge cases.
+    """
+    fig, axs = pplt.subplots(ncols=2)
+    axs[0].format(
+        xtickloc='both',
+        xticklabelloc='top',
+        xlabelloc='top',
+        title='title',
+        xlabel='xlabel',
+        suptitle='Test',
+    )
+    axs[1].format(
+        xtickloc='both',
+        xticklabelloc='top',
+        # xlabelloc='top',
+        xlabel='xlabel',
+        title='title',
+        suptitle='Test',
+    )
+
+
+@pplt.tests.image_compare
+def test_both_ticklabels():
+    """
+    Test both tick labels.
+    """
+    fig, ax = pplt.subplots()  # when both, have weird bug
+    ax.format(xticklabelloc='both', title='title', suptitle='Test')
+    fig, ax = pplt.subplots()  # when *just top*, bug disappears
+    ax.format(xtickloc='top', xticklabelloc='top', title='title', suptitle='Test')
+    fig, ax = pplt.subplots()  # not sure here
+    ax.format(xtickloc='both', xticklabelloc='neither', suptitle='Test')
+    fig, ax = pplt.subplots()  # doesn't seem to change the title offset though
+    ax.format(xtickloc='top', xticklabelloc='neither', suptitle='Test')
+
+
+@pplt.tests.image_compare
+def test_gridspec_copies():
+    """
+    Test whether gridspec copies work.
+    """
+    fig1, ax = pplt.subplots(ncols=2)
+    gs = fig1.gridspec.copy(left=5, wspace=0, right=5)
+    fig2 = pplt.figure()
+    fig2.add_subplots(gs)
+    fig = pplt.figure()
+    with pplt.tests.raises(ValueError):
+        fig.add_subplots(gs)  # should raise error
+    return (fig1, fig2)
+
+
+@pplt.tests.image_compare
+def test_aligned_outer_guides():
+    """
+    Test alignment adjustment.
+    """
+    fig, ax = pplt.subplot()
+    h1 = ax.plot(np.arange(5), label='foo')
+    h2 = ax.plot(np.arange(5) + 1, label='bar')
+    h3 = ax.plot(np.arange(5) + 2, label='baz')
+    ax.legend(h1, loc='bottom', align='left')
+    ax.legend(h2, loc='bottom', align='right')
+    ax.legend(h3, loc='b', align='c')
+    ax.colorbar('magma', loc='right', align='top', shrink=0.4)  # same as length
+    ax.colorbar('magma', loc='right', align='bottom', shrink=0.4)
+    ax.colorbar('magma', loc='left', align='top', length=0.6)  # should offset
+    ax.colorbar('magma', loc='left', align='bottom', length=0.6)
+    ax.legend(h1, loc='top', align='right', pad='4pt', frame=False)
+    ax.format(title='Very long title', titlepad=6, titleloc='left')
+
+
+def test_reference_aspect():
+    """
+    Rigorous test of reference aspect ratio accuracy.
+    """
+    # A simple test
+    refwidth = 1.5
+    fig, axs = pplt.subplots(ncols=2, refwidth=refwidth)
+    fig.auto_layout()
+    assert np.isclose(refwidth, axs[fig._refnum - 1]._get_size_inches()[0])
+
+    # A test with funky layout
+    refwidth = 1.5
+    fig, axs = pplt.subplots([[1, 1, 2, 2], [0, 3, 3, 0]], ref=3, refwidth=refwidth)
+    axs[1].panel_axes('left')
+    axs.format(xlocator=0.2, ylocator=0.2)
+    fig.auto_layout()
+    assert np.isclose(refwidth, axs[fig._refnum - 1]._get_size_inches()[0])
+
+    # A test with panels
+    refwidth = 2.0
+    fig, axs = pplt.subplots(
+        [[1, 1, 2], [3, 4, 5], [3, 4, 6]], hratios=(2, 1, 1), refwidth=refwidth
+    )
+    axs[2].panel_axes('right', width=0.5)
+    axs[0].panel_axes('bottom', width=0.5)
+    axs[3].panel_axes('left', width=0.5)
+    fig.auto_layout()
+    assert np.isclose(refwidth, axs[fig._refnum - 1]._get_size_inches()[0])