diff --git a/doc/api/colors_api.rst b/doc/api/colors_api.rst
index e7b6da70f641..5d6dd3f70880 100644
--- a/doc/api/colors_api.rst
+++ b/doc/api/colors_api.rst
@@ -14,42 +14,307 @@
    :no-members:
    :no-inherited-members:
 
-Classes
--------
+Color Conversion tools
+----------------------
+
+.. autosummary::
+   :toctree: _as_gen/
+   :template: autosummary.rst
+
+   is_color_like
+   same_color
+
+   hsv_to_rgb
+   rgb_to_hsv
+
+   to_hex
+   to_rgb
+   to_rgba
+   to_rgba_array
+
+   get_named_colors_mapping
+
+
+.. _norms_and_colormaps:
+
+Normalization and Colormapping of Continuous Data
+-------------------------------------------------
+
+Some `~.artist.Artist` classes can map an array of input data to RGBA
+values (e.g., `~.axes.Axes.scatter` or `~.axes.Axes.imshow`).  The
+machinery for this is implemented via the `~.cm.ScalarMappable` base
+class in `~.cm` and the `~.Normalize` and `~.Colormap` classes in
+`~.colors` (this module).
+
+At the core, colormapping is going from a scalar value to an RGB tuple (formally
+:math:`f(x) : ℝ^1 \rightarrow ℝ^3`).  To effectively communicate through the
+color we want to pick a :ref:`colormap suited to the data
+<sphx_glr_tutorials_colors_colormaps.py>`.  For continuous data types [#f1]_ a
+"good" colormap smoothly and continuously changes its RGB values as a function
+of the input, tracing out a 1-dimensional path through the 3-dimensional RGB
+space [#f2]_.  We can restrict the domain of :math:`f` to :math:`[0, 1]`,
+which we interpret as the normalized distance along the curve.  This lets us
+cleanly separate the mapping process from the continuous input data to RGB into
+two steps:
+
+1. the mapping between the user's data to distance along the curve
+2. the parameterized path through color space.
+
+The first step is expressed through the `.Normalize` family of classes and the
+second is expressed in Matplotlib via the `.Colormap` family of classes.  This
+allows us to fully independently pick the functional transform (e.g., linear vs
+log) from data space to distance along the curve space, what (user) data range
+to show (via the ``vmin`` and ``vmax`` attributes on `.Normalize`, or via the
+`.cm.ScalarMappable.set_clim` method), and what colors to use (by selecting the
+`.Colormap`).  Both `.Colormap` and `.Normalize` are implemented as `callable
+classes <https://docs.python.org/3/reference/datamodel.html#object.__call__>`__
+which allows us to bind some (mutable) state to a function call. The complete
+functionality is exposed in the `.ScalarMappable` family of artists, which hold
+`.Colormap` and `.Normalize` instances and are responsible for invoking them
+at draw time.
+
+The `.Normalize` family has 3 common attributes: *vmin*, *vmax*, and *clip*
+which control the data limits.  The `.Normalize.__call__` signature is ::
+
+  def __call__(self, value: RawData, clip: Optional[bool] = None) -> NormedData:
+     ...
+
+It takes in data in the user's data space and converts it to *NormedData* with
+the range:
+
+.. math::
+
+     \begin{cases}
+      \mathrm{under} & d <  vmin \\
+      [0, 1] & vmin \leq d \leq vmax \\
+      \mathrm{over} & vmax < d \\
+      \mathrm{bad} & \neg\,\mathrm{np.isfinite}(d)
+    \end{cases}
+
+
+
+The `.Colormap.__call__` signature when passed *NormedData* (floats) [#f3]_ is
+::
+
+  def __call__(self, X: NormedData,
+               alpha: Optional[float] = None, bytes: bool = False) -> RGBA:
+     ...
+
+In addition to the parameterized path through RGB (which handles values in
+:math:`[0, 1]`), `.Colormap` objects carry three additional colors:
+
+- *over* (`.Colormap.set_over` / `.Colormap.get_over`)
+- *under* (`.Colormap.set_under` / `.Colormap.get_under`)
+- *bad* (`.Colormap.set_bad` / `.Colormap.get_bad`)
+
+which control the color for the corresponding values in *NormedData*.
+By default the over and under colors are the top and bottom colors of
+the colormap respectively and bad is transparent.
+
+.. warning::
+
+   Using `.cm.get_cmap` may return to you a reference to a globally
+   visible instance of the colormap (rather than a new instance).  If
+   you plan to set the over/under/bad values we recommend you first
+   make a copy ::
+
+     from copy import copy
+     import matplotlib.cm as mcm
+
+     my_cmap = copy(mcm.get_cmap('viridis'))
+
+.. rubric:: Footnotes
+
+.. [#f1] Discrete data types, such as Categorical and Ordinal, have different
+         considerations.
+.. [#f2] Notably, the cubehelix colormap is named because it traces a helix
+         through the RGB color cube from black to white.
+.. [#f3] Discrete data, as handled by `.NoNorm` and `.BoundaryNorm` are passed
+         as integers and act as direct Look Up Table (LUT) indexes into the
+         colormap.
+
+In practice
+~~~~~~~~~~~
+
+To make the above section concrete, let's first consider the linear `.Normalize`
+
+.. ipython ::
+
+   In [104]: import matplotlib.colors as mcolors
+
+   In [105]: norm = mcolors.Normalize(vmin=100, vmax=300)
+
+   In [106]: norm
+   Out[106]: <matplotlib.colors.Normalize at 0x7f9bf441aeb0>
+
+If we now pass in values in the range of :math:`[vmin, vmax]`
+
+.. ipython ::
+
+
+   In [130]: norm([100, 200, 300])
+   Out[130]:
+   masked_array(data=[ 0. , 0.5, 1.],
+                mask=False,
+          fill_value=1e+20)
+
+We see that they are scaled as expected.  If we also pass in some over
+/ under / bad values
+
+.. ipython ::
+
+   In [131]: norm([0, 100, 200, 300, 400, np.nan])
+   Out[131]:
+   masked_array(data=[-0.5,  0. ,  0.5,  1. ,  1.5,  nan],
+                mask=False,
+          fill_value=1e+20)
+
+we see that they are also scaled and produce values outside of the
+range :math:`[0, 1]`.  If you need the values to be clipped, you can
+have the norm do that for you via the *clip* kwarg
+
+.. ipython ::
+
+   In [130]: norm([0, 100, 200, 300, 400, np.nan], clip=True)
+   Out[130]:
+   masked_array(data=[0. , 0. , 0.5, 1. , 1. , nan],
+                mask=False,
+          fill_value=1e+20)
+
+The default value of *clip* can be set when instantiating the
+`.Normalize` instance.
+
+We can also use a non-linear norm
+
+.. ipython ::
+
+   In [136]: log_norm = mcolors.LogNorm(10, 1000)
+
+   In [137]: log_norm([10, 100, 1000])
+   Out[137]:
+   masked_array(data=[0.0, 0.5, 1.0],
+                mask=[False, False, False],
+          fill_value=1e+20)
+
+if the data has a large dynamic range.
+
+Once we have normalized our data we can pass it to the colormap
+
+.. ipython ::
+
+   In [102]: import copy
+
+   In [103]: import matplotlib.cm as mcm
+
+   In [141]: viridis = copy.copy(mcm.get_cmap('viridis'))
+
+.. ipython ::
+
+   In [143]: viridis([0, .5, 1])
+   Out[143]:
+   array([[0.267004, 0.004874, 0.329415, 1.      ],
+          [0.127568, 0.566949, 0.550556, 1.      ],
+          [0.993248, 0.906157, 0.143936, 1.      ]])
+
+Which pulls out the bottom, middle, and top color of the *viridis* colormap.
+If we set the over/under/bad colors and pass out-of-range values we can also
+see them pulled out:
+
+.. ipython ::
+
+   In [144]: viridis.set_over('w')
+
+   In [147]: viridis.set_under('k')
+
+   In [148]: viridis.set_bad('r')
+
+   In [149]: viridis([1.5, -0.5, np.nan])
+   Out[149]:
+   array([[1., 1., 1., 1.],
+          [0., 0., 0., 1.],
+          [1., 0., 0., 1.]])
+
+
+
+Directly using a `.Colormap` outside of a `.ScalarMappable` can be useful
+to generate a family of coherent colors for plotting
+
+.. plot::
+    :include-source:
+
+    import matplotlib.cm as mcm
+    import numpy as np
+
+    cmap = mcm.get_cmap('viridis')
+    array_of_colors = cmap(np.linspace(0, 1, 5))
+
+    x = np.linspace(0, 1, 25)
+    fig, ax = plt.subplots(constrained_layout=True)
+    for j, color in enumerate(array_of_colors):
+        ax.plot(x, x**j, color=color, label=f'$x^{j}$')
+    ax.legend()
+
+API
+~~~
+
+Colormap Classes
+++++++++++++++++
 
 .. autosummary::
    :toctree: _as_gen/
    :template: autosummary.rst
 
-   BoundaryNorm
    Colormap
-   CenteredNorm
-   LightSource
    LinearSegmentedColormap
    ListedColormap
-   LogNorm
-   NoNorm
+
+
+.. inheritance-diagram:: matplotlib.colors.Colormap matplotlib.colors.LinearSegmentedColormap matplotlib.colors.ListedColormap
+   :parts: 1
+   :private-bases:
+
+
+Norm Classes
+++++++++++++
+
+.. autosummary::
+   :toctree: _as_gen/
+   :template: autosummary.rst
+
    Normalize
+   LogNorm
+   CenteredNorm
+   TwoSlopeNorm
    PowerNorm
    SymLogNorm
-   TwoSlopeNorm
    FuncNorm
+   BoundaryNorm
+   NoNorm
+
+
+.. inheritance-diagram:: matplotlib.colors.Normalize matplotlib.colors.LogNorm matplotlib.colors.PowerNorm matplotlib.colors.NoNorm matplotlib.colors.TwoSlopeNorm matplotlib.colors.SymLogNorm matplotlib.colors.BoundaryNorm matplotlib.colors.FuncNorm matplotlib.colors.CenteredNorm
+   :parts: 1
+   :private-bases:
+
+
+Factory Functions & Decorators
+++++++++++++++++++++++++++++++
 
-Functions
----------
 
 .. autosummary::
    :toctree: _as_gen/
    :template: autosummary.rst
 
    from_levels_and_colors
-   hsv_to_rgb
-   rgb_to_hsv
-   to_hex
-   to_rgb
-   to_rgba
-   to_rgba_array
-   is_color_like
-   same_color
-   get_named_colors_mapping
    make_norm_from_scale
+
+
+
+Hill Shading
+------------
+
+.. autosummary::
+   :toctree: _as_gen/
+   :template: autosummary.rst
+
+   LightSource
diff --git a/doc/api/image_api.rst b/doc/api/image_api.rst
index df3177395eef..4b742b5bf3f6 100644
--- a/doc/api/image_api.rst
+++ b/doc/api/image_api.rst
@@ -3,6 +3,175 @@
 ********************
 
 .. automodule:: matplotlib.image
-   :members:
-   :undoc-members:
-   :show-inheritance:
+   :no-members:
+   :no-inherited-members:
+
+
+Image Artists
+-------------
+
+.. inheritance-diagram::    matplotlib.image._ImageBase matplotlib.image.BboxImage matplotlib.image.FigureImage matplotlib.image.PcolorImage matplotlib.image.AxesImage matplotlib.image.NonUniformImage
+   :parts: 1
+   :private-bases:
+
+
+.. autosummary::
+   :toctree: _as_gen/
+   :template: autosummary.rst
+
+   _ImageBase
+   AxesImage
+   NonUniformImage
+   PcolorImage
+   FigureImage
+   BboxImage
+
+
+Resampling and Colormapping
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When Matplotlib rasterizes an image to save / display a Figure, we
+need to resample the user supplied data because the size of the input,
+in "data" pixels, will in general not match the size, in "screen"
+pixels, of the output.  This resampling process can introduce a
+variety of artifacts and the default interpolation is chosen to avoid
+aliasing in common cases (see
+:doc:`/gallery/images_contours_and_fields/image_antialiasing`).  The
+details of how we do the resampling is controlled by the
+*interpolation* kwarg and, depending on the kernel, may also
+implicitly smooth the user data.
+
+The processing steps for rendering a pseudo color image are currently:
+
+1. resample the user input to the required dimensions
+2. normalize the user data via a `~.colors.Normalize` instance
+3. colormap from the normalized data to RGBA via a `~.colors.Colormap` instance
+
+Prior to Matplotlib 2.0 we did the normalization and colormapping
+first and then resampled in RGBA space to fit the screen.  However,
+because colormaps are not straight lines in RGB space, the RGBA-interpolated
+values may "cut the corner" and produce colors in the output image
+that are not present in the colormap when the data is changing close
+to the full range on the scale of a few screen pixels.  To fix this
+problem we re-ordered the processing in 2.0.
+
+
+
+
+What you need to know about Floating Point Arithmetic for Colormapping
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Floating point numbers, despite being ubiquitous, are not fully
+understood by most practitioners.  For a concise discussion of how
+floating point numbers work see `<https://floating-point-gui.de/>`_,
+for a thorough review see `Goldberg, ACM Computing Surveys (1991)
+10.1145/103162.103163 <https://doi.org/10.1145/103162.103163>`__
+(paywall), or for all of the details see `IEEE Standard for
+Floating Point Arithmetic (IEEE std 754) 10.1109/IEEESTD.2008.4610935
+<https://doi.org/10.1109/IEEESTD.2008.4610935>`__ (paywall).  For the
+purposes of this discussion we need to know:
+
+1. There are only a finite number of "floating point numbers" (that is,
+   values that can be represented by an IEEE float in the computer) and
+   hence they can not exactly represent all Real Numbers.  Between
+   two Real Numbers there is an infinite number of Real numbers, hence
+   the floating point numbers and computation expressed in a computer
+   are an approximation of the Real Numbers.
+2. The absolute distance between adjacent floating point numbers
+   scales with the magnitude, while the relative distance remains
+   the same.  This is a consequence of the implementation of IEEE
+   floats.
+3. During computation results are rounded to the nearest
+   representable value.  Working with numbers that are either almost
+   identical or vastly different orders of magnitude exaggerates the
+   errors due to this rounding.
+
+This is relevant to images because, as an implementation detail, we
+make use of the Agg library to do the resampling from the data space
+to screen space and that code clips all input values to the range
+:math:`[0, 1]`.  In addition to mapping the colors "in range" we also
+map over, under, and bad values (see :ref:`norms_and_colormaps`) which need to be
+preserved through the resampling process.  Thus, we:
+
+1. scale the data to :math:`[.1, .9]`
+2. pass the data to Agg to resample the pixels
+3. scale back to the original data range
+
+and then resume going through the user supplied normalization and colormap.
+
+Naively, this could be expressed as ::
+
+  data_min, data_max = data.min(), data.max()
+  # scale to [.1, .9]
+  rescaled = .1 + .8 * (data - data_min) / (data_max - data_min)
+  # get the correct number of pixels
+  resampled = resample(rescaled)
+  # scale back to original data range
+  scaled = (resampled - .1) / .8 * (data_max - data_min) + data_min
+
+For "most" user data this is OK, but it can fail in interesting ways.
+
+If the range of the input data is large, but the range the user actually
+cares about is small this will effectively map all of the interesting
+data to the same value!  To counteract this, we check whether the min /
+max of the data are drastically different from the vmin / vmax of the
+norm; if so, we use a data range expanded from vmin / vmax in the rescaling.
+This was addressed in :ghissue:`10072`, :ghpull:`10133`, and
+:ghpull:`11047`.
+
+Due to floating point math being an approximation of the exact infinite
+precision computation, not all values "round trip" identically.  This
+can cause the rescaling to move values in the input data that are very
+close to the values of vmin or vmax to the other side.  In the default
+case, when the over and under colors are equal to the top and bottom
+colors of the colormap respectively this is not visually apparent,
+however if the user sets a different color for over/under this is
+extremely apparent.  The solution is to also rescale the vmin and vmax
+values.  Despite accumulating errors, the float operations will
+preserve the relative ordering of values under :math:`\geq` and
+:math:`\leq`.  This was reported in :ghissue:`16910` and fixed in
+:ghpull:`17636`.
+
+Due to rescaling the vmin and vmax, under certain conditions the sign
+of the vmin may change.  In the case of a linear `~.colors.Normalize`
+this is not a problem, but in the case of a `~.colors.LogNorm` we
+check that both vmin and vmax are greater than 0.  This was reported
+in :ghissue:`18415` and fixed in :ghpull:`18458` by special casing
+`~.colors.LogNorm` and clipping vmin to be greater than 0.
+
+
+
+
+Helper functions
+----------------
+
+
+
+.. autosummary::
+   :toctree: _as_gen/
+   :template: autosummary.rst
+
+
+   composite_images
+   pil_to_array
+
+
+
+Image I/O functions
+-------------------
+
+These functions can be used to read, save, and generate thumbnails of
+files on disk.  These are here for historical reasons, and while it is
+unlikely we will remove them, please use a dedicated image I/O library
+(such as `imageio <https://imageio.github.io/>`__, `pillow
+<https://pillow.readthedocs.io/en/stable/>`__, or `tifffile
+<https://pypi.org/project/tifffile/>`__) instead.
+
+
+.. autosummary::
+   :toctree: _as_gen/
+   :template: autosummary.rst
+
+   imread
+   imsave
+   thumbnail
diff --git a/lib/matplotlib/colors.py b/lib/matplotlib/colors.py
index c5db6117f1bc..f8693e88b602 100644
--- a/lib/matplotlib/colors.py
+++ b/lib/matplotlib/colors.py
@@ -1099,8 +1099,33 @@ class Normalize:
     """
     A class which, when called, linearly normalizes data into the
     ``[0.0, 1.0]`` interval.
+
+    Attributes
+    ----------
+    vmin : float or None
+       The value mapped to 0.  Any input data less than *vmin* is mapped to
+       "under"
+
+       If `None`, will be inferred from the minimum of the first input data
+       passed to `.__call__`.
+
+    vmax : float or None
+       The value mapped to 1.  Any input data greater than *vmax* is mapped
+       to "over"
+
+       If `None`, will be inferred from the maximum of the first input data
+       passed to `.__call__`.
+
+    clip : bool
+       If `True`, then the input data is clipped to *vmin* and *vmax* before
+       normalizing.  This means there will be no value mapped as over or under.
+
     """
 
+    def __repr__(self):
+        cls_name = f'{self.__class__.__module__}.{self.__class__.__qualname__}'
+        return f'<{cls_name}, vmin={self.vmin}, vmax={self.vmax}>'
+
     def __init__(self, vmin=None, vmax=None, clip=False):
         """
         Parameters
diff --git a/lib/matplotlib/image.py b/lib/matplotlib/image.py
index ca5b7da5f808..5ad97bd9e795 100644
--- a/lib/matplotlib/image.py
+++ b/lib/matplotlib/image.py
@@ -1621,8 +1621,6 @@ def imsave(fname, arr, vmin=None, vmax=None, cmap=None, format=None,
     else:
         # Don't bother creating an image; this avoids rounding errors on the
         # size when dividing and then multiplying by dpi.
-        sm = cm.ScalarMappable(cmap=cmap)
-        sm.set_clim(vmin, vmax)
         if origin is None:
             origin = mpl.rcParams["image.origin"]
         if origin == "lower":
@@ -1635,6 +1633,8 @@ def imsave(fname, arr, vmin=None, vmax=None, cmap=None, format=None,
             # as is, saving a few operations.
             rgba = arr
         else:
+            sm = cm.ScalarMappable(cmap=cmap)
+            sm.set_clim(vmin, vmax)
             rgba = sm.to_rgba(arr, bytes=True)
         if pil_kwargs is None:
             pil_kwargs = {}
@@ -1674,9 +1674,7 @@ def imsave(fname, arr, vmin=None, vmax=None, cmap=None, format=None,
 
 def pil_to_array(pilImage):
     """
-    Load a `PIL image`_ and return it as a numpy int array.
-
-    .. _PIL image: https://pillow.readthedocs.io/en/latest/reference/Image.html
+    Load a `PIL.Image.Image` image and return it as a numpy int array.
 
     Returns
     -------