matplotlib · tacaswell · Apr 16, 2017 · Mar 26, 2017 · Mar 26, 2017 · Mar 26, 2017
diff --git a/.travis.yml b/.travis.yml
@@ -45,14 +45,15 @@ env:
     - NPROC=2
     - INSTALL_PEP8=
     - RUN_PEP8=
+    - NOSE=
     - PYTEST_ARGS="-ra --maxfail=1 --timeout=300 --durations=25 --cov-report= --cov=lib -n $NPROC"
     - PYTHON_ARGS=
     - DELETE_FONT_CACHE=
 
 matrix:
   include:
     - python: 2.7
-      env: MOCK=mock NUMPY=numpy==1.7.1 PANDAS=pandas
+      env: MOCK=mock NUMPY=numpy==1.7.1 PANDAS=pandas NOSE=nose
     - python: 2.7
       env: BUILD_DOCS=true
     - python: 3.4
@@ -114,7 +115,7 @@ install:
     pip install --upgrade setuptools
   - |
     # Install dependencies from pypi
-    pip install $PRE python-dateutil $NUMPY pyparsing!=2.1.6 $PANDAS cycler codecov coverage $MOCK
+    pip install $PRE python-dateutil $NUMPY pyparsing!=2.1.6 $PANDAS cycler codecov coverage $MOCK $NOSE
     pip install $PRE -r doc-requirements.txt
 
     # pytest-cov>=2.3.1 due to https://github.com/pytest-dev/pytest-cov/issues/124

diff --git a/lib/matplotlib/sphinxext/tests/conftest.py b/lib/matplotlib/sphinxext/tests/conftest.py
@@ -2,4 +2,5 @@
                         unicode_literals)
 
 from matplotlib.testing.conftest import (mpl_test_settings,
+                                         mpl_image_comparison_parameters,
                                          pytest_configure, pytest_unconfigure)
diff --git a/lib/matplotlib/testing/compare.py b/lib/matplotlib/testing/compare.py
@@ -366,7 +366,7 @@ def calculate_rms(expectedImage, actualImage):
     "Calculate the per-pixel errors, then compute the root mean square error."
     if expectedImage.shape != actualImage.shape:
         raise ImageComparisonFailure(
-            "image sizes do not match expected size: {0} "
+            "Image sizes do not match expected size: {0} "
             "actual size {1}".format(expectedImage.shape, actualImage.shape))
     num_values = expectedImage.size
     abs_diff_image = abs(expectedImage - actualImage)
@@ -470,7 +470,10 @@ def save_diff_image(expected, actual, output):
         actual, actualImage, expected, expectedImage)
     expectedImage = np.array(expectedImage).astype(float)
     actualImage = np.array(actualImage).astype(float)
-    assert expectedImage.shape == actualImage.shape
+    if expectedImage.shape != actualImage.shape:
+        raise ImageComparisonFailure(
+            "Image sizes do not match expected size: {0} "
+            "actual size {1}".format(expectedImage.shape, actualImage.shape))
     absDiffImage = np.abs(expectedImage - actualImage)
 
     # expand differences in luminance domain

diff --git a/lib/matplotlib/testing/conftest.py b/lib/matplotlib/testing/conftest.py
@@ -31,7 +31,7 @@ def mpl_test_settings(request):
         backend = backend_marker.args[0]
         prev_backend = matplotlib.get_backend()
 
-    style = 'classic'
+    style = '_classic_test'  # Default of cleanup and image_comparison too.
     style_marker = request.keywords.get('style')
     if style_marker is not None:
         assert len(style_marker.args) == 1, \
@@ -53,3 +53,27 @@ def mpl_test_settings(request):
             plt.switch_backend(prev_backend)
         _do_cleanup(original_units_registry,
                     original_settings)
+
+
+@pytest.fixture
+def mpl_image_comparison_parameters(request, extension):
+    # This fixture is applied automatically by the image_comparison decorator.
+    #
+    # The sole purpose of this fixture is to provide an indirect method of
+    # obtaining parameters *without* modifying the decorated function
+    # signature. In this way, the function signature can stay the same and
+    # pytest won't get confused.
+    # We annotate the decorated function with any parameters captured by this
+    # fixture so that they can be used by the wrapper in image_comparison.
+    baseline_images = request.keywords['baseline_images'].args[0]
+    if baseline_images is None:
+        # Allow baseline image list to be produced on the fly based on current
+        # parametrization.
+        baseline_images = request.getfixturevalue('baseline_images')
+
+    func = request.function
+    func.__wrapped__.parameters = (baseline_images, extension)
+    try:
+        yield
+    finally:
+        delattr(func.__wrapped__, 'parameters')
diff --git a/lib/matplotlib/testing/decorators.py b/lib/matplotlib/testing/decorators.py
@@ -150,6 +150,7 @@ def wrapped_callable(*args, **kwargs):
         return make_cleanup
     else:
         result = make_cleanup(style)
+        # Default of mpl_test_settings fixture and image_comparison too.
         style = '_classic_test'
         return result
 
@@ -232,42 +233,24 @@ def _mark_xfail_if_format_is_uncomparable(extension):
         return extension
 
 
-class ImageComparisonDecorator(CleanupTest):
-    def __init__(self, baseline_images, extensions, tol,
-                 freetype_version, remove_text, savefig_kwargs, style):
+class _ImageComparisonBase(object):
+    """
+    Image comparison base class
+
+    This class provides *just* the comparison-related functionality and avoids
+    any code that would be specific to any testing framework.
+    """
+    def __init__(self, tol, remove_text, savefig_kwargs):
         self.func = self.baseline_dir = self.result_dir = None
-        self.baseline_images = baseline_images
-        self.extensions = extensions
         self.tol = tol
-        self.freetype_version = freetype_version
         self.remove_text = remove_text
         self.savefig_kwargs = savefig_kwargs
-        self.style = style
 
     def delayed_init(self, func):
         assert self.func is None, "it looks like same decorator used twice"
         self.func = func
         self.baseline_dir, self.result_dir = _image_directories(func)
 
-    def setup(self):
-        func = self.func
-        plt.close('all')
-        self.setup_class()
-        try:
-            matplotlib.style.use(self.style)
-            matplotlib.testing.set_font_settings_for_testing()
-            func()
-            assert len(plt.get_fignums()) == len(self.baseline_images), (
-                "Test generated {} images but there are {} baseline images"
-                .format(len(plt.get_fignums()), len(self.baseline_images)))
-        except:
-            # Restore original settings before raising errors during the update.
-            self.teardown_class()
-            raise
-
-    def teardown(self):
-        self.teardown_class()
-
     def copy_baseline(self, baseline, extension):
         baseline_path = os.path.join(self.baseline_dir, baseline)
         orig_expected_fname = baseline_path + '.' + extension
@@ -303,6 +286,50 @@ def compare(self, idx, baseline, extension):
         expected_fname = self.copy_baseline(baseline, extension)
         _raise_on_image_difference(expected_fname, actual_fname, self.tol)
 
+
+class ImageComparisonTest(CleanupTest, _ImageComparisonBase):
+    """
+    Nose-based image comparison class
+
+    This class generates tests for a nose-based testing framework. Ideally,
+    this class would not be public, and the only publicly visible API would
+    be the :func:`image_comparison` decorator. Unfortunately, there are
+    existing downstream users of this class (e.g., pytest-mpl) so it cannot yet
+    be removed.
+    """
+    def __init__(self, baseline_images, extensions, tol,
+                 freetype_version, remove_text, savefig_kwargs, style):
+        _ImageComparisonBase.__init__(self, tol, remove_text, savefig_kwargs)
+        self.baseline_images = baseline_images
+        self.extensions = extensions
+        self.freetype_version = freetype_version
+        self.style = style
+
+    def setup(self):
+        func = self.func
+        plt.close('all')
+        self.setup_class()
+        try:
+            matplotlib.style.use(self.style)
+            matplotlib.testing.set_font_settings_for_testing()
+            func()
+            assert len(plt.get_fignums()) == len(self.baseline_images), (
+                "Test generated {} images but there are {} baseline images"
+                .format(len(plt.get_fignums()), len(self.baseline_images)))
+        except:
+            # Restore original settings before raising errors.
+            self.teardown_class()
+            raise
+
+    def teardown(self):
+        self.teardown_class()
+
+    @staticmethod
+    @cbook.deprecated('2.1',
+                      alternative='remove_ticks_and_titles')
+    def remove_text(figure):
+        remove_ticks_and_titles(figure)
+
     def nose_runner(self):
         func = self.compare
         func = _checked_on_freetype_version(self.freetype_version)(func)
@@ -312,68 +339,89 @@ def nose_runner(self):
             for extension in self.extensions:
                 yield funcs[extension], idx, baseline, extension
 
-    def pytest_runner(self):
-        from pytest import mark
+    def __call__(self, func):
+        self.delayed_init(func)
+        import nose.tools
 
-        extensions = map(_mark_xfail_if_format_is_uncomparable,
-                         self.extensions)
+        @nose.tools.with_setup(self.setup, self.teardown)
+        def runner_wrapper():
+            for case in self.nose_runner():
+                yield case
 
-        if len(set(self.baseline_images)) == len(self.baseline_images):
-            @mark.parametrize("extension", extensions)
-            @mark.parametrize("idx,baseline", enumerate(self.baseline_images))
-            @_checked_on_freetype_version(self.freetype_version)
-            def wrapper(idx, baseline, extension):
-                __tracebackhide__ = True
-                self.compare(idx, baseline, extension)
-        else:
-            # Some baseline images are repeated, so run this in serial.
-            @mark.parametrize("extension", extensions)
-            @_checked_on_freetype_version(self.freetype_version)
-            def wrapper(extension):
-                __tracebackhide__ = True
-                for idx, baseline in enumerate(self.baseline_images):
-                    self.compare(idx, baseline, extension)
+        return _copy_metadata(func, runner_wrapper)
 
 
-        # sadly we cannot use fixture here because of visibility problems
-        # and for for obvious reason avoid `_nose.tools.with_setup`
-        wrapper.setup, wrapper.teardown = self.setup, self.teardown
+def _pytest_image_comparison(baseline_images, extensions, tol,
+                             freetype_version, remove_text, savefig_kwargs,
+                             style):
+    """
+    Decorate function with image comparison for pytest.
 
-        return wrapper
+    This function creates a decorator that wraps a figure-generating function
+    with image comparison code. Pytest can become confused if we change the
+    signature of the function, so we indirectly pass anything we need via the
+    `mpl_image_comparison_parameters` fixture and extra markers.
+    """
+    import pytest
+
+    extensions = map(_mark_xfail_if_format_is_uncomparable, extensions)
+
+    def decorator(func):
+        # Parameter indirection; see docstring above and comment below.
+        @pytest.mark.usefixtures('mpl_image_comparison_parameters')
+        @pytest.mark.parametrize('extension', extensions)
+        @pytest.mark.baseline_images(baseline_images)
+        # END Parameter indirection.
+        @pytest.mark.style(style)
+        @_checked_on_freetype_version(freetype_version)
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):
+            __tracebackhide__ = True
+            img = _ImageComparisonBase(tol=tol, remove_text=remove_text,
+                                       savefig_kwargs=savefig_kwargs)
+            img.delayed_init(func)
+            matplotlib.testing.set_font_settings_for_testing()
+            func(*args, **kwargs)
 
-    def __call__(self, func):
-        self.delayed_init(func)
-        if is_called_from_pytest():
-            return _copy_metadata(func, self.pytest_runner())
-        else:
-            import nose.tools
+            # Parameter indirection:
+            # This is hacked on via the mpl_image_comparison_parameters fixture
+            # so that we don't need to modify the function's real signature for
+            # any parametrization. Modifying the signature is very very tricky
+            # and likely to confuse pytest.
+            baseline_images, extension = func.parameters
 
-            @nose.tools.with_setup(self.setup, self.teardown)
-            def runner_wrapper():
-                try:
-                    for case in self.nose_runner():
-                        yield case
-                except GeneratorExit:
-                    # nose bug...
-                    self.teardown()
+            assert len(plt.get_fignums()) == len(baseline_images), (
+                "Test generated {} images but there are {} baseline images"
+                .format(len(plt.get_fignums()), len(baseline_images)))
+            for idx, baseline in enumerate(baseline_images):
+                img.compare(idx, baseline, extension)
 
-            return _copy_metadata(func, runner_wrapper)
+        wrapper.__wrapped__ = func  # For Python 2.7.
+        return _copy_metadata(func, wrapper)
 
+    return decorator
 
-def image_comparison(baseline_images=None, extensions=None, tol=0,
+
+def image_comparison(baseline_images, extensions=None, tol=0,
                      freetype_version=None, remove_text=False,
-                     savefig_kwarg=None, style='_classic_test'):
+                     savefig_kwarg=None,
+                     # Default of mpl_test_settings fixture and cleanup too.
+                     style='_classic_test'):
     """
     Compare images generated by the test with those specified in
     *baseline_images*, which must correspond else an
     ImageComparisonFailure exception will be raised.
 
     Arguments
     ---------
-    baseline_images : list
+    baseline_images : list or None
         A list of strings specifying the names of the images generated by
         calls to :meth:`matplotlib.figure.savefig`.
 
+        If *None*, the test function must use the ``baseline_images`` fixture,
+        either as a parameter or with ``pytest.mark.usefixtures``. This value
+        is only allowed when using pytest.
+
     extensions : [ None | list ]
 
         If None, defaults to all supported extensions.
@@ -400,9 +448,6 @@ def image_comparison(baseline_images=None, extensions=None, tol=0,
         '_classic_test' style.
 
     """
-    if baseline_images is None:
-        raise ValueError('baseline_images must be specified')
-
     if extensions is None:
         # default extensions to test
         extensions = ['png', 'pdf', 'svg']
@@ -411,10 +456,19 @@ def image_comparison(baseline_images=None, extensions=None, tol=0,
         #default no kwargs to savefig
         savefig_kwarg = dict()
 
-    return ImageComparisonDecorator(
-        baseline_images=baseline_images, extensions=extensions, tol=tol,
-        freetype_version=freetype_version, remove_text=remove_text,
-        savefig_kwargs=savefig_kwarg, style=style)
+    if is_called_from_pytest():
+        return _pytest_image_comparison(
+            baseline_images=baseline_images, extensions=extensions, tol=tol,
+            freetype_version=freetype_version, remove_text=remove_text,
+            savefig_kwargs=savefig_kwarg, style=style)
+    else:
+        if baseline_images is None:
+            raise ValueError('baseline_images must be specified')
+
+        return ImageComparisonTest(
+            baseline_images=baseline_images, extensions=extensions, tol=tol,
+            freetype_version=freetype_version, remove_text=remove_text,
+            savefig_kwargs=savefig_kwarg, style=style)
 
 
 def _image_directories(func):

diff --git a/lib/matplotlib/tests/baseline_images/test_compare_images/simple.pdf b/lib/matplotlib/tests/baseline_images/test_compare_images/simple.pdf
diff --git a/lib/matplotlib/tests/baseline_images/test_compare_images/simple.png b/lib/matplotlib/tests/baseline_images/test_compare_images/simple.png