matplotlib · jkseppan · Jan 7, 2017 · Jan 21, 2017 · Jan 21, 2017 · Jan 22, 2017
diff --git a/conftest.py b/conftest.py
@@ -0,0 +1,11 @@
+from __future__ import (absolute_import, division, print_function,
+                        unicode_literals)
+
+
+def pytest_addoption(parser):
+    group = parser.getgroup("matplotlib", "matplotlib custom options")
+    group.addoption("--conversion-cache-max-size", action="store",
+                    help="conversion cache maximum size in bytes")
+    group.addoption("--conversion-cache-report-misses",
+                    action="store_true",
+                    help="report conversion cache misses")
diff --git a/lib/matplotlib/sphinxext/tests/conftest.py b/lib/matplotlib/sphinxext/tests/conftest.py
@@ -1,5 +1,6 @@
 from __future__ import (absolute_import, division, print_function,
                         unicode_literals)
 
-from matplotlib.testing.conftest import (mpl_test_settings,
-                                         pytest_configure, pytest_unconfigure)
+from matplotlib.testing.conftest import (
+    mpl_test_settings, pytest_configure, pytest_unconfigure,
+    pytest_sessionfinish, pytest_terminal_summary)
diff --git a/lib/matplotlib/testing/_conversion_cache.py b/lib/matplotlib/testing/_conversion_cache.py
@@ -0,0 +1,209 @@
+"""
+A cache of png files keyed by the MD5 hashes of corresponding svg and
+pdf files, to reduce test suite running times for svg and pdf files
+that stay exactly the same from one run to the next.
+
+The test setup in matplotlib.tests.conftest sets this up for image
+comparison tests and allows specifying the maximum cache size.
+"""
+
+from __future__ import (absolute_import, division, print_function,
+                        unicode_literals)
+
+import hashlib
+import shutil
+import os
+import warnings
+
+from matplotlib import _get_cachedir
+from matplotlib import cbook
+from matplotlib import checkdep_ghostscript
+from matplotlib import checkdep_inkscape
+
+
+class _ConversionCache(object):
+    """A cache that stores png files converted from svg or pdf formats.
+
+    The image comparison test cases compare svg and pdf files by
+    converting them to png files. When a test case has produced a
+    file, e.g. result.pdf, it queries this cache by the pathname
+    '/path/to/result_images/result.pdf'. The cache computes a hash of
+    the file (and the version of the external software used to convert
+    the file) and if a result by that hash value is available, writes
+    the data to the output location supplied by the caller. Otherwise
+    the test case has to run the conversion and can then insert the
+    result into the cache.
+
+    Parameters
+    ----------
+    directory : str, optional
+        Files are stored in this directory. Defaults to `'test_cache'` in
+        the overall Matplotlib cache directory.
+    max_size : int, optional
+        The flush method will delete files until their combined size is
+        under this limit, in bytes. Defaults to 100 megabytes.
+
+    """
+
+    def __init__(self, directory=None, max_size=int(1e8)):
+        # gets is the set of filenames queried from the cache
+        self.gets = set()
+        # hits is the set of filenames found in the cache when queried
+        self.hits = set()
+        if directory is not None:
+            self.cachedir = directory
+        else:
+            self.cachedir = self.get_cache_dir()
+        self.ensure_cache_dir()
+        if not isinstance(max_size, int):
+            raise ValueError("max_size is of type %s, expected int" %
+                             type(max_size))
+        self.max_size = max_size
+        self.cached_ext = '.png'
+        self.converter_version = {}
+        _, gs_version = checkdep_ghostscript()
+        if gs_version is not None:
+            self.converter_version['.pdf'] = gs_version.encode('utf-8')
+        is_version = checkdep_inkscape()
+        if is_version is not None:
+            self.converter_version['.svg'] = is_version.encode('utf-8')
+        self.hash_cache = {}
+
+    def get(self, filename, newname):
+        """Query the cache.
+
+        Parameters
+        ----------
+        filename : str
+            Full path to the original file.
+        newname : str
+            Path to which the result should be written.
+
+        Returns
+        -------
+        bool
+            True if the file was found in the cache and is now written
+            to `newname`.
+        """
+        self.gets.add(filename)
+        hash_value = self._get_file_hash(filename)
+        cached_file = os.path.join(self.cachedir, hash_value + self.cached_ext)
+        with cbook.Locked(self.cachedir):
+            if os.path.exists(cached_file):
+                shutil.copyfile(cached_file, newname)
+                self.hits.add(filename)
+                return True
+            else:
+                return False
+
+    def put(self, original, converted):
+        """Insert a file into the cache.
+
+        Parameters
+        ----------
+        original : str
+            Full path to the original file.
+        converted : str
+            Full path to the png file converted from the original.
+        """
+        hash_value = self._get_file_hash(original)
+        cached_file = os.path.join(self.cachedir, hash_value + self.cached_ext)
+        with cbook.Locked(self.cachedir):
+            shutil.copyfile(converted, cached_file)
+
+    def _get_file_hash(self, path, block_size=2 ** 20):
+        if path in self.hash_cache:
+            return self.hash_cache[path]
+        _, ext = os.path.splitext(path)
+        version_tag = self.converter_version.get(ext)
+        if version_tag is None:
+            warnings.warn(
+                ("Don't know the external converter for files with extension "
+                 "%s, cannot ensure cache invalidation on version update. "
+                 "Either the relevant conversion program is missing or caching"
+                 " is attempted for an unknown file type.")
+                % ext)
+        result = self._get_file_hash_static(path, block_size, version_tag)
+        self.hash_cache[path] = result
+        return result
+
+    @staticmethod
+    def _get_file_hash_static(path, block_size, version_tag):
+        # the parts of _get_file_hash that are called from the deprecated
+        # compare.get_file_hash; can merge into _get_file_hash once that
+        # function is removed
+        md5 = hashlib.md5()
+        with open(path, 'rb') as fd:
+            while True:
+                data = fd.read(block_size)
+                if not data:
+                    break
+                md5.update(data)
+        if version_tag is not None:
+            md5.update(version_tag)
+        return md5.hexdigest()
+
+    def report(self):
+        """Return information about the cache.
+
+        Returns
+        -------
+        r : dict
+            `r['gets']` is the set of files queried,
+            `r['hits']` is the set of files found in the cache
+        """
+        return dict(hits=self.hits, gets=self.gets)
+
+    def expire(self):
+        """Delete cached files until the disk usage is under the limit.
+
+        Orders files by access time, so the least recently used files
+        get deleted first.
+        """
+        with cbook.Locked(self.cachedir):
+            stats = {filename: os.stat(os.path.join(self.cachedir, filename))
+                     for filename in os.listdir(self.cachedir)
+                     if filename.endswith(self.cached_ext)}
+            usage = sum(f.st_size for f in stats.values())
+            to_free = usage - self.max_size
+            if to_free <= 0:
+                return
+
+            files = sorted(stats.keys(),
+                           key=lambda f: stats[f].st_atime,
+                           reverse=True)
+            while to_free > 0:
+                filename = files.pop()
+                os.remove(os.path.join(self.cachedir, filename))
+                to_free -= stats[filename].st_size
+
+    @staticmethod
+    def get_cache_dir():
+        cachedir = _get_cachedir()
+        if cachedir is None:
+            raise _CacheError('No suitable configuration directory')
+        cachedir = os.path.join(cachedir, 'test_cache')
+        return cachedir
+
+    def ensure_cache_dir(self):
+        if not os.path.exists(self.cachedir):
+            try:
+                cbook.mkdirs(self.cachedir)
+            except OSError as e:
+                raise _CacheError("Error creating cache directory %s: %s"
+                                  % (self.cachedir, str(e)))
+        if not os.access(self.cachedir, os.W_OK):
+            raise _CacheError("Cache directory %s not writable"
+                              % self.cachedir)
+
+
+class _CacheError(Exception):
+    def __init__(self, message):
+        self.message = message
+
+    def __str__(self):
+        return self.message
+
+
+# A global cache instance, set by the appropriate test runner.
+_conversion_cache = None
diff --git a/lib/matplotlib/testing/compare.py b/lib/matplotlib/testing/compare.py
@@ -5,21 +5,16 @@
 from __future__ import (absolute_import, division, print_function,
                         unicode_literals)
 
-import six
-
-import hashlib
 import os
-import shutil
 
 import numpy as np
 
 import matplotlib
+from matplotlib import cbook
 from matplotlib.compat import subprocess
+from matplotlib.testing import _conversion_cache as ccache
 from matplotlib.testing.exceptions import ImageComparisonFailure
 from matplotlib import _png
-from matplotlib import _get_cachedir
-from matplotlib import cbook
-from distutils import version
 
 __all__ = ['compare_float', 'compare_images', 'comparable_formats']
 
@@ -76,40 +71,6 @@ def compare_float(expected, actual, relTol=None, absTol=None):
     return msg or None
 
 
-def get_cache_dir():
-    cachedir = _get_cachedir()
-    if cachedir is None:
-        raise RuntimeError('Could not find a suitable configuration directory')
-    cache_dir = os.path.join(cachedir, 'test_cache')
-    if not os.path.exists(cache_dir):
-        try:
-            cbook.mkdirs(cache_dir)
-        except IOError:
-            return None
-    if not os.access(cache_dir, os.W_OK):
-        return None
-    return cache_dir
-
-
-def get_file_hash(path, block_size=2 ** 20):
-    md5 = hashlib.md5()
-    with open(path, 'rb') as fd:
-        while True:
-            data = fd.read(block_size)
-            if not data:
-                break
-            md5.update(data)
-
-    if path.endswith('.pdf'):
-        from matplotlib import checkdep_ghostscript
-        md5.update(checkdep_ghostscript()[1].encode('utf-8'))
-    elif path.endswith('.svg'):
-        from matplotlib import checkdep_inkscape
-        md5.update(checkdep_inkscape().encode('utf-8'))
-
-    return md5.hexdigest()
-
-
 def make_external_conversion_command(cmd):
     def convert(old, new):
         cmdline = cmd(old, new)
@@ -160,16 +121,39 @@ def comparable_formats():
     return ['png'] + list(converter)
 
 
-def convert(filename, cache):
+@cbook.deprecated('2.1')
+def get_cache_dir():
+    return ccache._ConversionCache.get_cache_dir()
+
+
+@cbook.deprecated('2.1')
+def get_file_hash(path, block_size=2 ** 20):
+    if path.endswith('.pdf'):
+        from matplotlib import checkdep_ghostscript
+        version_tag = checkdep_ghostscript()[1].encode('utf-8')
+    elif path.endswith('.svg'):
+        from matplotlib import checkdep_inkscape
+        version_tag = checkdep_inkscape().encode('utf-8')
+    else:
+        version_tag = None
+    return ccache._ConversionCache._get_file_hash_static(
+        path, block_size, version_tag)
+
+
+def convert(filename, cache=None):
     """
     Convert the named file into a png file.  Returns the name of the
     created file.
 
-    If *cache* is True, the result of the conversion is cached in
-    `matplotlib._get_cachedir() + '/test_cache/'`.  The caching is based
-    on a hash of the exact contents of the input file.  The is no limit
-    on the size of the cache, so it may need to be manually cleared
-    periodically.
+    Parameters
+    ----------
+    filename : str
+    cache : _ConversionCache, optional
+
+    Returns
+    -------
+    str
+        The converted file.
 
     """
     base, extension = filename.rsplit('.', 1)
@@ -190,23 +174,12 @@ def convert(filename, cache):
     # is out of date.
     if (not os.path.exists(newname) or
             os.stat(newname).st_mtime < os.stat(filename).st_mtime):
-        if cache:
-            cache_dir = get_cache_dir()
-        else:
-            cache_dir = None
-
-        if cache_dir is not None:
-            hash_value = get_file_hash(filename)
-            new_ext = os.path.splitext(newname)[1]
-            cached_file = os.path.join(cache_dir, hash_value + new_ext)
-            if os.path.exists(cached_file):
-                shutil.copyfile(cached_file, newname)
-                return newname
-
+        in_cache = cache and cache.get(filename, newname)
+        if in_cache:
+            return newname
         converter[extension](filename, newname)
-
-        if cache_dir is not None:
-            shutil.copyfile(newname, cached_file)
+        if cache:
+            cache.put(filename, newname)
 
     return newname
 
@@ -269,7 +242,7 @@ def calculate_rms(expectedImage, actualImage):
     return rms
 
 
-def compare_images(expected, actual, tol, in_decorator=False):
+def compare_images(expected, actual, tol, in_decorator=False, cache=None):
     """
     Compare two "image" files checking differences within a tolerance.
 
@@ -290,6 +263,7 @@ def compare_images(expected, actual, tol, in_decorator=False):
     in_decorator : bool
         If called from image_comparison decorator, this should be
         True. (default=False)
+    cache : matplotlib.testing._conversion_cache._ConversionCache, optional
 
     Example
     -------
@@ -311,8 +285,8 @@ def compare_images(expected, actual, tol, in_decorator=False):
         raise IOError('Baseline image %r does not exist.' % expected)
 
     if extension != 'png':
-        actual = convert(actual, False)
-        expected = convert(expected, True)
+        actual = convert(actual, cache)
+        expected = convert(expected, cache)
 
     # open the image files and remove the alpha channel (if it exists)
     expectedImage = _png.read_png_int(expected)