diff --git a/doc/users/whats_new/reproducible_ps_pdf.rst b/doc/users/whats_new/reproducible_ps_pdf.rst index 2e8294f1e414..a8c9e9cf9d59 100644 --- a/doc/users/whats_new/reproducible_ps_pdf.rst +++ b/doc/users/whats_new/reproducible_ps_pdf.rst @@ -1,10 +1,13 @@ -Reproducible PS and PDF output ------------------------------- +Reproducible PS, PDF and SVG output +----------------------------------- The ``SOURCE_DATE_EPOCH`` environment variable can now be used to set the timestamp value in the PS and PDF outputs. See https://reproducible-builds.org/specs/source-date-epoch/ +Alternatively, calling ``savefig`` with ``metadata={'CreationDate': None}`` +will omit the timestamp altogether. + The reproducibility of the output from the PS and PDF backends has so far been tested using various plot elements but only default values of options such as ``{ps,pdf}.fonttype`` that can affect the output at a @@ -12,3 +15,14 @@ low level, and not with the mathtext or usetex features. When matplotlib calls external tools (such as PS distillers or LaTeX) their versions need to be kept constant for reproducibility, and they may add sources of nondeterminism outside the control of matplotlib. + +For SVG output, the ``svg.hashsalt`` rc parameter was added in an +earlier release. This parameter changes some random identifiers in the +SVG file to be deterministic. The downside of this setting is that if +more than one file is generated using deterministic identifiers +and they end up as parts of one larger document, the identifiers can +collide and cause the different parts to affect each other. + +These features are now enabled in the tests for the PDF and SVG +backends, so most test output files (but not all of them) are now +deterministic. 
diff --git a/lib/matplotlib/backends/backend_pdf.py b/lib/matplotlib/backends/backend_pdf.py index f6fa7ea12345..a5e6253ca387 100644 --- a/lib/matplotlib/backends/backend_pdf.py +++ b/lib/matplotlib/backends/backend_pdf.py @@ -472,7 +472,6 @@ def __init__(self, filename, metadata=None): 'Pages': self.pagesObject} self.writeObject(self.rootObject, root) - revision = '' # get source date from SOURCE_DATE_EPOCH, if set # See https://reproducible-builds.org/specs/source-date-epoch/ source_date_epoch = os.getenv("SOURCE_DATE_EPOCH") @@ -484,11 +483,13 @@ def __init__(self, filename, metadata=None): self.infoDict = { 'Creator': 'matplotlib %s, http://matplotlib.org' % __version__, - 'Producer': 'matplotlib pdf backend%s' % revision, + 'Producer': 'matplotlib pdf backend %s' % __version__, 'CreationDate': source_date } if metadata is not None: self.infoDict.update(metadata) + self.infoDict = {k: v for (k, v) in self.infoDict.items() + if v is not None} self.fontNames = {} # maps filenames to internal font names self.nextFont = 1 # next free internal font name @@ -2459,6 +2460,13 @@ def __init__(self, filename, keep_empty=True, metadata=None): 'Document Information Dictionary'), e.g.: `{'Creator': 'My software', 'Author': 'Me', 'Title': 'Awesome fig'}` + + The standard keys are `'Title'`, `'Author'`, `'Subject'`, + `'Keywords'`, `'Creator'`, `'Producer'`, `'CreationDate'`, + `'ModDate'`, and `'Trapped'`. Values have been predefined + for `'Creator'`, `'Producer'` and `'CreationDate'`. They + can be removed by setting them to `None`. 
+ """ self._file = PdfFile(filename, metadata=metadata) self.keep_empty = keep_empty diff --git a/lib/matplotlib/testing/__init__.py b/lib/matplotlib/testing/__init__.py index 761c39473238..610977fd4664 100644 --- a/lib/matplotlib/testing/__init__.py +++ b/lib/matplotlib/testing/__init__.py @@ -136,6 +136,10 @@ def set_font_settings_for_testing(): rcParams['text.hinting_factor'] = 8 +def set_reproducibility_for_testing(): + rcParams['svg.hashsalt'] = 'matplotlib' + + def setup(): # The baseline images are created in this locale, so we should use # it during all of the tests. @@ -161,3 +165,4 @@ def setup(): rcdefaults() # Start with all defaults set_font_settings_for_testing() + set_reproducibility_for_testing() diff --git a/lib/matplotlib/testing/decorators.py b/lib/matplotlib/testing/decorators.py index 6af18dd59609..3065f37c8f92 100644 --- a/lib/matplotlib/testing/decorators.py +++ b/lib/matplotlib/testing/decorators.py @@ -298,7 +298,12 @@ def compare(self, idx, baseline, extension): remove_ticks_and_titles(fig) actual_fname = os.path.join(self.result_dir, baseline) + '.' + extension - fig.savefig(actual_fname, **self.savefig_kwargs) + kwargs = self.savefig_kwargs.copy() + if extension == 'pdf': + kwargs.setdefault('metadata', + {'Creator': None, 'Producer': None, + 'CreationDate': None}) + fig.savefig(actual_fname, **kwargs) expected_fname = self.copy_baseline(baseline, extension) raise_on_image_difference(expected_fname, actual_fname, self.tol)