From 529afdc7a2d018f01f8f9fc21129ff57a292b8d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= Date: Thu, 5 Jan 2017 09:21:44 +0200 Subject: [PATCH 1/7] Allow deleting pdf metadata keys And document the standard keys in PdfPages. Also insert the matplotlib version in the default value of Producer as well as Creator, to retain a debugging clue in case the user overrides one of them. --- lib/matplotlib/backends/backend_pdf.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/lib/matplotlib/backends/backend_pdf.py b/lib/matplotlib/backends/backend_pdf.py index f6fa7ea12345..a5e6253ca387 100644 --- a/lib/matplotlib/backends/backend_pdf.py +++ b/lib/matplotlib/backends/backend_pdf.py @@ -472,7 +472,6 @@ def __init__(self, filename, metadata=None): 'Pages': self.pagesObject} self.writeObject(self.rootObject, root) - revision = '' # get source date from SOURCE_DATE_EPOCH, if set # See https://reproducible-builds.org/specs/source-date-epoch/ source_date_epoch = os.getenv("SOURCE_DATE_EPOCH") @@ -484,11 +483,13 @@ def __init__(self, filename, metadata=None): self.infoDict = { 'Creator': 'matplotlib %s, http://matplotlib.org' % __version__, - 'Producer': 'matplotlib pdf backend%s' % revision, + 'Producer': 'matplotlib pdf backend %s' % __version__, 'CreationDate': source_date } if metadata is not None: self.infoDict.update(metadata) + self.infoDict = {k: v for (k, v) in self.infoDict.items() + if v is not None} self.fontNames = {} # maps filenames to internal font names self.nextFont = 1 # next free internal font name @@ -2459,6 +2460,13 @@ def __init__(self, filename, keep_empty=True, metadata=None): 'Document Information Dictionary'), e.g.: `{'Creator': 'My software', 'Author': 'Me', 'Title': 'Awesome fig'}` + + The standard keys are `'Title'`, `'Author'`, `'Subject'`, + `'Keywords'`, `'Creator'`, `'Producer'`, `'CreationDate'`, + `'ModDate'`, and `'Trapped'`. 
Values have been predefined + for `'Creator'`, `'Producer'` and `'CreationDate'`. They + can be removed by setting them to `None`. + """ self._file = PdfFile(filename, metadata=metadata) self.keep_empty = keep_empty From 8c7adbb10b86f6ab83d79ca3c24cf5c96db9d65f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= Date: Thu, 5 Jan 2017 09:22:19 +0200 Subject: [PATCH 2/7] Make pdf test results (more) deterministic Remove the metadata entries that depend on the date and the exact version. --- lib/matplotlib/testing/decorators.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/matplotlib/testing/decorators.py b/lib/matplotlib/testing/decorators.py index 6af18dd59609..3065f37c8f92 100644 --- a/lib/matplotlib/testing/decorators.py +++ b/lib/matplotlib/testing/decorators.py @@ -298,7 +298,12 @@ def compare(self, idx, baseline, extension): remove_ticks_and_titles(fig) actual_fname = os.path.join(self.result_dir, baseline) + '.' + extension - fig.savefig(actual_fname, **self.savefig_kwargs) + kwargs = self.savefig_kwargs.copy() + if extension == 'pdf': + kwargs.setdefault('metadata', + {'Creator': None, 'Producer': None, + 'CreationDate': None}) + fig.savefig(actual_fname, **kwargs) expected_fname = self.copy_baseline(baseline, extension) raise_on_image_difference(expected_fname, actual_fname, self.tol) From 93b2eea116e9ee51266823142b541bc671cb3d0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= Date: Thu, 5 Jan 2017 18:53:14 +0200 Subject: [PATCH 3/7] Reproducible svg --- lib/matplotlib/testing/__init__.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/matplotlib/testing/__init__.py b/lib/matplotlib/testing/__init__.py index 761c39473238..610977fd4664 100644 --- a/lib/matplotlib/testing/__init__.py +++ b/lib/matplotlib/testing/__init__.py @@ -136,6 +136,10 @@ def set_font_settings_for_testing(): rcParams['text.hinting_factor'] = 8 +def set_reproducibility_for_testing(): + 
rcParams['svg.hashsalt'] = 'matplotlib' + + def setup(): # The baseline images are created in this locale, so we should use # it during all of the tests. @@ -161,3 +165,4 @@ def setup(): rcdefaults() # Start with all defaults set_font_settings_for_testing() + set_reproducibility_for_testing() From eab297c8717e9815bb7a2ee18e3c83d586b37ea4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= Date: Thu, 5 Jan 2017 19:16:21 +0200 Subject: [PATCH 4/7] Document the changes in whats_new --- doc/users/whats_new/reproducible_ps_pdf.rst | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/doc/users/whats_new/reproducible_ps_pdf.rst b/doc/users/whats_new/reproducible_ps_pdf.rst index 2e8294f1e414..b29395d8536f 100644 --- a/doc/users/whats_new/reproducible_ps_pdf.rst +++ b/doc/users/whats_new/reproducible_ps_pdf.rst @@ -1,10 +1,13 @@ -Reproducible PS and PDF output ------------------------------- +Reproducible PS, PDF and SVG output +----------------------------------- The ``SOURCE_DATE_EPOCH`` environment variable can now be used to set the timestamp value in the PS and PDF outputs. See https://reproducible-builds.org/specs/source-date-epoch/ +Alternatively, calling ``savefig`` with ``metadata={creationDate=None}`` +will omit the timestamp altogether. + The reproducibility of the output from the PS and PDF backends has so far been tested using various plot elements but only default values of options such as ``{ps,pdf}.fonttype`` that can affect the output at a @@ -12,3 +15,12 @@ low level, and not with the mathtext or usetex features. When matplotlib calls external tools (such as PS distillers or LaTeX) their versions need to be kept constant for reproducibility, and they may add sources of nondeterminism outside the control of matplotlib. + +For SVG output, the ``svg.hashsalt`` rc parameter has been added in an +earlier release. 
In can be used to change some random id values in the +output to be deterministic, at the cost that including multiple such +svg files in one document can lead to collisions. + +These features are now enabled in the tests for the pdf and svg +backends, so most test output files (but not all of them) are now +deterministic. From ec941b49f91223a0ac1a954dc52c1a71fda18d4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= Date: Fri, 20 Jan 2017 12:08:46 +0200 Subject: [PATCH 5/7] Fix documentation mistakes --- doc/users/whats_new/reproducible_ps_pdf.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/users/whats_new/reproducible_ps_pdf.rst b/doc/users/whats_new/reproducible_ps_pdf.rst index b29395d8536f..224c4b0cc4a0 100644 --- a/doc/users/whats_new/reproducible_ps_pdf.rst +++ b/doc/users/whats_new/reproducible_ps_pdf.rst @@ -5,7 +5,7 @@ The ``SOURCE_DATE_EPOCH`` environment variable can now be used to set the timestamp value in the PS and PDF outputs. See https://reproducible-builds.org/specs/source-date-epoch/ -Alternatively, calling ``savefig`` with ``metadata={creationDate=None}`` +Alternatively, calling ``savefig`` with ``metadata={'CreationDate': None}`` will omit the timestamp altogether. The reproducibility of the output from the PS and PDF backends has so @@ -17,10 +17,10 @@ versions need to be kept constant for reproducibility, and they may add sources of nondeterminism outside the control of matplotlib. For SVG output, the ``svg.hashsalt`` rc parameter has been added in an -earlier release. In can be used to change some random id values in the +earlier release. It can be used to change some random id values in the output to be deterministic, at the cost that including multiple such -svg files in one document can lead to collisions. +SVG files in one document can lead to collisions. 
-These features are now enabled in the tests for the pdf and svg +These features are now enabled in the tests for the PDF and SVG backends, so most test output files (but not all of them) are now deterministic. From 749e81f60fb9bfb3d9b5be6631efca5e88ad7f7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= Date: Fri, 20 Jan 2017 22:45:26 +0200 Subject: [PATCH 6/7] Try to improve wording --- doc/users/whats_new/reproducible_ps_pdf.rst | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/doc/users/whats_new/reproducible_ps_pdf.rst b/doc/users/whats_new/reproducible_ps_pdf.rst index 224c4b0cc4a0..8d84ef994b2f 100644 --- a/doc/users/whats_new/reproducible_ps_pdf.rst +++ b/doc/users/whats_new/reproducible_ps_pdf.rst @@ -17,9 +17,11 @@ versions need to be kept constant for reproducibility, and they may add sources of nondeterminism outside the control of matplotlib. For SVG output, the ``svg.hashsalt`` rc parameter has been added in an -earlier release. It can be used to change some random id values in the -output to be deterministic, at the cost that including multiple such -SVG files in one document can lead to collisions. +earlier release. This parameter changes some random identifiers in the +SVG file to be deterministic. The downside of this setting is that if +more than one file is generated using with deterministic identifiers +and they end up as parts of one larger document, the identifiers can +collide and cause the different parts to affect each other. 
These features are now enabled in the tests for the PDF and SVG backends, so most test output files (but not all of them) are now From 6736f7de020b400ac096c567b9a9da9022fef888 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= Date: Sat, 21 Jan 2017 10:32:08 +0200 Subject: [PATCH 7/7] Another wording fix --- doc/users/whats_new/reproducible_ps_pdf.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/users/whats_new/reproducible_ps_pdf.rst b/doc/users/whats_new/reproducible_ps_pdf.rst index 8d84ef994b2f..a8c9e9cf9d59 100644 --- a/doc/users/whats_new/reproducible_ps_pdf.rst +++ b/doc/users/whats_new/reproducible_ps_pdf.rst @@ -19,7 +19,7 @@ add sources of nondeterminism outside the control of matplotlib. For SVG output, the ``svg.hashsalt`` rc parameter has been added in an earlier release. This parameter changes some random identifiers in the SVG file to be deterministic. The downside of this setting is that if -more than one file is generated using with deterministic identifiers +more than one file is generated using deterministic identifiers and they end up as parts of one larger document, the identifiers can collide and cause the different parts to affect each other.