Skip to content

Commit ecdfa65

Browse files
authored
Merge pull request #17233 from QuLogic/pgf-pdf-metadata
ENH: Improve PDF metadata support in PGF
2 parents 94c0721 + cf4a117 commit ecdfa65

File tree

7 files changed

+311
-129
lines changed

7 files changed

+311
-129
lines changed

doc/api/api_changes_3.3/deprecations.rst

+8
Original file line numberDiff line numberDiff line change
@@ -593,3 +593,11 @@ APIs which support the values True, False, and "TeX" for ``ismath``.
593593
``matplotlib.ttconv``
594594
~~~~~~~~~~~~~~~~~~~~~
595595
This module is deprecated.
596+
597+
Stricter PDF metadata keys in PGF
598+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
599+
Saving metadata in PDF with the PGF backend currently normalizes all keys to
600+
lowercase, unlike the PDF backend, which only accepts the canonical case. This
601+
is deprecated; in a future version, only the canonically cased keys listed in
602+
the PDF specification (and the `~.backend_pgf.PdfPages` documentation) will be
603+
accepted.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
Saving PDF metadata via PGF now consistent with PDF backend
2+
-----------------------------------------------------------
3+
4+
When saving PDF files using the PGF backend, passed metadata will be
5+
interpreted in the same way as with the PDF backend. Previously, this metadata
6+
was only accepted by the PGF backend when saving a multi-page PDF with
7+
`.backend_pgf.PdfPages`, but is now allowed when saving a single figure, as
8+
well.

lib/matplotlib/backends/backend_pdf.py

+106-60
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,110 @@ def _string_escape(match):
135135
assert False
136136

137137

138+
def _create_pdf_info_dict(backend, metadata):
    """
    Build and validate the PDF information dictionary.

    Default ``Creator``, ``Producer`` and ``CreationDate`` entries are
    generated first; entries in *metadata* take precedence over them. The
    creation date is taken from the ``SOURCE_DATE_EPOCH`` environment
    variable when it is set, and from the current time otherwise.

    Every resulting key is checked against the known infodict keywords and
    its value against the type expected for that keyword. Unknown keys and
    badly typed values trigger a warning; they are not removed.

    Parameters
    ----------
    backend : str
        Backend name, interpolated into the default ``Producer`` value.
    metadata : Dict[str, Union[str, datetime, Name]]
        User-supplied metadata, keyed as described by the PDF specification
        (see also `~.backend_pdf.PdfPages`).

        A value of *None* drops the key from the result, which allows
        removing one of the default entries.

    Returns
    -------
    Dict[str, Union[str, datetime, Name]]
        The merged metadata, with ``Trapped`` (if present) converted to a
        `Name`.
    """

    # Honor SOURCE_DATE_EPOCH for reproducible builds.
    # See https://reproducible-builds.org/specs/source-date-epoch/
    epoch = os.getenv("SOURCE_DATE_EPOCH")
    if not epoch:
        creation_date = datetime.today()
    else:
        creation_date = datetime.utcfromtimestamp(int(epoch)).replace(
            tzinfo=UTC)

    defaults = {
        'Creator': f'Matplotlib v{mpl.__version__}, https://matplotlib.org',
        'Producer': f'Matplotlib {backend} backend v{mpl.__version__}',
        'CreationDate': creation_date,
    }
    # User entries override the defaults; None-valued entries are dropped.
    info = {key: value
            for key, value in {**defaults, **metadata}.items()
            if value is not None}

    def is_text(value):
        return isinstance(value, str)

    def is_datetime(value):
        return isinstance(value, datetime)

    def is_valid_trapped(value):
        # A Name stores its name as bytes; plain strings are accepted too.
        if not isinstance(value, Name):
            return value in ('True', 'False', 'Unknown')
        return value.name in (b'True', b'False', b'Unknown')

    validators = dict.fromkeys(
        ('Title', 'Author', 'Subject', 'Keywords', 'Creator', 'Producer'),
        is_text)
    validators.update(dict.fromkeys(('CreationDate', 'ModDate'), is_datetime))
    validators['Trapped'] = is_valid_trapped

    for key, value in info.items():
        validate = validators.get(key)
        if validate is None:
            cbook._warn_external(f'Unknown infodict keyword: {key}')
        elif not validate(value):
            cbook._warn_external(f'Bad value for infodict keyword {key}')

    if 'Trapped' in info:
        info['Trapped'] = Name(info['Trapped'])

    return info
216+
217+
218+
def _datetime_to_pdf(d):
219+
"""
220+
Convert a datetime to a PDF string representing it.
221+
222+
Used for PDF and PGF.
223+
"""
224+
r = d.strftime('D:%Y%m%d%H%M%S')
225+
z = d.utcoffset()
226+
if z is not None:
227+
z = z.seconds
228+
else:
229+
if time.daylight:
230+
z = time.altzone
231+
else:
232+
z = time.timezone
233+
if z == 0:
234+
r += 'Z'
235+
elif z < 0:
236+
r += "+%02d'%02d'" % ((-z) // 3600, (-z) % 3600)
237+
else:
238+
r += "-%02d'%02d'" % (z // 3600, z % 3600)
239+
return r
240+
241+
138242
def pdfRepr(obj):
139243
"""Map Python objects to PDF syntax."""
140244

@@ -199,22 +303,7 @@ def pdfRepr(obj):
199303

200304
# A date.
201305
elif isinstance(obj, datetime):
202-
r = obj.strftime('D:%Y%m%d%H%M%S')
203-
z = obj.utcoffset()
204-
if z is not None:
205-
z = z.seconds
206-
else:
207-
if time.daylight:
208-
z = time.altzone
209-
else:
210-
z = time.timezone
211-
if z == 0:
212-
r += 'Z'
213-
elif z < 0:
214-
r += "+%02d'%02d'" % ((-z) // 3600, (-z) % 3600)
215-
else:
216-
r += "-%02d'%02d'" % (z // 3600, z % 3600)
217-
return pdfRepr(r)
306+
return pdfRepr(_datetime_to_pdf(obj))
218307

219308
# A bounding box
220309
elif isinstance(obj, BboxBase):
@@ -503,24 +592,7 @@ def __init__(self, filename, metadata=None):
503592
'Pages': self.pagesObject}
504593
self.writeObject(self.rootObject, root)
505594

506-
# get source date from SOURCE_DATE_EPOCH, if set
507-
# See https://reproducible-builds.org/specs/source-date-epoch/
508-
source_date_epoch = os.getenv("SOURCE_DATE_EPOCH")
509-
if source_date_epoch:
510-
source_date = datetime.utcfromtimestamp(int(source_date_epoch))
511-
source_date = source_date.replace(tzinfo=UTC)
512-
else:
513-
source_date = datetime.today()
514-
515-
self.infoDict = {
516-
'Creator': f'matplotlib {mpl.__version__}, http://matplotlib.org',
517-
'Producer': f'matplotlib pdf backend {mpl.__version__}',
518-
'CreationDate': source_date
519-
}
520-
if metadata is not None:
521-
self.infoDict.update(metadata)
522-
self.infoDict = {k: v for (k, v) in self.infoDict.items()
523-
if v is not None}
595+
self.infoDict = _create_pdf_info_dict('pdf', metadata or {})
524596

525597
self.fontNames = {} # maps filenames to internal font names
526598
self._internal_font_seq = (Name(f'F{i}') for i in itertools.count(1))
@@ -1640,32 +1712,6 @@ def writeXref(self):
16401712
def writeInfoDict(self):
16411713
"""Write out the info dictionary, checking it for good form"""
16421714

1643-
def is_string_like(x):
1644-
return isinstance(x, str)
1645-
1646-
def is_date(x):
1647-
return isinstance(x, datetime)
1648-
1649-
check_trapped = (lambda x: isinstance(x, Name) and
1650-
x.name in ('True', 'False', 'Unknown'))
1651-
1652-
keywords = {'Title': is_string_like,
1653-
'Author': is_string_like,
1654-
'Subject': is_string_like,
1655-
'Keywords': is_string_like,
1656-
'Creator': is_string_like,
1657-
'Producer': is_string_like,
1658-
'CreationDate': is_date,
1659-
'ModDate': is_date,
1660-
'Trapped': check_trapped}
1661-
for k in self.infoDict:
1662-
if k not in keywords:
1663-
cbook._warn_external('Unknown infodict keyword: %s' % k)
1664-
else:
1665-
if not keywords[k](self.infoDict[k]):
1666-
cbook._warn_external(
1667-
'Bad value for infodict keyword %s' % k)
1668-
16691715
self.infoObject = self.reserveObject('info')
16701716
self.writeObject(self.infoObject, self.infoDict)
16711717

lib/matplotlib/backends/backend_pgf.py

+49-22
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import atexit
22
import codecs
3+
import datetime
34
import functools
45
import logging
56
import math
@@ -20,6 +21,8 @@
2021
_Backend, FigureCanvasBase, FigureManagerBase, GraphicsContextBase,
2122
RendererBase)
2223
from matplotlib.backends.backend_mixed import MixedModeRenderer
24+
from matplotlib.backends.backend_pdf import (
25+
_create_pdf_info_dict, _datetime_to_pdf)
2326
from matplotlib.path import Path
2427
from matplotlib.figure import Figure
2528
from matplotlib._pylab_helpers import Gcf
@@ -157,6 +160,17 @@ def _font_properties_str(prop):
157160
return "".join(commands)
158161

159162

163+
def _metadata_to_str(key, value):
164+
"""Convert metadata key/value to a form that hyperref accepts."""
165+
if isinstance(value, datetime.datetime):
166+
value = _datetime_to_pdf(value)
167+
elif key == 'Trapped':
168+
value = value.name.decode('ascii')
169+
else:
170+
value = str(value)
171+
return f'{key}={{{value}}}'
172+
173+
160174
def make_pdf_to_png_converter():
161175
"""Return a function that converts a pdf file to a png file."""
162176
if shutil.which("pdftocairo"):
@@ -867,9 +881,13 @@ def print_pgf(self, fname_or_fh, *args, **kwargs):
867881
file = codecs.getwriter("utf-8")(file)
868882
self._print_pgf_to_fh(file, *args, **kwargs)
869883

870-
def _print_pdf_to_fh(self, fh, *args, **kwargs):
884+
def _print_pdf_to_fh(self, fh, *args, metadata=None, **kwargs):
871885
w, h = self.figure.get_figwidth(), self.figure.get_figheight()
872886

887+
info_dict = _create_pdf_info_dict('pgf', metadata or {})
888+
hyperref_options = ','.join(
889+
_metadata_to_str(k, v) for k, v in info_dict.items())
890+
873891
try:
874892
# create temporary directory for compiling the figure
875893
tmpdir = tempfile.mkdtemp(prefix="mpl_pgf_")
@@ -883,6 +901,8 @@ def _print_pdf_to_fh(self, fh, *args, **kwargs):
883901
latex_preamble = get_preamble()
884902
latex_fontspec = get_fontspec()
885903
latexcode = """
904+
\\PassOptionsToPackage{pdfinfo={%s}}{hyperref}
905+
\\RequirePackage{hyperref}
886906
\\documentclass[12pt]{minimal}
887907
\\usepackage[paperwidth=%fin, paperheight=%fin, margin=0in]{geometry}
888908
%s
@@ -892,7 +912,7 @@ def _print_pdf_to_fh(self, fh, *args, **kwargs):
892912
\\begin{document}
893913
\\centering
894914
\\input{figure.pgf}
895-
\\end{document}""" % (w, h, latex_preamble, latex_fontspec)
915+
\\end{document}""" % (hyperref_options, w, h, latex_preamble, latex_fontspec)
896916
pathlib.Path(fname_tex).write_text(latexcode, encoding="utf-8")
897917

898918
texcommand = mpl.rcParams["pgf.texsystem"]
@@ -989,7 +1009,8 @@ class PdfPages:
9891009
'_fname_pdf',
9901010
'_n_figures',
9911011
'_file',
992-
'metadata',
1012+
'_info_dict',
1013+
'_metadata',
9931014
)
9941015

9951016
def __init__(self, filename, *, keep_empty=True, metadata=None):
@@ -1017,7 +1038,21 @@ def __init__(self, filename, *, keep_empty=True, metadata=None):
10171038
self._outputfile = filename
10181039
self._n_figures = 0
10191040
self.keep_empty = keep_empty
1020-
self.metadata = metadata or {}
1041+
self._metadata = (metadata or {}).copy()
1042+
if metadata:
1043+
for key in metadata:
1044+
canonical = {
1045+
'creationdate': 'CreationDate',
1046+
'moddate': 'ModDate',
1047+
}.get(key.lower(), key.lower().title())
1048+
if canonical != key:
1049+
cbook.warn_deprecated(
1050+
'3.3', message='Support for setting PDF metadata keys '
1051+
'case-insensitively is deprecated since %(since)s and '
1052+
'will be removed %(removal)s; '
1053+
f'set {canonical} instead of {key}.')
1054+
self._metadata[canonical] = self._metadata.pop(key)
1055+
self._info_dict = _create_pdf_info_dict('pgf', self._metadata)
10211056

10221057
# create temporary directory for compiling the figure
10231058
self._tmpdir = tempfile.mkdtemp(prefix="mpl_pgf_pdfpages_")
@@ -1026,29 +1061,21 @@ def __init__(self, filename, *, keep_empty=True, metadata=None):
10261061
self._fname_pdf = os.path.join(self._tmpdir, self._basename + ".pdf")
10271062
self._file = open(self._fname_tex, 'wb')
10281063

1064+
@cbook.deprecated('3.3')
@property
def metadata(self):
    """Return the metadata dictionary stored in ``self._metadata``."""
    # Read-only accessor for the old public attribute name; it is marked
    # deprecated since 3.3 by the decorator above.
    return self._metadata
1068+
10291069
def _write_header(self, width_inches, height_inches):
1030-
supported_keys = {
1031-
'title', 'author', 'subject', 'keywords', 'creator',
1032-
'producer', 'trapped'
1033-
}
1034-
infoDict = {
1035-
'creator': f'matplotlib {mpl.__version__}, https://matplotlib.org',
1036-
'producer': f'matplotlib pgf backend {mpl.__version__}',
1037-
}
1038-
metadata = {k.lower(): v for k, v in self.metadata.items()}
1039-
infoDict.update(metadata)
1040-
hyperref_options = ''
1041-
for k, v in infoDict.items():
1042-
if k not in supported_keys:
1043-
raise ValueError(
1044-
'Not a supported pdf metadata field: "{}"'.format(k)
1045-
)
1046-
hyperref_options += 'pdf' + k + '={' + str(v) + '},'
1070+
hyperref_options = ','.join(
1071+
_metadata_to_str(k, v) for k, v in self._info_dict.items())
10471072

10481073
latex_preamble = get_preamble()
10491074
latex_fontspec = get_fontspec()
10501075
latex_header = r"""\PassOptionsToPackage{{
1051-
{metadata}
1076+
pdfinfo={{
1077+
{metadata}
1078+
}}
10521079
}}{{hyperref}}
10531080
\RequirePackage{{hyperref}}
10541081
\documentclass[12pt]{{minimal}}

0 commit comments

Comments
 (0)