From 997df25fcc47e0f8d9350a582c4b4842f01c750f Mon Sep 17 00:00:00 2001 From: Antony Lee Date: Fri, 15 Feb 2019 17:47:49 +0100 Subject: [PATCH] Make AFM parser both more compliant and less strict. See changelog entry. Also support comma as decimal separator in the floating-point fields, as it is used in certain real-world files. --- doc/api/next_api_changes/2019-02-16-AL.rst | 10 +++++ lib/matplotlib/afm.py | 47 +++++++++++++--------- lib/matplotlib/tests/test_afm.py | 11 +++-- 3 files changed, 45 insertions(+), 23 deletions(-) create mode 100644 doc/api/next_api_changes/2019-02-16-AL.rst diff --git a/doc/api/next_api_changes/2019-02-16-AL.rst b/doc/api/next_api_changes/2019-02-16-AL.rst new file mode 100644 index 000000000000..fba6fbf24b7e --- /dev/null +++ b/doc/api/next_api_changes/2019-02-16-AL.rst @@ -0,0 +1,10 @@ +Changes in AFM parsing +`````````````````````` + +In accordance with the AFM spec, the AFM parser no longer truncates the +``UnderlinePosition`` and ``UnderlineThickness`` fields to integers. + +The ``Notice`` field (which can only be publicly accessed by the deprecated +``afm.parse_afm`` API) is no longer decoded to a `str`, but instead kept as +`bytes`, to support non-conformant AFM files that use non-ASCII characters in +that field. diff --git a/lib/matplotlib/afm.py b/lib/matplotlib/afm.py index 321a33ade7f1..c2b0090adbcd 100644 --- a/lib/matplotlib/afm.py +++ b/lib/matplotlib/afm.py @@ -49,17 +49,24 @@ _log = logging.getLogger(__name__) -# some afm files have floats where we are expecting ints -- there is -# probably a better way to handle this (support floats, round rather -# than truncate). But I don't know what the best approach is now and -# this change to _to_int should at least prevent mpl from crashing on -# these JDH (2009-11-06) - def _to_int(x): + # Some AFM files have floats where we are expecting ints -- there is + # probably a better way to handle this (support floats, round rather + # than truncate). 
But I don't know what the best approach is now and + # this change to _to_int should at least prevent mpl from crashing on + # these JDH (2009-11-06) return int(float(x)) -_to_float = float +def _to_float(x): + # Some AFM files use "," instead of "." as decimal separator -- this + # shouldn't be ambiguous (unless someone is wicked enough to use "," as + # thousands separator...). + if isinstance(x, bytes): + # Encoding doesn't really matter -- if we have codepoints >127 the call + # to float() will error anyways. + x = x.decode('latin-1') + return float(x.replace(',', '.')) def _to_str(x): @@ -84,10 +91,8 @@ def _to_bool(s): def _sanity_check(fh): """ - Check if the file at least looks like AFM. - If not, raise `RuntimeError`. + Check if the file looks like AFM; if it doesn't, raise `RuntimeError`. """ - # Remember the file position in case the caller wants to # do something else with the file. pos = fh.tell() @@ -95,7 +100,6 @@ def _sanity_check(fh): line = next(fh) finally: fh.seek(pos, 0) - # AFM spec, Section 4: The StartFontMetrics keyword [followed by a # version number] must be the first line in the file, and the # EndFontMetrics keyword must be the last non-empty line in the @@ -122,7 +126,7 @@ def _parse_header(fh): XHeight, Ascender, Descender, StartCharMetrics """ - headerConverters = { + header_converters = { b'StartFontMetrics': _to_float, b'FontName': _to_str, b'FullName': _to_str, @@ -131,10 +135,13 @@ def _parse_header(fh): b'ItalicAngle': _to_float, b'IsFixedPitch': _to_bool, b'FontBBox': _to_list_of_ints, - b'UnderlinePosition': _to_int, - b'UnderlineThickness': _to_int, + b'UnderlinePosition': _to_float, + b'UnderlineThickness': _to_float, b'Version': _to_str, - b'Notice': _to_str, + # Some AFM files have non-ASCII characters (which are not allowed by + # the spec). Given that there is actually no public API to even access + # this field, just return it as straight bytes. 
+ b'Notice': lambda x: x, b'EncodingScheme': _to_str, b'CapHeight': _to_float, # Is the second version a mistake, or b'Capheight': _to_float, # do some AFM files contain 'Capheight'? -JKS @@ -162,13 +169,15 @@ def _parse_header(fh): val = b'' try: - d[key] = headerConverters[key](val) - except ValueError: - _log.error('Value error parsing header in AFM: %s, %s', key, val) - continue + converter = header_converters[key] except KeyError: _log.error('Found an unknown keyword in AFM header (was %r)' % key) continue + try: + d[key] = converter(val) + except ValueError: + _log.error('Value error parsing header in AFM: %s, %s', key, val) + continue if key == b'StartCharMetrics': return d raise RuntimeError('Bad parse') diff --git a/lib/matplotlib/tests/test_afm.py b/lib/matplotlib/tests/test_afm.py index b2d800113b0d..0461515b3366 100644 --- a/lib/matplotlib/tests/test_afm.py +++ b/lib/matplotlib/tests/test_afm.py @@ -4,6 +4,9 @@ from matplotlib import font_manager as fm +# See note in afm.py re: use of comma as decimal separator in the +# UnderlineThickness field and re: use of non-ASCII characters in the Notice +# field. AFM_TEST_DATA = b"""StartFontMetrics 2.0 Comment Comments are ignored. Comment Creation Date:Mon Nov 13 12:34:11 GMT 2017 @@ -15,9 +18,9 @@ ItalicAngle 0.0 IsFixedPitch false UnderlinePosition -100 -UnderlineThickness 50 +UnderlineThickness 56,789 Version 001.000 -Notice Copyright (c) 2017 No one. +Notice Copyright \xa9 2017 No one. FontBBox 0 -321 1234 369 StartCharMetrics 3 C 0 ; WX 250 ; N space ; B 0 0 0 0 ; @@ -51,9 +54,9 @@ def test_parse_header(): b'ItalicAngle': 0.0, b'IsFixedPitch': False, b'UnderlinePosition': -100, - b'UnderlineThickness': 50, + b'UnderlineThickness': 56.789, b'Version': '001.000', - b'Notice': 'Copyright (c) 2017 No one.', + b'Notice': b'Copyright \xa9 2017 No one.', b'FontBBox': [0, -321, 1234, 369], b'StartCharMetrics': 3, }