Skip to content

Make AFM parser both more compliant and less strict. #13441

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 16, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions doc/api/next_api_changes/2019-02-16-AL.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Changes in AFM parsing
``````````````````````

In accordance with the AFM spec, the AFM parser no longer truncates the
``UnderlinePosition`` and ``UnderlineThickness`` fields to integers.

The ``Notice`` field (which can only be publicly accessed by the deprecated
``afm.parse_afm`` API) is no longer decoded to a `str`, but instead kept as
`bytes`, to support non-conformant AFM files that use non-ASCII characters in
that field.
47 changes: 28 additions & 19 deletions lib/matplotlib/afm.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,17 +49,24 @@
_log = logging.getLogger(__name__)


# some afm files have floats where we are expecting ints -- there is
# probably a better way to handle this (support floats, round rather
# than truncate). But I don't know what the best approach is now and
# this change to _to_int should at least prevent mpl from crashing on
# these JDH (2009-11-06)

def _to_int(x):
# Some AFM files have floats where we are expecting ints -- there is
# probably a better way to handle this (support floats, round rather
# than truncate). But I don't know what the best approach is now and
# this change to _to_int should at least prevent mpl from crashing on
# these JDH (2009-11-06)
return int(float(x))


_to_float = float
def _to_float(x):
# Some AFM files use "," instead of "." as decimal separator -- this
# shouldn't be ambiguous (unless someone is wicked enough to use "," as
# thousands separator...).
if isinstance(x, bytes):
# Encoding doesn't really matter -- if we have codepoints >127 the call
# to float() will error anyway.
x = x.decode('latin-1')
return float(x.replace(',', '.'))


def _to_str(x):
Expand All @@ -84,18 +91,15 @@ def _to_bool(s):

def _sanity_check(fh):
"""
Check if the file at least looks like AFM.
If not, raise `RuntimeError`.
Check if the file looks like AFM; if it doesn't, raise `RuntimeError`.
"""

# Remember the file position in case the caller wants to
# do something else with the file.
pos = fh.tell()
try:
line = next(fh)
finally:
fh.seek(pos, 0)

# AFM spec, Section 4: The StartFontMetrics keyword [followed by a
# version number] must be the first line in the file, and the
# EndFontMetrics keyword must be the last non-empty line in the
Expand All @@ -122,7 +126,7 @@ def _parse_header(fh):
XHeight, Ascender, Descender, StartCharMetrics

"""
headerConverters = {
header_converters = {
b'StartFontMetrics': _to_float,
b'FontName': _to_str,
b'FullName': _to_str,
Expand All @@ -131,10 +135,13 @@ def _parse_header(fh):
b'ItalicAngle': _to_float,
b'IsFixedPitch': _to_bool,
b'FontBBox': _to_list_of_ints,
b'UnderlinePosition': _to_int,
b'UnderlineThickness': _to_int,
b'UnderlinePosition': _to_float,
b'UnderlineThickness': _to_float,
b'Version': _to_str,
b'Notice': _to_str,
# Some AFM files have non-ASCII characters (which are not allowed by
# the spec). Given that there is actually no public API to even access
# this field, just return it as straight bytes.
b'Notice': lambda x: x,
b'EncodingScheme': _to_str,
b'CapHeight': _to_float, # Is the second version a mistake, or
b'Capheight': _to_float, # do some AFM files contain 'Capheight'? -JKS
Expand Down Expand Up @@ -162,13 +169,15 @@ def _parse_header(fh):
val = b''

try:
d[key] = headerConverters[key](val)
except ValueError:
_log.error('Value error parsing header in AFM: %s, %s', key, val)
continue
converter = header_converters[key]
except KeyError:
_log.error('Found an unknown keyword in AFM header (was %r)' % key)
continue
try:
d[key] = converter(val)
except ValueError:
_log.error('Value error parsing header in AFM: %s, %s', key, val)
continue
if key == b'StartCharMetrics':
return d
raise RuntimeError('Bad parse')
Expand Down
11 changes: 7 additions & 4 deletions lib/matplotlib/tests/test_afm.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
from matplotlib import font_manager as fm


# See note in afm.py re: use of comma as decimal separator in the
# UnderlineThickness field and re: use of non-ASCII characters in the Notice
# field.
AFM_TEST_DATA = b"""StartFontMetrics 2.0
Comment Comments are ignored.
Comment Creation Date:Mon Nov 13 12:34:11 GMT 2017
Expand All @@ -15,9 +18,9 @@
ItalicAngle 0.0
IsFixedPitch false
UnderlinePosition -100
UnderlineThickness 50
UnderlineThickness 56,789
Version 001.000
Notice Copyright (c) 2017 No one.
Notice Copyright \xa9 2017 No one.
FontBBox 0 -321 1234 369
StartCharMetrics 3
C 0 ; WX 250 ; N space ; B 0 0 0 0 ;
Expand Down Expand Up @@ -51,9 +54,9 @@ def test_parse_header():
b'ItalicAngle': 0.0,
b'IsFixedPitch': False,
b'UnderlinePosition': -100,
b'UnderlineThickness': 50,
b'UnderlineThickness': 56.789,
b'Version': '001.000',
b'Notice': 'Copyright (c) 2017 No one.',
b'Notice': b'Copyright \xa9 2017 No one.',
b'FontBBox': [0, -321, 1234, 369],
b'StartCharMetrics': 3,
}
Expand Down