Recognize abbreviations of PostScript code

jkseppan · jkseppan · commit e98bb8338472 · 2021-08-29T09:51:38.000+03:00
Type-1 fonts are required to have subroutines with specific contents,
but their names may vary. They are usually ND, NP and RD, but names
like |-, | and -| appear too.
diff --git a/lib/matplotlib/tests/test_type1font.py b/lib/matplotlib/tests/test_type1font.py
@@ -42,6 +42,7 @@ def test_Type1Font():
     assert slanted.prop['ItalicAngle'] == -45
     assert font.prop['Encoding'][5] == 'Pi'
     assert isinstance(font.prop['CharStrings']['Pi'], bytes)
+    assert font._abbr['ND'] == 'ND'
 
     differ = difflib.Differ()
     diff = list(differ.compare(
@@ -85,6 +86,7 @@ def test_Type1Font_2():
     assert font.prop['Encoding'][65] == 'A'  # the font uses StandardEncoding
     (pos0, pos1), = font._pos['Encoding']
     assert font.parts[0][pos0:pos1] == b'/Encoding StandardEncoding'
+    assert font._abbr['ND'] == '|-'
 
 
 def test_tokenize():
diff --git a/lib/matplotlib/type1font.py b/lib/matplotlib/type1font.py
@@ -344,11 +344,14 @@ class Type1Font:
           Subrs - array of byte code subroutines
           OtherSubrs - bytes object encoding some PostScript code
     """
-    __slots__ = ('parts', 'decrypted', 'prop', '_pos')
+    __slots__ = ('parts', 'decrypted', 'prop', '_pos', '_abbr')
     # the _pos dict contains (begin, end) indices to parts[0] + decrypted
     # so that they can be replaced when transforming the font;
     # but since sometimes a definition appears in both parts[0] and decrypted,
     # _pos[name] is an array of such pairs
+    #
+    # _abbr maps three standard abbreviations to their particular names in
+    # this font (e.g. 'RD' is named '-|' in some fonts)
 
     def __init__(self, input):
         """
@@ -368,6 +371,7 @@ def __init__(self, input):
             self.parts = self._split(data)
 
         self.decrypted = self._decrypt(self.parts[1], 'eexec')
+        self._abbr = {'RD': 'RD', 'ND': 'ND', 'NP': 'NP'}
         self._parse()
 
     def _read(self, file):
@@ -552,10 +556,18 @@ def _parse(self):
                 break
 
             # sometimes noaccess def and readonly def are abbreviated
-            if kw.is_name(b'def', b'ND', b'RD', b'|-'):
+            if kw.is_keyword('def', self._abbr['ND'], self._abbr['NP']):
                 prop[key] = value
                 pos.setdefault(key, []).append((keypos, kw.endpos()))
 
+            # detect the standard abbreviations
+            if value == '{noaccess def}':
+                self._abbr['ND'] = key
+            elif value == '{noaccess put}':
+                self._abbr['NP'] = key
+            elif value == '{string currentfile exch readstring pop}':
+                self._abbr['RD'] = key
+
         # Fill in the various *Name properties
         if 'FontName' not in prop:
             prop['FontName'] = (prop.get('FullName') or
@@ -604,9 +616,14 @@ def _parse_subrs(self, tokens, _data):
                     "Second token following dup in Subrs definition must "
                     f"be a number, was {nbytes_token}"
                 )
-            token = next(tokens)  # usually RD or |- but the font can define this to be anything
-            binary_token = tokens.send(1+nbytes_token.numeric_value())
-            array[index_token.numeric_value()] = binary_token.value[1:]
+            token = next(tokens)
+            if not token.is_keyword(self._abbr['RD']):
+                raise RuntimeError(
+                    f"Token preceding subr must be {self._abbr['RD']}, "
+                    f"was {token}"
+                )
+            binary_token = tokens.send(1+nbytes_token.value())
+            array[index_token.value()] = binary_token.value()
 
         return array, next(tokens).endpos()