Fix loading of encoded fonts in textpath.

anntzer · anntzer · commit fcd3bf655d0b · 2018-12-07T15:57:27.000+01:00
Consider the following example.

    import matplotlib.pyplot as plt
    plt.rcParams['text.usetex'] = True
    plt.rcParams['text.latex.preamble'] = r'\usepackage{siunitx}'
    plt.rcParams['text.hinting_factor'] = 1
    plt.text(.5, .5, r'$\si{\degree}$')
    plt.text(.5, .4, r'ff\textwon')
    plt.gca().set_axis_off()
    plt.savefig('/tmp/plot.svg')
    plt.savefig('/tmp/plot.pdf')
    plt.savefig('/tmp/plot.png')
    plt.show()

In the svg output, one sees that the \degree and \textwon characters
(which come from a different font than the ff ligature) are now
correctly loaded, *but* at too small a size -- this still needs to be
fixed.
(pdf and png output are unaffected.)
diff --git a/lib/matplotlib/dviread.py b/lib/matplotlib/dviread.py
@@ -968,10 +968,36 @@ def _parse(file):
             raise ValueError("Cannot locate end of encoding in {}"
                              .format(file))
         data = data[:end]
-
         return re.findall(br'/([^][{}<>\s]+)', data)
 
 
+# Note: this function should ultimately replace the Encoding class, which
+# appears to be mostly broken: because it uses b''.join(), there is no
+# whitespace left between glyph names (only slashes) so the final re.findall
+# returns a single string with all glyph names.  However this does not appear
+# to bother backend_pdf, so that needs to be investigated more.  (The fixed
+# version below is necessary for textpath/backend_svg, though.)
+def _parse_enc(path):
+    r"""
+    Parse a \*.enc file referenced from a psfonts.map style file.
+    Only a very limited subset of PostScript is understood by this function.
+
+    Parameters
+    ----------
+    path : os.PathLike
+
+    Returns
+    -------
+    encoding : list
+        The nth entry is the PostScript glyph name of the nth glyph.
+    """
+    with open(path, encoding="ascii") as file:
+        no_comments = "\n".join(line.split("%")[0].rstrip() for line in file)
+    array = re.search(r"(?s)\[(.*)\]", no_comments).group(1)
+    # "A-Za-z", not "A-z", which would also match the ASCII chars [ \ ] ^ `.
+    return re.findall(r"(?<=/)[A-Za-z0-9._]+", array)
+
+
 @lru_cache()
 def find_tex_file(filename, format=None):
     """
diff --git a/lib/matplotlib/textpath.py b/lib/matplotlib/textpath.py
@@ -16,13 +16,6 @@
 _log = logging.getLogger(__name__)
 
 
-@functools.lru_cache(1)
-def _get_adobe_standard_encoding():
-    enc_name = dviread.find_tex_file('8a.enc')
-    enc = dviread.Encoding(enc_name)
-    return {c: i for i, c in enumerate(enc.encoding)}
-
-
 class TextToPath(object):
     """
     A class that convert a given text to a path using ttf fonts.
@@ -297,12 +290,8 @@ def get_texmanager(self):
 
     def get_glyphs_tex(self, prop, s, glyph_map=None,
                        return_new_glyphs_only=False):
-        """
-        convert the string *s* to vertices and codes using matplotlib's usetex
-        mode.
-        """
-
-        # codes are modstly borrowed from pdf backend.
+        """Convert the string *s* to vertices and codes using usetex mode."""
+        # Mostly borrowed from pdf backend.
 
         dvifile = self.get_texmanager().make_dvi(s, self.FONT_SCALE)
         with dviread.Dvi(dvifile, self.DPI) as dvi:
@@ -320,29 +309,20 @@ def get_glyphs_tex(self, prop, s, glyph_map=None,
 
         # Gather font information and do some setup for combining
         # characters into strings.
-        # oldfont, seq = None, []
         for x1, y1, dvifont, glyph, width in page.text:
             font, enc = self._get_ps_font_and_encoding(dvifont.texname)
             char_id = self._get_char_id_ps(font, glyph)
 
             if char_id not in glyph_map:
                 font.clear()
                 font.set_size(self.FONT_SCALE, self.DPI)
-                if enc:
-                    charcode = enc.get(glyph, None)
-                else:
-                    charcode = glyph
-
-                ft2font_flag = LOAD_TARGET_LIGHT
-                if charcode is not None:
-                    glyph0 = font.load_char(charcode, flags=ft2font_flag)
+                # See comments in _get_ps_font_and_encoding.
+                if enc is not None:
+                    index = font.get_name_index(enc[glyph])
+                    font.load_glyph(index, flags=LOAD_TARGET_LIGHT)
                 else:
-                    _log.warning("The glyph (%d) of font (%s) cannot be "
-                                 "converted with the encoding. Glyph may "
-                                 "be wrong.", glyph, font.fname)
-
-                    glyph0 = font.load_char(glyph, flags=ft2font_flag)
-
+                    index = glyph
+                    font.load_char(index, flags=LOAD_TARGET_LIGHT)
                 glyph_map_new[char_id] = self.glyph_to_path(font)
 
             glyph_ids.append(char_id)
@@ -370,31 +350,41 @@ def _get_ps_font_and_encoding(texname):
         font_bunch = tex_font_map[texname]
         if font_bunch.filename is None:
             raise ValueError(
-                ("No usable font file found for %s (%s). "
-                    "The font may lack a Type-1 version.")
-                % (font_bunch.psname, texname))
+                "No usable font file found for {} ({}). "
+                "The font may lack a Type-1 version."
+                .format(font_bunch.psname, texname))
 
         font = get_font(font_bunch.filename)
 
-        for charmap_name, charmap_code in [("ADOBE_CUSTOM", 1094992451),
-                                           ("ADOBE_STANDARD", 1094995778)]:
-            try:
-                font.select_charmap(charmap_code)
-            except (ValueError, RuntimeError):
-                pass
-            else:
-                break
+        if font_bunch.encoding:
+            # If psfonts.map specifies an encoding, use it: it gives us a
+            # mapping of glyph indices to Adobe glyph names; use it to convert
+            # dvi indices to glyph names and use the FreeType-synthesized
+            # unicode charmap to convert glyph names to glyph indices (with
+            # FT_Get_Name_Index/get_name_index), and load the glyph using
+            # FT_Load_Glyph/load_glyph.  (That charmap has a coverage at least
+            # as good as, and possibly better than, the native charmaps.)
+            enc = dviread._parse_enc(font_bunch.encoding)
         else:
-            charmap_name = ""
-            _log.warning("No supported encoding in font (%s).",
-                         font_bunch.filename)
-
-        if charmap_name == "ADOBE_STANDARD" and font_bunch.encoding:
-            enc0 = dviread.Encoding(font_bunch.encoding)
-            enc = {i: _get_adobe_standard_encoding().get(c, None)
-                   for i, c in enumerate(enc0.encoding)}
-        else:
-            enc = {}
+            # If psfonts.map specifies no encoding, the indices directly map to
+            # the font's builtin charmap (see the pdftex manual, section 6.1
+            # -- Map files); so don't use the FreeType-synthesized charmap but
+            # the native ones (we can't directly identify it but it's typically
+            # an Adobe charmap), and directly load the dvi glyph indices using
+            # FT_Load_Char/load_char.
+            for charmap_name, charmap_code in [("ADOBE_CUSTOM", 1094992451),
+                                               ("ADOBE_STANDARD", 1094995778)]:
+                try:
+                    font.select_charmap(charmap_code)
+                except (ValueError, RuntimeError):
+                    pass
+                else:
+                    break
+            else:
+                charmap_name = ""
+                _log.warning("No supported encoding in font (%s).",
+                             font_bunch.filename)
+            enc = None
 
         return font, enc