From 705b021fb4959edb0d316ecaad7be7c63d765c7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= Date: Thu, 25 Aug 2016 21:31:05 +0300 Subject: [PATCH 01/23] Handle dvi font names as ASCII bytestrings Dvi is a binary format that includes some ASCII strings such as TeX names of some fonts. The associated files such as psfonts.map need to be ASCII too. This patch changes their handling to keep them as binary strings all the time. This avoids the ugly workaround try: result = some_mapping[texname] except KeyError: result = some_mapping[texname.decode('ascii')] which is essentially saying that texname is sometimes a string, sometimes a bytestring. The workaround masks real KeyErrors, leading to incomprehensible error messages such as in #6516. --- lib/matplotlib/dviread.py | 64 +++++++++++++++------------- lib/matplotlib/tests/test_dviread.py | 31 ++++++++------ 2 files changed, 51 insertions(+), 44 deletions(-) diff --git a/lib/matplotlib/dviread.py b/lib/matplotlib/dviread.py index d0bb70f12f18..c46ce298eace 100644 --- a/lib/matplotlib/dviread.py +++ b/lib/matplotlib/dviread.py @@ -510,7 +510,7 @@ class DviFont(object): Name of the font as used internally by TeX and friends. This is usually very different from any external font names, and :class:`dviread.PsfontsMap` can be used to find the external - name of the font. + name of the font. ASCII bytestring. .. 
attribute:: size @@ -526,8 +526,7 @@ class DviFont(object): __slots__ = ('texname', 'size', 'widths', '_scale', '_vf', '_tfm') def __init__(self, scale, tfm, texname, vf): - if six.PY3 and isinstance(texname, bytes): - texname = texname.decode('ascii') + assert(isinstance(texname, bytes)) self._scale, self._tfm, self.texname, self._vf = \ scale, tfm, texname, vf self.size = scale * (72.0 / (72.27 * 2**16)) @@ -807,42 +806,42 @@ def __init__(self, filename): self._parse(file) def __getitem__(self, texname): - try: - result = self._font[texname] - except KeyError: - result = self._font[texname.decode('ascii')] + assert(isinstance(texname, bytes)) + result = self._font[texname] fn, enc = result.filename, result.encoding - if fn is not None and not fn.startswith('/'): + if fn is not None and not fn.startswith(b'/'): fn = find_tex_file(fn) - if enc is not None and not enc.startswith('/'): + if enc is not None and not enc.startswith(b'/'): enc = find_tex_file(result.encoding) return result._replace(filename=fn, encoding=enc) def _parse(self, file): - """Parse each line into words.""" + """Parse each line into words and process them.""" + for line in file: + line = six.b(line) line = line.strip() - if line == '' or line.startswith('%'): + if line == b'' or line.startswith(b'%'): continue words, pos = [], 0 while pos < len(line): - if line[pos] == '"': # double quoted word + if line[pos:pos+1] == b'"': # double quoted word pos += 1 - end = line.index('"', pos) + end = line.index(b'"', pos) words.append(line[pos:end]) pos = end + 1 else: # ordinary word - end = line.find(' ', pos+1) + end = line.find(b' ', pos+1) if end == -1: end = len(line) words.append(line[pos:end]) pos = end - while pos < len(line) and line[pos] == ' ': + while pos < len(line) and line[pos:pos+1] == b' ': pos += 1 self._register(words) def _register(self, words): - """Register a font described by "words". + """Register a font described by "words", a sequence of bytestrings. 
The format is, AFAIK: texname fontname [effects and filenames] Effects are PostScript snippets like ".177 SlantFont", @@ -861,19 +860,23 @@ def _register(self, words): # http://tex.stackexchange.com/questions/10826/ # http://article.gmane.org/gmane.comp.tex.pdftex/4914 + # input must be bytestrings (the file format is ASCII) + for word in words: + assert(isinstance(word, bytes)) + texname, psname = words[:2] - effects, encoding, filename = '', None, None + effects, encoding, filename = b'', None, None for word in words[2:]: - if not word.startswith('<'): + if not word.startswith(b'<'): effects = word else: - word = word.lstrip('<') - if word.startswith('[') or word.endswith('.enc'): + word = word.lstrip(b'<') + if word.startswith(b'[') or word.endswith(b'.enc'): if encoding is not None: matplotlib.verbose.report( 'Multiple encodings for %s = %s' % (texname, psname), 'debug') - if word.startswith('['): + if word.startswith(b'['): encoding = word[1:] else: encoding = word @@ -884,11 +887,11 @@ def _register(self, words): eff = effects.split() effects = {} try: - effects['slant'] = float(eff[eff.index('SlantFont')-1]) + effects['slant'] = float(eff[eff.index(b'SlantFont')-1]) except ValueError: pass try: - effects['extend'] = float(eff[eff.index('ExtendFont')-1]) + effects['extend'] = float(eff[eff.index(b'ExtendFont')-1]) except ValueError: pass @@ -927,26 +930,27 @@ def _parse(self, file): state = 0 for line in file: - comment_start = line.find('%') + line = six.b(line) + comment_start = line.find(b'%') if comment_start > -1: line = line[:comment_start] line = line.strip() if state == 0: # Expecting something like /FooEncoding [ - if '[' in line: + if b'[' in line: state = 1 - line = line[line.index('[')+1:].strip() + line = line[line.index(b'[')+1:].strip() if state == 1: - if ']' in line: # ] def - line = line[:line.index(']')] + if b']' in line: # ] def + line = line[:line.index(b']')] state = 2 words = line.split() for w in words: - if w.startswith('/'): + if 
w.startswith(b'/'): # Allow for /abc/def/ghi - subwords = w.split('/') + subwords = w.split(b'/') result.extend(subwords[1:]) else: raise ValueError("Broken name in encoding file: " + w) diff --git a/lib/matplotlib/tests/test_dviread.py b/lib/matplotlib/tests/test_dviread.py index 4d9ee14213c3..668da8d3f529 100644 --- a/lib/matplotlib/tests/test_dviread.py +++ b/lib/matplotlib/tests/test_dviread.py @@ -18,36 +18,36 @@ def test_PsfontsMap(monkeypatch): fontmap = dr.PsfontsMap(filename) # Check all properties of a few fonts for n in [1, 2, 3, 4, 5]: - key = 'TeXfont%d' % n + key = b'TeXfont%d' % n entry = fontmap[key] assert entry.texname == key - assert entry.psname == 'PSfont%d' % n + assert entry.psname == b'PSfont%d' % n if n not in [3, 5]: - assert entry.encoding == 'font%d.enc' % n + assert entry.encoding == b'font%d.enc' % n elif n == 3: - assert entry.encoding == 'enc3.foo' + assert entry.encoding == b'enc3.foo' # We don't care about the encoding of TeXfont5, which specifies # multiple encodings. 
if n not in [1, 5]: - assert entry.filename == 'font%d.pfa' % n + assert entry.filename == b'font%d.pfa' % n else: - assert entry.filename == 'font%d.pfb' % n + assert entry.filename == b'font%d.pfb' % n if n == 4: assert entry.effects == {'slant': -0.1, 'extend': 2.2} else: assert entry.effects == {} # Some special cases - entry = fontmap['TeXfont6'] + entry = fontmap[b'TeXfont6'] assert entry.filename is None assert entry.encoding is None - entry = fontmap['TeXfont7'] + entry = fontmap[b'TeXfont7'] assert entry.filename is None - assert entry.encoding == 'font7.enc' - entry = fontmap['TeXfont8'] - assert entry.filename == 'font8.pfb' + assert entry.encoding == b'font7.enc' + entry = fontmap[b'TeXfont8'] + assert entry.filename == b'font8.pfb' assert entry.encoding is None - entry = fontmap['TeXfont9'] - assert entry.filename == '/absolute/font9.pfb' + entry = fontmap[b'TeXfont9'] + assert entry.filename == b'/absolute/font9.pfb' @skip_if_command_unavailable(["kpsewhich", "-version"]) @@ -55,10 +55,13 @@ def test_dviread(): dir = os.path.join(os.path.dirname(__file__), 'baseline_images', 'dviread') with open(os.path.join(dir, 'test.json')) as f: correct = json.load(f) + for entry in correct: + entry['text'] = [[a, b, c, six.b(d), e] + for [a, b, c, d, e] in entry['text']] with dr.Dvi(os.path.join(dir, 'test.dvi'), None) as dvi: data = [{'text': [[t.x, t.y, six.unichr(t.glyph), - six.text_type(t.font.texname), + t.font.texname, round(t.font.size, 2)] for t in page.text], 'boxes': [[b.x, b.y, b.height, b.width] for b in page.boxes]} From dbc8b9e2c04a848bd81962fa4c5c525548f83c01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= Date: Thu, 25 Aug 2016 22:00:42 +0300 Subject: [PATCH 02/23] Test that the KeyError is raised when the font is missing --- lib/matplotlib/tests/test_dviread.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/matplotlib/tests/test_dviread.py b/lib/matplotlib/tests/test_dviread.py index 668da8d3f529..83a40908fb82 
100644 --- a/lib/matplotlib/tests/test_dviread.py +++ b/lib/matplotlib/tests/test_dviread.py @@ -7,6 +7,7 @@ import matplotlib.dviread as dr import os.path import json +import pytest def test_PsfontsMap(monkeypatch): @@ -48,6 +49,10 @@ def test_PsfontsMap(monkeypatch): assert entry.encoding is None entry = fontmap[b'TeXfont9'] assert entry.filename == b'/absolute/font9.pfb' + # Missing font + with pytest.raises(KeyError) as exc: + fontmap[b'no-such-font'] + assert b'no-such-font' in bytes(exc.value) @skip_if_command_unavailable(["kpsewhich", "-version"]) From 93fad5555f5970b64c39f8a06750c7dd15fcf4b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= Date: Thu, 25 Aug 2016 22:13:03 +0300 Subject: [PATCH 03/23] Mention bytestrings in docstring --- lib/matplotlib/dviread.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/matplotlib/dviread.py b/lib/matplotlib/dviread.py index c46ce298eace..9cbdc152a998 100644 --- a/lib/matplotlib/dviread.py +++ b/lib/matplotlib/dviread.py @@ -769,11 +769,11 @@ class PsfontsMap(object): Usage:: >>> map = PsfontsMap(find_tex_file('pdftex.map')) - >>> entry = map['ptmbo8r'] + >>> entry = map[b'ptmbo8r'] >>> entry.texname - 'ptmbo8r' + b'ptmbo8r' >>> entry.psname - 'Times-Bold' + b'Times-Bold' >>> entry.encoding '/usr/local/texlive/2008/texmf-dist/fonts/enc/dvips/base/8r.enc' >>> entry.effects From 4874e4e08c9503193a38a73e85f1cfe3b97a254b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= Date: Thu, 25 Aug 2016 22:13:11 +0300 Subject: [PATCH 04/23] Add a helpful note when raising KeyError from dviread.PsFonts So if you follow the troubleshooting instructions and rerun with --verbose-helpful you get a hint about the usual reason for #6516. 
--- lib/matplotlib/dviread.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/lib/matplotlib/dviread.py b/lib/matplotlib/dviread.py index 9cbdc152a998..8a8a90bfb998 100644 --- a/lib/matplotlib/dviread.py +++ b/lib/matplotlib/dviread.py @@ -33,6 +33,7 @@ import numpy as np import struct import sys +import textwrap import os if six.PY3: @@ -798,16 +799,33 @@ class PsfontsMap(object): while the pdf-related files perhaps only avoid the "Base 14" pdf fonts. But the user may have configured these files differently. """ - __slots__ = ('_font',) + __slots__ = ('_font', '_filename') def __init__(self, filename): self._font = {} + self._filename = filename + if six.PY3 and isinstance(filename, bytes): + self._filename = filename.decode('ascii', errors='replace') with open(filename, 'rt') as file: self._parse(file) def __getitem__(self, texname): assert(isinstance(texname, bytes)) - result = self._font[texname] + try: + result = self._font[texname] + except KeyError: + matplotlib.verbose.report(textwrap.fill + ('A PostScript file for the font whose TeX name is "%s" ' + 'could not be found in the file "%s". The dviread module ' + 'can only handle fonts that have an associated PostScript ' + 'font file. ' + 'This problem can often be solved by installing ' + 'a suitable PostScript font package in your (TeX) ' + 'package manager.' 
% (texname.decode('ascii'), + self._filename), + break_on_hyphens=False, break_long_words=False), + 'helpful') + raise fn, enc = result.filename, result.encoding if fn is not None and not fn.startswith(b'/'): fn = find_tex_file(fn) From a130ba720a57e4c6ca23b1a83d1fd1f9e789ba04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= Date: Thu, 25 Aug 2016 22:19:02 +0300 Subject: [PATCH 05/23] Attempted fix for Python 3.4 compatibility --- lib/matplotlib/tests/test_dviread.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/matplotlib/tests/test_dviread.py b/lib/matplotlib/tests/test_dviread.py index 83a40908fb82..7b5009fded19 100644 --- a/lib/matplotlib/tests/test_dviread.py +++ b/lib/matplotlib/tests/test_dviread.py @@ -19,7 +19,7 @@ def test_PsfontsMap(monkeypatch): fontmap = dr.PsfontsMap(filename) # Check all properties of a few fonts for n in [1, 2, 3, 4, 5]: - key = b'TeXfont%d' % n + key = ('TeXfont%d' % n).encode('ascii') entry = fontmap[key] assert entry.texname == key assert entry.psname == b'PSfont%d' % n From 0f0e41ab3c377aa810c15d34aedd43414e6ad71a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= Date: Fri, 26 Aug 2016 06:18:15 +0300 Subject: [PATCH 06/23] More python 3.4 compatibility --- lib/matplotlib/tests/test_dviread.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/matplotlib/tests/test_dviread.py b/lib/matplotlib/tests/test_dviread.py index 7b5009fded19..1d6653182311 100644 --- a/lib/matplotlib/tests/test_dviread.py +++ b/lib/matplotlib/tests/test_dviread.py @@ -22,17 +22,17 @@ def test_PsfontsMap(monkeypatch): key = ('TeXfont%d' % n).encode('ascii') entry = fontmap[key] assert entry.texname == key - assert entry.psname == b'PSfont%d' % n + assert entry.psname == ('PSfont%d' % n).encode('ascii') if n not in [3, 5]: - assert entry.encoding == b'font%d.enc' % n + assert entry.encoding == ('font%d.enc' % n).encode('ascii') elif n == 3: assert entry.encoding 
== b'enc3.foo' # We don't care about the encoding of TeXfont5, which specifies # multiple encodings. if n not in [1, 5]: - assert entry.filename == b'font%d.pfa' % n + assert entry.filename == ('font%d.pfa' % n).encode('ascii') else: - assert entry.filename == b'font%d.pfb' % n + assert entry.filename == ('font%d.pfb' % n).encode('ascii') if n == 4: assert entry.effects == {'slant': -0.1, 'extend': 2.2} else: From a7b57720ef6362bca7f7aa4ed4feabe7eea71591 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= Date: Tue, 27 Dec 2016 21:58:58 +0200 Subject: [PATCH 07/23] Use numpydoc format for several dviread docstrings --- doc/api/dviread.rst | 5 ++ lib/matplotlib/dviread.py | 165 +++++++++++++++++++++++++------------- 2 files changed, 113 insertions(+), 57 deletions(-) diff --git a/doc/api/dviread.rst b/doc/api/dviread.rst index 99549ce02f59..ceee143b6080 100644 --- a/doc/api/dviread.rst +++ b/doc/api/dviread.rst @@ -8,4 +8,9 @@ dviread .. automodule:: matplotlib.dviread :members: :undoc-members: + :exclude-members: Dvi + :show-inheritance: + +.. autoclass:: matplotlib.dviread.Dvi + :members: __iter__,close :show-inheritance: diff --git a/lib/matplotlib/dviread.py b/lib/matplotlib/dviread.py index 8a8a90bfb998..69a44e63a5cb 100644 --- a/lib/matplotlib/dviread.py +++ b/lib/matplotlib/dviread.py @@ -217,24 +217,32 @@ def _get_baseline(self, filename): return None def __enter__(self): + """ + Context manager enter method, does nothing. + """ return self def __exit__(self, etype, evalue, etrace): + """ + Context manager exit method, closes the underlying file if it is open. + """ self.close() def __iter__(self): """ Iterate through the pages of the file. - Returns (text, boxes) pairs, where: - text is a list of (x, y, fontnum, glyphnum, width) tuples - boxes is a list of (x, y, height, width) tuples - - The coordinates are transformed into a standard Cartesian - coordinate system at the dpi value given when initializing. 
- The coordinates are floating point numbers, but otherwise - precision is not lost and coordinate values are not clipped to - integers. + Yields + ------ + Page + Details of all the text and box objects on the page. + The Page tuple contains lists of Text and Box tuples and + the page dimensions, and the Text and Box tuples contain + coordinates transformed into a standard Cartesian + coordinate system at the dpi value given when initializing. + The coordinates are floating point numbers, but otherwise + precision is not lost and coordinate values are not clipped to + integers. """ while True: have_page = self._read() @@ -499,27 +507,38 @@ def _malformed(self, offset): class DviFont(object): """ - Object that holds a font's texname and size, supports comparison, + Encapsulation of a font that a DVI file can refer to. + + This class holds a font's texname and size, supports comparison, and knows the widths of glyphs in the same units as the AFM file. There are also internal attributes (for use by dviread.py) that are *not* used for comparison. The size is in Adobe points (converted from TeX points). - .. attribute:: texname - - Name of the font as used internally by TeX and friends. This - is usually very different from any external font names, and - :class:`dviread.PsfontsMap` can be used to find the external - name of the font. ASCII bytestring. - - .. attribute:: size - + Parameters + ---------- + + scale : float + Factor by which the font is scaled from its natural size. + tfm : Tfm + TeX font metrics for this font + texname : bytes + Name of the font as used internally by TeX and friends, as an + ASCII bytestring. This is usually very different from any external + font names, and :class:`dviread.PsfontsMap` can be used to find + the external name of the font. + vf : Vf + A TeX "virtual font" file, or None if this font is not virtual. 
+ + Attributes + ---------- + + texname : bytes + size : float Size of the font in Adobe points, converted from the slightly smaller TeX points. - - .. attribute:: widths - + widths : list Widths of glyphs in glyph-space units, typically 1/1000ths of the point size. @@ -579,12 +598,6 @@ def _height_depth_of(self, char): return result -# The virtual font format is a derivative of dvi: -# http://mirrors.ctan.org/info/knuth/virtual-fonts -# The following class reuses some of the machinery of Dvi -# but replaces the _read loop and dispatch mechanism. - - class Vf(Dvi): """ A virtual font (\*.vf file) containing subroutines for dvi files. @@ -594,6 +607,19 @@ class Vf(Dvi): vf = Vf(filename) glyph = vf[code] glyph.text, glyph.boxes, glyph.width + + Parameters + ---------- + + filename : string or bytestring + + Notes + ----- + + The virtual font format is a derivative of dvi: + http://mirrors.ctan.org/info/knuth/virtual-fonts + This class reuses some of the machinery of `Dvi` + but replaces the `_read` loop and dispatch mechanism. """ def __init__(self, filename): @@ -704,29 +730,27 @@ def _mul2012(num1, num2): class Tfm(object): """ - A TeX Font Metric file. This implementation covers only the bare - minimum needed by the Dvi class. + A TeX Font Metric file. - .. attribute:: checksum + This implementation covers only the bare minimum needed by the Dvi class. - Used for verifying against the dvi file. - - .. attribute:: design_size - - Design size of the font (in what units?) - - .. attribute:: width + Parameters + ---------- + filename : string or bytestring + Attributes + ---------- + checksum : int + Used for verifying against the dvi file. + design_size : int + Design size of the font (unknown units) + width : dict Width of each character, needs to be scaled by the factor specified in the dvi file. This is a dict because indexing may not start from 0. - - .. attribute:: height - + height : dict Height of each character. - - .. 
attribute:: depth - + depth : dict Depth of each character. """ __slots__ = ('checksum', 'design_size', 'width', 'height', 'depth') @@ -767,6 +791,7 @@ def __init__(self, filename): class PsfontsMap(object): """ A psfonts.map formatted file, mapping TeX fonts to PS fonts. + Usage:: >>> map = PsfontsMap(find_tex_file('pdftex.map')) @@ -781,6 +806,14 @@ class PsfontsMap(object): {'slant': 0.16700000000000001} >>> entry.filename + Parameters + ---------- + + filename : string or bytestring + + Notes + ----- + For historical reasons, TeX knows many Type-1 fonts by different names than the outside world. (For one thing, the names have to fit in eight characters.) Also, TeX's native fonts are not Type-1 @@ -792,12 +825,14 @@ class PsfontsMap(object): file names. A texmf tree typically includes mapping files called e.g. - psfonts.map, pdftex.map, dvipdfm.map. psfonts.map is used by - dvips, pdftex.map by pdfTeX, and dvipdfm.map by dvipdfm. - psfonts.map might avoid embedding the 35 PostScript fonts (i.e., - have no filename for them, as in the Times-Bold example above), - while the pdf-related files perhaps only avoid the "Base 14" pdf - fonts. But the user may have configured these files differently. + :file:`psfonts.map`, :file:`pdftex.map`, or :file:`dvipdfm.map`. + The file :file:`psfonts.map` is used by :program:`dvips`, + :file:`pdftex.map` by :program:`pdfTeX`, and :file:`dvipdfm.map` + by :program:`dvipdfm`. :file:`psfonts.map` might avoid embedding + the 35 PostScript fonts (i.e., have no filename for them, as in + the Times-Bold example above), while the pdf-related files perhaps + only avoid the "Base 14" pdf fonts. But the user may have + configured these files differently. 
""" __slots__ = ('_font', '_filename') @@ -928,6 +963,15 @@ class Encoding(object): for name in Encoding(filename): whatever(name) + + Parameters + ---------- + filename : string or bytestring + + Attributes + ---------- + encoding : list + List of character names """ __slots__ = ('encoding',) @@ -978,17 +1022,24 @@ def _parse(self, file): def find_tex_file(filename, format=None): """ - Call :program:`kpsewhich` to find a file in the texmf tree. If - *format* is not None, it is used as the value for the - `--format` option. + Find a file in the texmf tree. - Apparently most existing TeX distributions on Unix-like systems - use kpathsea. It's also available as part of MikTeX, a popular + Calls :program:`kpsewhich` which is an interface to the kpathsea + library [1]_. Most existing TeX distributions on Unix-like systems use + kpathsea. It is also available as part of MikTeX, a popular distribution on Windows. - .. seealso:: + Parameters + ---------- + filename : string or bytestring + format : string or bytestring + Used as the value of the `--format` option to :program:`kpsewhich`. + Could be e.g. 'tfm' or 'vf' to limit the search to that type of files. + + References + ---------- - `Kpathsea documentation <http://www.tug.org/kpathsea/>`_ + .. [1] `Kpathsea documentation <http://www.tug.org/kpathsea/>`_ The library that :program:`kpsewhich` is part of. 
""" From 803a96e2d9cb40cca2a3f6c7048d53a9789ea109 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= Date: Tue, 27 Dec 2016 22:00:14 +0200 Subject: [PATCH 08/23] Remove useless docstring --- lib/matplotlib/dviread.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/matplotlib/dviread.py b/lib/matplotlib/dviread.py index 69a44e63a5cb..bdd390c7ec8d 100644 --- a/lib/matplotlib/dviread.py +++ b/lib/matplotlib/dviread.py @@ -869,8 +869,6 @@ def __getitem__(self, texname): return result._replace(filename=fn, encoding=enc) def _parse(self, file): - """Parse each line into words and process them.""" - for line in file: line = six.b(line) line = line.strip() From ec5d80e66e443b5c43a80c981139fd78c43e9ef7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= Date: Tue, 27 Dec 2016 22:02:07 +0200 Subject: [PATCH 09/23] Raise a more useful exception --- lib/matplotlib/dviread.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/matplotlib/dviread.py b/lib/matplotlib/dviread.py index bdd390c7ec8d..151628dfc173 100644 --- a/lib/matplotlib/dviread.py +++ b/lib/matplotlib/dviread.py @@ -546,7 +546,9 @@ class DviFont(object): __slots__ = ('texname', 'size', 'widths', '_scale', '_vf', '_tfm') def __init__(self, scale, tfm, texname, vf): - assert(isinstance(texname, bytes)) + if not isinstance(texname, bytes): + raise ValueError("texname must be a bytestring, got %s" + % type(texname)) self._scale, self._tfm, self.texname, self._vf = \ scale, tfm, texname, vf self.size = scale * (72.0 / (72.27 * 2**16)) From fe52808d4381d020eeda95873508766a6db68d91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= Date: Tue, 27 Dec 2016 22:02:26 +0200 Subject: [PATCH 10/23] Remove misleading parentheses from assert --- lib/matplotlib/dviread.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/matplotlib/dviread.py b/lib/matplotlib/dviread.py index 151628dfc173..27763c6187f3 100644 --- 
a/lib/matplotlib/dviread.py +++ b/lib/matplotlib/dviread.py @@ -915,7 +915,7 @@ def _register(self, words): # input must be bytestrings (the file format is ASCII) for word in words: - assert(isinstance(word, bytes)) + assert isinstance(word, bytes) texname, psname = words[:2] effects, encoding, filename = b'', None, None From aa8c4f6a943dfb9cce37b40a3052583d50fca1aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= Date: Tue, 27 Dec 2016 22:24:19 +0200 Subject: [PATCH 11/23] Simplify parsing with regular expressions --- lib/matplotlib/dviread.py | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/lib/matplotlib/dviread.py b/lib/matplotlib/dviread.py index 27763c6187f3..812a29bf79e4 100644 --- a/lib/matplotlib/dviread.py +++ b/lib/matplotlib/dviread.py @@ -31,6 +31,7 @@ from matplotlib.compat import subprocess from matplotlib import rcParams import numpy as np +import re import struct import sys import textwrap @@ -876,21 +877,8 @@ def _parse(self, file): line = line.strip() if line == b'' or line.startswith(b'%'): continue - words, pos = [], 0 - while pos < len(line): - if line[pos:pos+1] == b'"': # double quoted word - pos += 1 - end = line.index(b'"', pos) - words.append(line[pos:end]) - pos = end + 1 - else: # ordinary word - end = line.find(b' ', pos+1) - if end == -1: - end = len(line) - words.append(line[pos:end]) - pos = end - while pos < len(line) and line[pos:pos+1] == b' ': - pos += 1 + words = [word.strip(b'"') for word in + re.findall(b'("[^"]*"|[^ ]+)', line)] self._register(words) def _register(self, words): From 9de07aa4cf57cedd8ea1f3e9f6f21abd0787f5df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= Date: Tue, 27 Dec 2016 23:08:15 +0200 Subject: [PATCH 12/23] Perhaps simplify further with regular expressions --- lib/matplotlib/dviread.py | 61 +++++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 28 deletions(-) diff --git a/lib/matplotlib/dviread.py 
b/lib/matplotlib/dviread.py index 812a29bf79e4..df00b4b21128 100644 --- a/lib/matplotlib/dviread.py +++ b/lib/matplotlib/dviread.py @@ -906,38 +906,43 @@ def _register(self, words): assert isinstance(word, bytes) texname, psname = words[:2] + words = words[2:] effects, encoding, filename = b'', None, None - for word in words[2:]: - if not word.startswith(b'<'): - effects = word - else: - word = word.lstrip(b'<') - if word.startswith(b'[') or word.endswith(b'.enc'): - if encoding is not None: - matplotlib.verbose.report( - 'Multiple encodings for %s = %s' - % (texname, psname), 'debug') - if word.startswith(b'['): - encoding = word[1:] - else: - encoding = word - else: - assert filename is None - filename = word - eff = effects.split() - effects = {} - try: - effects['slant'] = float(eff[eff.index(b'SlantFont')-1]) - except ValueError: - pass - try: - effects['extend'] = float(eff[eff.index(b'ExtendFont')-1]) - except ValueError: - pass + # pick the last non-filename word for effects + effects_words = [word for word in words if not word.startswith(b'<')] + if effects_words: + effects = effects_words[-1] + + encoding_re = br'< 1: + matplotlib.verbose.report( + 'Multiple encodings for %s = %s' % (texname, psname), 'debug') + if encoding_files: + encoding = encoding_files[-1] + + font_files = [word.lstrip(b'<') + for word in words + if word.startswith(b'<') + and not re.match(encoding_re, word)] + if font_files: + filename = font_files[-1] + + eff = {} + for psword, keyword in ((b'SlantFont', 'slant'), + (b'ExtendFont', 'extend')): + match = re.search(b'([^ ]+) +' + psword, effects) + if match: + try: + eff[keyword] = float(match.group(1)) + except ValueError: + pass self._font[texname] = PsFont( - texname=texname, psname=psname, effects=effects, + texname=texname, psname=psname, effects=eff, encoding=encoding, filename=filename) From c87b653275072cb31e528ea06e1fd109311c4bb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= Date: Fri, 30 Dec 2016 
00:04:14 +0200 Subject: [PATCH 13/23] Remove useless assert If the class has an internal error triggering this assertion, we will get some errors later from mixing strings and bytestrings. --- lib/matplotlib/dviread.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lib/matplotlib/dviread.py b/lib/matplotlib/dviread.py index df00b4b21128..fdd9fcfec08a 100644 --- a/lib/matplotlib/dviread.py +++ b/lib/matplotlib/dviread.py @@ -901,10 +901,6 @@ def _register(self, words): # http://tex.stackexchange.com/questions/10826/ # http://article.gmane.org/gmane.comp.tex.pdftex/4914 - # input must be bytestrings (the file format is ASCII) - for word in words: - assert isinstance(word, bytes) - texname, psname = words[:2] words = words[2:] effects, encoding, filename = b'', None, None From 2e19a6184384c7f886b2680ae8912b6b124d540f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= Date: Sat, 31 Dec 2016 22:32:44 +0200 Subject: [PATCH 14/23] Fix dvi font name handling in pdf backend These are now ASCII bytestrings so we should not assume they are strings. 
--- lib/matplotlib/backends/backend_pdf.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/lib/matplotlib/backends/backend_pdf.py b/lib/matplotlib/backends/backend_pdf.py index a5e6253ca387..fceb2822c74d 100644 --- a/lib/matplotlib/backends/backend_pdf.py +++ b/lib/matplotlib/backends/backend_pdf.py @@ -491,7 +491,9 @@ def __init__(self, filename, metadata=None): self.infoDict = {k: v for (k, v) in self.infoDict.items() if v is not None} - self.fontNames = {} # maps filenames to internal font names + # fontNames maps filenames/dvi names to internal font names; + # dvi font names have an entry in dviFontInfo + self.fontNames = {} self.nextFont = 1 # next free internal font name self.dviFontInfo = {} # information on dvi fonts # differently encoded Type-1 fonts may share the same descriptor @@ -636,11 +638,11 @@ def endStream(self): def fontName(self, fontprop): """ Select a font based on fontprop and return a name suitable for - Op.selectfont. If fontprop is a string, it will be interpreted - as the filename (or dvi name) of the font. + Op.selectfont. If fontprop is a string or bytestring, it will + be interpreted as the filename or dvi name of the font. 
""" - if is_string_like(fontprop): + if isinstance(fontprop, (str, bytes)): filename = fontprop elif rcParams['pdf.use14corefonts']: filename = findfont( @@ -667,16 +669,16 @@ def writeFonts(self): for filename in sorted(self.fontNames): Fx = self.fontNames[filename] matplotlib.verbose.report('Embedding font %s' % filename, 'debug') - if filename.endswith('.afm'): - # from pdf.use14corefonts - matplotlib.verbose.report('Writing AFM font', 'debug') - fonts[Fx] = self._write_afm_font(filename) - elif filename in self.dviFontInfo: + if filename in self.dviFontInfo: # a Type 1 font from a dvi file; # the filename is really the TeX name matplotlib.verbose.report('Writing Type-1 font', 'debug') fonts[Fx] = self.embedTeXFont(filename, self.dviFontInfo[filename]) + elif filename.endswith('.afm'): + # from pdf.use14corefonts + matplotlib.verbose.report('Writing AFM font', 'debug') + fonts[Fx] = self._write_afm_font(filename) else: # a normal TrueType font matplotlib.verbose.report('Writing TrueType font', 'debug') @@ -699,8 +701,8 @@ def _write_afm_font(self, filename): return fontdictObject def embedTeXFont(self, texname, fontinfo): - msg = ('Embedding TeX font ' + texname + ' - fontinfo=' + - repr(fontinfo.__dict__)) + msg = ('Embedding TeX font {0} - fontinfo={1}' + .format(texname, fontinfo.__dict__)) matplotlib.verbose.report(msg, 'debug') # Widths From 119934af5d3f1489ce203cd0263b24cee8e041a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= Date: Sun, 1 Jan 2017 09:39:22 +0200 Subject: [PATCH 15/23] Separate the handling of dvi fonts in the pdf backend Don't mix filenames and dvi font names as keys of the same dict. 
--- lib/matplotlib/backends/backend_pdf.py | 91 +++++++++++++++----------- 1 file changed, 53 insertions(+), 38 deletions(-) diff --git a/lib/matplotlib/backends/backend_pdf.py b/lib/matplotlib/backends/backend_pdf.py index fceb2822c74d..8d6b0e68c619 100644 --- a/lib/matplotlib/backends/backend_pdf.py +++ b/lib/matplotlib/backends/backend_pdf.py @@ -491,11 +491,10 @@ def __init__(self, filename, metadata=None): self.infoDict = {k: v for (k, v) in self.infoDict.items() if v is not None} - # fontNames maps filenames/dvi names to internal font names; - # dvi font names have an entry in dviFontInfo - self.fontNames = {} + self.fontNames = {} # maps filenames to internal font names self.nextFont = 1 # next free internal font name - self.dviFontInfo = {} # information on dvi fonts + self.dviFontInfo = {} # maps dvi font names to embedding information + self.texFontMap = None # maps TeX font names to PostScript fonts # differently encoded Type-1 fonts may share the same descriptor self.type1Descriptors = {} self.used_characters = {} @@ -638,11 +637,11 @@ def endStream(self): def fontName(self, fontprop): """ Select a font based on fontprop and return a name suitable for - Op.selectfont. If fontprop is a string or bytestring, it will - be interpreted as the filename or dvi name of the font. + Op.selectfont. If fontprop is a string, it will be interpreted + as the filename of the font. """ - if isinstance(fontprop, (str, bytes)): + if isinstance(fontprop, six.string_types): filename = fontprop elif rcParams['pdf.use14corefonts']: filename = findfont( @@ -664,18 +663,55 @@ def fontName(self, fontprop): return Fx + def dviFontName(self, dvifont): + """ + Given a dvi font object, return a name suitable for Op.selectfont. + This registers the font information in self.dviFontInfo if not yet + registered. 
+ """ + + dvi_info = self.dviFontInfo.get(dvifont.texname) + if dvi_info is not None: + return dvi_info.pdfname + + # lazy-load texFontMap, it takes a while to parse + # and usetex is a relatively rare use case + if self.texFontMap is None: + self.texFontMap = \ + dviread.PsfontsMap(dviread.find_tex_file('pdftex.map')) + + psfont = self.texFontMap[dvifont.texname] + if psfont.filename is None: + raise ValueError( + ("No usable font file found for {0} (TeX: {1}). " + "The font may lack a Type-1 version.") + .format(psfont.psname, dvifont.texname)) + + pdfname = Name('F%d' % self.nextFont) + self.nextFont += 1 + matplotlib.verbose.report( + 'Assigning font {0} = {1} (dvi)'.format(pdfname, dvifont.texname), + 'debug') + self.dviFontInfo[dvifont.texname] = Bunch( + dvifont=dvifont, + pdfname=pdfname, + fontfile=psfont.filename, + basefont=psfont.psname, + encodingfile=psfont.encoding, + effects=psfont.effects) + return pdfname + def writeFonts(self): fonts = {} + for dviname, info in sorted(self.dviFontInfo.items()): + Fx = info.pdfname + matplotlib.verbose.report('Embedding Type-1 font %s from dvi' + % dviname, 'debug') + fonts[Fx] = self.embedTeXFont(info) for filename in sorted(self.fontNames): Fx = self.fontNames[filename] matplotlib.verbose.report('Embedding font %s' % filename, 'debug') - if filename in self.dviFontInfo: - # a Type 1 font from a dvi file; - # the filename is really the TeX name - matplotlib.verbose.report('Writing Type-1 font', 'debug') - fonts[Fx] = self.embedTeXFont(filename, - self.dviFontInfo[filename]) - elif filename.endswith('.afm'): + if filename.endswith('.afm'): # from pdf.use14corefonts matplotlib.verbose.report('Writing AFM font', 'debug') fonts[Fx] = self._write_afm_font(filename) @@ -700,9 +736,9 @@ def _write_afm_font(self, filename): self.writeObject(fontdictObject, fontdict) return fontdictObject - def embedTeXFont(self, texname, fontinfo): + def embedTeXFont(self, fontinfo): msg = ('Embedding TeX font {0} - fontinfo={1}' - 
.format(texname, fontinfo.__dict__)) + .format(fontinfo.dvifont.texname, fontinfo.__dict__)) matplotlib.verbose.report(msg, 'debug') # Widths @@ -1572,7 +1608,6 @@ def __init__(self, file, image_dpi, height, width): self.gc = self.new_gc() self.mathtext_parser = MathTextParser("Pdf") self.image_dpi = image_dpi - self.tex_font_map = None def finalize(self): self.file.output(*self.gc.finalize()) @@ -1598,12 +1633,6 @@ def check_gc(self, gc, fillcolor=None): gc._fillcolor = orig_fill gc._effective_alphas = orig_alphas - def tex_font_mapping(self, texfont): - if self.tex_font_map is None: - self.tex_font_map = \ - dviread.PsfontsMap(dviread.find_tex_file('pdftex.map')) - return self.tex_font_map[texfont] - def track_characters(self, font, s): """Keeps track of which characters are required from each font.""" @@ -1896,21 +1925,7 @@ def draw_tex(self, gc, x, y, s, prop, angle, ismath='TeX!', mtext=None): oldfont, seq = None, [] for x1, y1, dvifont, glyph, width in page.text: if dvifont != oldfont: - pdfname = self.file.fontName(dvifont.texname) - if dvifont.texname not in self.file.dviFontInfo: - psfont = self.tex_font_mapping(dvifont.texname) - if psfont.filename is None: - self.file.broken = True - raise ValueError( - ("No usable font file found for %s (%s). " - "The font may lack a Type-1 version.") - % (psfont.psname, dvifont.texname)) - self.file.dviFontInfo[dvifont.texname] = Bunch( - fontfile=psfont.filename, - basefont=psfont.psname, - encodingfile=psfont.encoding, - effects=psfont.effects, - dvifont=dvifont) + pdfname = self.file.dviFontName(dvifont) seq += [['font', pdfname, dvifont.size]] oldfont = dvifont # We need to convert the glyph numbers to bytes, and the easiest From 8fa303f729cbea1ec699cd0451ed2d5a9b0f7e2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= Date: Mon, 2 Jan 2017 07:37:44 +0200 Subject: [PATCH 16/23] Simplify enc file parsing Use re.findall, and open the file as binary. 
--- lib/matplotlib/dviread.py | 45 ++++++++++++++------------------------- 1 file changed, 16 insertions(+), 29 deletions(-) diff --git a/lib/matplotlib/dviread.py b/lib/matplotlib/dviread.py index fdd9fcfec08a..ada4a7a1b931 100644 --- a/lib/matplotlib/dviread.py +++ b/lib/matplotlib/dviread.py @@ -965,7 +965,7 @@ class Encoding(object): __slots__ = ('encoding',) def __init__(self, filename): - with open(filename, 'rt') as file: + with open(filename, 'rb') as file: matplotlib.verbose.report('Parsing TeX encoding ' + filename, 'debug-annoying') self.encoding = self._parse(file) @@ -979,34 +979,21 @@ def __iter__(self): def _parse(self, file): result = [] - state = 0 - for line in file: - line = six.b(line) - comment_start = line.find(b'%') - if comment_start > -1: - line = line[:comment_start] - line = line.strip() - - if state == 0: - # Expecting something like /FooEncoding [ - if b'[' in line: - state = 1 - line = line[line.index(b'[')+1:].strip() - - if state == 1: - if b']' in line: # ] def - line = line[:line.index(b']')] - state = 2 - words = line.split() - for w in words: - if w.startswith(b'/'): - # Allow for /abc/def/ghi - subwords = w.split(b'/') - result.extend(subwords[1:]) - else: - raise ValueError("Broken name in encoding file: " + w) - - return result + lines = (line[:line.find(b'%')] if b'%' in line else line.strip() + for line in file) + data = b''.join(lines) + match = re.search(six.b(r'\['), data) + if not match: + raise ValueError("Cannot locate beginning of encoding in {}" + .format(file)) + data = data[match.span()[1]:] + match = re.search(six.b(r'\]'), data) + if not match: + raise ValueError("Cannot locate end of encoding in {}" + .format(file)) + data = data[:match.span()[0]] + + return re.findall(six.b(r'/([^][{}<>\s]+)'), data) def find_tex_file(filename, format=None): From 94587b1b8ea7c93f468675efba2c1c8e5d7709d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= Date: Tue, 3 Jan 2017 07:30:14 +0200 Subject: [PATCH 
17/23] Small changes in response to code review Improve a docstring, remove unneeded parens from an assert, open a file as binary instead of encoding each line read from it, don't call six.b on variable strings, simplify string handling, improve the formatting of a matplotlib.verbose.report call. --- lib/matplotlib/dviread.py | 54 +++++++++++++--------------- lib/matplotlib/tests/test_dviread.py | 2 +- 2 files changed, 25 insertions(+), 31 deletions(-) diff --git a/lib/matplotlib/dviread.py b/lib/matplotlib/dviread.py index ada4a7a1b931..94a3f9f8e31b 100644 --- a/lib/matplotlib/dviread.py +++ b/lib/matplotlib/dviread.py @@ -747,14 +747,10 @@ class Tfm(object): Used for verifying against the dvi file. design_size : int Design size of the font (unknown units) - width : dict - Width of each character, needs to be scaled by the factor - specified in the dvi file. This is a dict because indexing may + width, height, depth : dict + Dimensions of each character, need to be scaled by the factor + specified in the dvi file. These are dicts because indexing may not start from 0. - height : dict - Height of each character. - depth : dict - Depth of each character. """ __slots__ = ('checksum', 'design_size', 'width', 'height', 'depth') @@ -844,25 +840,25 @@ def __init__(self, filename): self._filename = filename if six.PY3 and isinstance(filename, bytes): self._filename = filename.decode('ascii', errors='replace') - with open(filename, 'rt') as file: + with open(filename, 'rb') as file: self._parse(file) def __getitem__(self, texname): - assert(isinstance(texname, bytes)) + assert isinstance(texname, bytes) try: result = self._font[texname] except KeyError: - matplotlib.verbose.report(textwrap.fill - ('A PostScript file for the font whose TeX name is "%s" ' - 'could not be found in the file "%s". The dviread module ' - 'can only handle fonts that have an associated PostScript ' - 'font file. 
' - 'This problem can often be solved by installing ' - 'a suitable PostScript font package in your (TeX) ' - 'package manager.' % (texname.decode('ascii'), - self._filename), - break_on_hyphens=False, break_long_words=False), - 'helpful') + fmt = ('A PostScript file for the font whose TeX name is "{0}" ' + 'could not be found in the file "{1}". The dviread module ' + 'can only handle fonts that have an associated PostScript ' + 'font file. ' + 'This problem can often be solved by installing ' + 'a suitable PostScript font package in your (TeX) ' + 'package manager.') + msg = fmt.format(texname.decode('ascii'), self._filename) + msg = textwrap.fill(msg, break_on_hyphens=False, + break_long_words=False) + matplotlib.verbose.report(msg, 'helpful') raise fn, enc = result.filename, result.encoding if fn is not None and not fn.startswith(b'/'): @@ -873,7 +869,6 @@ def __getitem__(self, texname): def _parse(self, file): for line in file: - line = six.b(line) line = line.strip() if line == b'' or line.startswith(b'%'): continue @@ -979,21 +974,20 @@ def __iter__(self): def _parse(self, file): result = [] - lines = (line[:line.find(b'%')] if b'%' in line else line.strip() - for line in file) + lines = (line.split(b'%', 1)[0].strip() for line in file) data = b''.join(lines) - match = re.search(six.b(r'\['), data) - if not match: + beginning = data.find(b'[') + if beginning < 0: raise ValueError("Cannot locate beginning of encoding in {}" .format(file)) - data = data[match.span()[1]:] - match = re.search(six.b(r'\]'), data) - if not match: + data = data[beginning:] + end = data.find(b']') + if end < 0: raise ValueError("Cannot locate end of encoding in {}" .format(file)) - data = data[:match.span()[0]] + data = data[:end] - return re.findall(six.b(r'/([^][{}<>\s]+)'), data) + return re.findall(br'/([^][{}<>\s]+)', data) def find_tex_file(filename, format=None): diff --git a/lib/matplotlib/tests/test_dviread.py b/lib/matplotlib/tests/test_dviread.py index 
1d6653182311..3cd66f124e37 100644 --- a/lib/matplotlib/tests/test_dviread.py +++ b/lib/matplotlib/tests/test_dviread.py @@ -61,7 +61,7 @@ def test_dviread(): with open(os.path.join(dir, 'test.json')) as f: correct = json.load(f) for entry in correct: - entry['text'] = [[a, b, c, six.b(d), e] + entry['text'] = [[a, b, c, d.encode('ascii'), e] for [a, b, c, d, e] in entry['text']] with dr.Dvi(os.path.join(dir, 'test.dvi'), None) as dvi: data = [{'text': [[t.x, t.y, From 254e3dffe13ae2da29e7a1e54fe7c9bbe0edf5f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= Date: Tue, 3 Jan 2017 09:36:36 +0200 Subject: [PATCH 18/23] Simplify psfonts.map parsing further Combine the word splitting and classification in one regex so we only have to scan each line once. Add some quotation marks in the test case to check that we are handling quoted words correctly (the behavior should always have matched this test case). --- lib/matplotlib/dviread.py | 104 ++++++++++-------- .../tests/baseline_images/dviread/test.map | 8 +- 2 files changed, 60 insertions(+), 52 deletions(-) diff --git a/lib/matplotlib/dviread.py b/lib/matplotlib/dviread.py index 94a3f9f8e31b..53e19f09b677 100644 --- a/lib/matplotlib/dviread.py +++ b/lib/matplotlib/dviread.py @@ -868,16 +868,8 @@ def __getitem__(self, texname): return result._replace(filename=fn, encoding=enc) def _parse(self, file): - for line in file: - line = line.strip() - if line == b'' or line.startswith(b'%'): - continue - words = [word.strip(b'"') for word in - re.findall(b'("[^"]*"|[^ ]+)', line)] - self._register(words) - - def _register(self, words): - """Register a font described by "words", a sequence of bytestrings. + """ + Parse the font mapping file. 
The format is, AFAIK: texname fontname [effects and filenames] Effects are PostScript snippets like ".177 SlantFont", @@ -889,52 +881,68 @@ def _register(self, words): There is some difference between + word_re = re.compile( + br'''(?x) (?: + "<\[ (?P<enc1> [^"]+ )" | # quoted encoding marked by [ + "< (?P<enc2> [^"]+.enc)" | # quoted encoding, ends in .enc + "<<? (?P<file1> [^"]+ )" | # quoted font file name + " (?P<eff1> [^"]+ )" | # quoted effects or font name + <\[ (?P<enc3> \S+ ) | # encoding marked by [ + < (?P<enc4> \S+ .enc) | # encoding, ends in .enc + <<? (?P<file2> \S+ ) | # font file name + (?P<eff2> \S+ ) # effects or font name + )''') + effects_re = re.compile( + br'''(?x) (?P<slant> -?[0-9]*(?:\.[0-9]+)) \s* SlantFont + | (?P<extend>-?[0-9]*(?:\.[0-9]+)) \s* ExtendFont''') + + lines = (line.strip() + for line in file + if not empty_re.match(line)) + for line in lines: + effects, encoding, filename = b'', None, None + words = word_re.finditer(line) + + w = next(words) + texname = w.group('eff2') or w.group('eff1') + w = next(words) + psname = w.group('eff2') or w.group('eff1') + + for w in words: + eff = w.group('eff1') or w.group('eff2') + if eff: + effects = eff + continue + enc = (w.group('enc4') or w.group('enc3') or + w.group('enc2') or w.group('enc1')) + if enc: + if encoding is not None: + matplotlib.verbose.report( + 'Multiple encodings for %s = %s' + % (texname, psname), + 'debug') + encoding = enc + continue + filename = w.group('file2') or w.group('file1') - # pick the last non-filename word for effects - effects_words = [word for word in words if not word.startswith(b'<')] - if effects_words: - effects = effects_words[-1] + effects_dict = {} + for match in effects_re.finditer(effects): + slant = match.group('slant') + if slant: + effects_dict['slant'] = float(slant) + else: + effects_dict['extend'] = float(match.group('extend')) - encoding_re = br'< 1: - matplotlib.verbose.report( - 'Multiple encodings for %s = %s' % (texname, psname), 'debug') - if encoding_files: - encoding = encoding_files[-1] - - font_files = [word.lstrip(b'<') - for word in words - if 
word.startswith(b'<') - and not re.match(encoding_re, word)] - if font_files: - filename = font_files[-1] - - eff = {} - for psword, keyword in ((b'SlantFont', 'slant'), - (b'ExtendFont', 'extend')): - match = re.search(b'([^ ]+) +' + psword, effects) - if match: - try: - eff[keyword] = float(match.group(1)) - except ValueError: - pass - - self._font[texname] = PsFont( - texname=texname, psname=psname, effects=eff, - encoding=encoding, filename=filename) + self._font[texname] = PsFont( + texname=texname, psname=psname, effects=effects_dict, + encoding=encoding, filename=filename) class Encoding(object): diff --git a/lib/matplotlib/tests/baseline_images/dviread/test.map b/lib/matplotlib/tests/baseline_images/dviread/test.map index eb5bea7a2076..96a4ca6f51cb 100644 --- a/lib/matplotlib/tests/baseline_images/dviread/test.map +++ b/lib/matplotlib/tests/baseline_images/dviread/test.map @@ -1,9 +1,9 @@ % used by test_dviread.py -TeXfont1 PSfont1 Date: Sun, 29 Jan 2017 20:29:33 +0200 Subject: [PATCH 19/23] Try to fix the KeyError test --- lib/matplotlib/tests/test_dviread.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/matplotlib/tests/test_dviread.py b/lib/matplotlib/tests/test_dviread.py index 3cd66f124e37..9410de60c8d2 100644 --- a/lib/matplotlib/tests/test_dviread.py +++ b/lib/matplotlib/tests/test_dviread.py @@ -52,7 +52,7 @@ def test_PsfontsMap(monkeypatch): # Missing font with pytest.raises(KeyError) as exc: fontmap[b'no-such-font'] - assert b'no-such-font' in bytes(exc.value) + assert 'no-such-font' in str(exc.value) @skip_if_command_unavailable(["kpsewhich", "-version"]) From 25a8fed83d456987958d82b525a3dd7f89a5d4ec Mon Sep 17 00:00:00 2001 From: Thomas A Caswell Date: Sat, 11 Feb 2017 18:24:13 -0500 Subject: [PATCH 20/23] ENH: make texFontMap a property Only used once in the code, but makes the lazy parsing more standard. 
--- lib/matplotlib/backends/backend_pdf.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/lib/matplotlib/backends/backend_pdf.py b/lib/matplotlib/backends/backend_pdf.py index 8d6b0e68c619..c007e44964cc 100644 --- a/lib/matplotlib/backends/backend_pdf.py +++ b/lib/matplotlib/backends/backend_pdf.py @@ -494,7 +494,7 @@ def __init__(self, filename, metadata=None): self.fontNames = {} # maps filenames to internal font names self.nextFont = 1 # next free internal font name self.dviFontInfo = {} # maps dvi font names to embedding information - self.texFontMap = None # maps TeX font names to PostScript fonts + self._texFontMap = None # maps TeX font names to PostScript fonts # differently encoded Type-1 fonts may share the same descriptor self.type1Descriptors = {} self.used_characters = {} @@ -663,6 +663,16 @@ def fontName(self, fontprop): return Fx + @property + def texFontMap(self): + # lazy-load texFontMap, it takes a while to parse + # and usetex is a relatively rare use case + if self._texFontMap is None: + self._texFontMap = dviread.PsfontsMap( + dviread.find_tex_file('pdftex.map')) + + return self._texFontMap + def dviFontName(self, dvifont): """ Given a dvi font object, return a name suitable for Op.selectfont. 
@@ -674,12 +684,6 @@ def dviFontName(self, dvifont): if dvi_info is not None: return dvi_info.pdfname - # lazy-load texFontMap, it takes a while to parse - # and usetex is a relatively rare use case - if self.texFontMap is None: - self.texFontMap = \ - dviread.PsfontsMap(dviread.find_tex_file('pdftex.map')) - psfont = self.texFontMap[dvifont.texname] if psfont.filename is None: raise ValueError( From 5ba21b0fc884318dcea0915978b4559206687ed5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= Date: Sun, 12 Feb 2017 20:56:51 +0200 Subject: [PATCH 21/23] Use file system encoding for the psfonts file name --- lib/matplotlib/dviread.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/matplotlib/dviread.py b/lib/matplotlib/dviread.py index 53e19f09b677..c323cc44de66 100644 --- a/lib/matplotlib/dviread.py +++ b/lib/matplotlib/dviread.py @@ -839,7 +839,8 @@ def __init__(self, filename): self._font = {} self._filename = filename if six.PY3 and isinstance(filename, bytes): - self._filename = filename.decode('ascii', errors='replace') + encoding = sys.getfilesystemencoding() or 'utf-8' + self._filename = filename.decode(encoding, errors='replace') with open(filename, 'rb') as file: self._parse(file) From 10135bf13d880fcc977852af2c03522e7941d3e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= Date: Sun, 12 Feb 2017 21:47:48 +0200 Subject: [PATCH 22/23] Document minor API changes And add an underscore in the beginning of the method whose signature changes. 
--- doc/api/api_changes/2017-02-12-JKS.rst | 8 ++++++++ lib/matplotlib/backends/backend_pdf.py | 4 ++-- 2 files changed, 10 insertions(+), 2 deletions(-) create mode 100644 doc/api/api_changes/2017-02-12-JKS.rst diff --git a/doc/api/api_changes/2017-02-12-JKS.rst b/doc/api/api_changes/2017-02-12-JKS.rst new file mode 100644 index 000000000000..490f1ea1e87c --- /dev/null +++ b/doc/api/api_changes/2017-02-12-JKS.rst @@ -0,0 +1,8 @@ +Changes to PDF backend methods +`````````````````````````````` + +The methods `embedTeXFont` and `tex_font_mapping` of +`matplotlib.backends.backend_pdf.PdfFile` have been removed. +It is unlikely that external users would have called +these methods, which are related to the font system +internal to the PDF backend. diff --git a/lib/matplotlib/backends/backend_pdf.py b/lib/matplotlib/backends/backend_pdf.py index c007e44964cc..1de31274fa19 100644 --- a/lib/matplotlib/backends/backend_pdf.py +++ b/lib/matplotlib/backends/backend_pdf.py @@ -711,7 +711,7 @@ def writeFonts(self): Fx = info.pdfname matplotlib.verbose.report('Embedding Type-1 font %s from dvi' % dviname, 'debug') - fonts[Fx] = self.embedTeXFont(info) + fonts[Fx] = self._embedTeXFont(info) for filename in sorted(self.fontNames): Fx = self.fontNames[filename] matplotlib.verbose.report('Embedding font %s' % filename, 'debug') @@ -740,7 +740,7 @@ def _write_afm_font(self, filename): self.writeObject(fontdictObject, fontdict) return fontdictObject - def embedTeXFont(self, fontinfo): + def _embedTeXFont(self, fontinfo): msg = ('Embedding TeX font {0} - fontinfo={1}' .format(fontinfo.dvifont.texname, fontinfo.__dict__)) matplotlib.verbose.report(msg, 'debug') From 6de98131235f68ccdf838b17844da72fd40bfe4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= Date: Sun, 12 Feb 2017 21:57:17 +0200 Subject: [PATCH 23/23] Explain named group ordering --- lib/matplotlib/dviread.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/lib/matplotlib/dviread.py 
b/lib/matplotlib/dviread.py index c323cc44de66..78904522f2dc 100644 --- a/lib/matplotlib/dviread.py +++ b/lib/matplotlib/dviread.py @@ -911,16 +911,23 @@ def _parse(self, file): effects, encoding, filename = b'', None, None words = word_re.finditer(line) + # The named groups are mutually exclusive and are + # referenced below at an estimated order of probability of + # occurrence based on looking at my copy of pdftex.map. + # The font names are probably unquoted: w = next(words) texname = w.group('eff2') or w.group('eff1') w = next(words) psname = w.group('eff2') or w.group('eff1') for w in words: + # Any effects are almost always quoted: eff = w.group('eff1') or w.group('eff2') if eff: effects = eff continue + # Encoding files usually have the .enc suffix + # and almost never need quoting: enc = (w.group('enc4') or w.group('enc3') or w.group('enc2') or w.group('enc1')) if enc: @@ -931,6 +938,7 @@ def _parse(self, file): 'debug') encoding = enc continue + # File names are probably unquoted: filename = w.group('file2') or w.group('file1') effects_dict = {}