Skip to content

Commit 2418413

Browse files
committed
Implement reading dvi files into the cache
Rename the Dvi class to _DviReader and use it only for storing files in the cache. The new Dvi class reads from the cache, first calling _DviReader to insert the file if it is not already cached.
1 parent 545b407 commit 2418413

File tree

2 files changed

+144
-119
lines changed

2 files changed

+144
-119
lines changed

lib/matplotlib/dviread.py

+139 -114
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@
2020
from collections import namedtuple
2121
import enum
2222
from functools import lru_cache, partial, wraps
23+
from itertools import chain
2324
import logging
2425
import os
2526
import re
@@ -168,28 +169,23 @@ def wrapper(self, byte):
168169
return decorate
169170

170171

171-
class Dvi(object):
172-
"""
173-
A reader for a dvi ("device-independent") file, as produced by TeX.
174-
The current implementation can only iterate through pages in order.
172+
def _keep(func, keys):
173+
"""Return mapping from each k in keys to func(k)
174+
such that func(k) is not None"""
175+
return dict((k, v) for k, v in zip(keys, map(func, keys)) if v is not None)
175176

176-
This class can be used as a context manager to close the underlying
177-
file upon exit. Pages can be read via iteration. Here is an overly
178-
simple way to extract text without trying to detect whitespace::
179177

180-
>>> with matplotlib.dviread.Dvi('input.dvi', 72) as dvi:
181-
>>> for page in dvi:
182-
>>> print(''.join(chr(t.glyph) for t in page.text))
178+
class _DviReader(object):
179+
"""
180+
A reader for a dvi ("device-independent") file, as produced by TeX.
181+
This implementation is only used to store the file in a cache, from
182+
which it is read by Dvi.
183183
184184
Parameters
185185
----------
186186
187187
filename : str
188188
dvi file to read
189-
dpi : number or None
190-
Dots per inch, can be floating-point; this affects the
191-
coordinates returned. Use None to get TeX's internal units
192-
which are likely only useful for debugging.
193189
cache : TeXSupportCache instance, optional
194190
Support file cache instance, defaults to the TeXSupportCache
195191
singleton.
@@ -198,28 +194,28 @@ class Dvi(object):
198194
_dtable = [None] * 256
199195
_dispatch = partial(_dispatch, _dtable)
200196

201-
def __init__(self, filename, dpi, cache=None):
202-
"""
203-
Read the data from the file named *filename* and convert
204-
TeX's internal units to units of *dpi* per inch.
205-
*dpi* only sets the units and does not limit the resolution.
206-
Use None to return TeX's internal units.
207-
"""
197+
def __init__(self, filename, cache=None):
208198
_log.debug('Dvi: %s', filename)
209199
if cache is None:
210200
cache = TeXSupportCache.get_cache()
211201
self.cache = cache
212202
self.file = open(filename, 'rb')
213-
self.dpi = dpi
214203
self.fonts = {}
204+
self.recursive_fonts = set()
215205
self.state = _dvistate.pre
216206
self.baseline = self._get_baseline(filename)
217-
self.fontnames = sorted(set(self._read_fonts()))
207+
self.fontnames = set(self._read_fonts())
218208
# populate kpsewhich cache with font pathnames
219209
find_tex_files([x + suffix for x in self.fontnames
220210
for suffix in ('.tfm', '.vf', '.pfb')],
221211
cache)
222-
cache.optimize()
212+
self._tfm = _keep(_tfmfile, self.fontnames)
213+
self._vf = _keep(_vffile, self.fontnames)
214+
for vf in self._vf.values():
215+
self.fontnames.update(vf.fontnames)
216+
217+
def close(self):
218+
self.file.close()
223219

224220
def _get_baseline(self, filename):
225221
if rcParams['text.latex.preview']:
@@ -232,88 +228,32 @@ def _get_baseline(self, filename):
232228
return float(depth)
233229
return None
234230

235-
def __enter__(self):
236-
"""
237-
Context manager enter method, does nothing.
238-
"""
239-
return self
240-
241-
def __exit__(self, etype, evalue, etrace):
242-
"""
243-
Context manager exit method, closes the underlying file if it is open.
244-
"""
245-
self.close()
246-
247-
def __iter__(self):
248-
"""
249-
Iterate through the pages of the file.
250-
251-
Yields
252-
------
253-
Page
254-
Details of all the text and box objects on the page.
255-
The Page tuple contains lists of Text and Box tuples and
256-
the page dimensions, and the Text and Box tuples contain
257-
coordinates transformed into a standard Cartesian
258-
coordinate system at the dpi value given when initializing.
259-
The coordinates are floating point numbers, but otherwise
260-
precision is not lost and coordinate values are not clipped to
261-
integers.
262-
"""
263-
while True:
264-
have_page = self._read()
265-
if have_page:
266-
yield self._output()
267-
else:
268-
break
269-
270-
def close(self):
271-
"""
272-
Close the underlying file if it is open.
273-
"""
274-
if not self.file.closed:
275-
self.file.close()
276-
277-
def _output(self):
278-
"""
279-
Output the text and boxes belonging to the most recent page.
280-
page = dvi._output()
281-
"""
282-
minx, miny, maxx, maxy = np.inf, np.inf, -np.inf, -np.inf
283-
maxy_pure = -np.inf
284-
for elt in self.text + self.boxes:
285-
if isinstance(elt, Box):
286-
x, y, h, w = elt
287-
e = 0 # zero depth
288-
else: # glyph
289-
x, y, font, g, w = elt
290-
h, e = font._height_depth_of(g)
291-
minx = min(minx, x)
292-
miny = min(miny, y - h)
293-
maxx = max(maxx, x + w)
294-
maxy = max(maxy, y + e)
295-
maxy_pure = max(maxy_pure, y)
296-
297-
if self.dpi is None:
298-
# special case for ease of debugging: output raw dvi coordinates
299-
return Page(text=self.text, boxes=self.boxes,
300-
width=maxx-minx, height=maxy_pure-miny,
301-
descent=maxy-maxy_pure)
302-
303-
# convert from TeX's "scaled points" to dpi units
304-
d = self.dpi / (72.27 * 2**16)
305-
if self.baseline is None:
306-
descent = (maxy - maxy_pure) * d
307-
else:
308-
descent = self.baseline
309-
310-
text = [Text((x-minx)*d, (maxy-y)*d - descent, f, g, w*d)
311-
for (x, y, f, g, w) in self.text]
312-
boxes = [Box((x-minx)*d, (maxy-y)*d - descent, h*d, w*d)
313-
for (x, y, h, w) in self.boxes]
314-
315-
return Page(text=text, boxes=boxes, width=(maxx-minx)*d,
316-
height=(maxy_pure-miny)*d, descent=descent)
231+
def store(self):
232+
c = self.cache
233+
with c.connection as t:
234+
fileid = c.dvi_new_file(self.file.name, t)
235+
_log.debug('fontnames is %s', self.fontnames)
236+
fontid = c.dvi_font_sync_ids(self.fontnames, t)
237+
238+
pageno = 0
239+
while True:
240+
if not self._read():
241+
break
242+
for seq, elt in enumerate(self.text + self.boxes):
243+
if isinstance(elt, Box):
244+
c.dvi_add_box(elt, fileid, pageno, seq, t)
245+
else:
246+
texname = elt.font.texname.decode('ascii')
247+
c.dvi_add_text(elt, fileid, pageno, seq,
248+
fontid[texname], t)
249+
pageno += 1
250+
251+
for dvifont in chain(self.recursive_fonts, self.fonts.values()):
252+
c.dvi_font_sync_metrics(dvifont, t)
253+
if self.baseline is not None:
254+
c.dvi_add_baseline(fileid, 0, self.baseline, t)
255+
c.optimize()
256+
return fileid
317257

318258
def _read_fonts(self):
319259
"""Read the postamble of the file and return a list of fonts used."""
@@ -360,6 +300,8 @@ def _read_fonts(self):
360300
_arg(1, False, self, None),
361301
_arg(1, False, self, None))
362302
fontname = file.read(a + length)[-length:].decode('ascii')
303+
_log.debug('dvi._read_fonts(%s): encountered %s',
304+
self.file.name, fontname)
363305
fonts.append(fontname)
364306
elif byte == 249:
365307
break
@@ -426,6 +368,7 @@ def _put_char_real(self, char):
426368
for x, y, f, g, w in font._vf[char].text:
427369
newf = DviFont(scale=_mul2012(scale, f.scale),
428370
tfm=f._tfm, texname=f.texname, vf=f._vf)
371+
self.recursive_fonts.add(newf)
429372
self.text.append(Text(self.h + _mul2012(x, scale),
430373
self.v + _mul2012(y, scale),
431374
newf, g, newf._width_of(g)))
@@ -522,14 +465,12 @@ def _fnt_def(self, k, c, s, d, a, l):
522465
def _fnt_def_real(self, k, c, s, d, a, l):
523466
n = self.file.read(a + l)
524467
fontname = n[-l:].decode('ascii')
525-
tfm = _tfmfile(fontname)
468+
tfm = self._tfm.get(fontname)
526469
if tfm is None:
527470
raise FileNotFoundError("missing font metrics file: %s" % fontname)
528471
if c != 0 and tfm.checksum != 0 and c != tfm.checksum:
529472
raise ValueError('tfm checksum mismatch: %s' % n)
530-
531-
vf = _vffile(fontname)
532-
473+
vf = self._vf.get(fontname)
533474
self.fonts[k] = DviFont(scale=s, tfm=tfm, texname=n, vf=vf)
534475

535476
@_dispatch(247, state=_dvistate.pre, args=('u1', 'u4', 'u4', 'u4', 'u1'))
@@ -669,7 +610,89 @@ def _height_depth_of(self, char):
669610
return result
670611

671612

672-
class Vf(Dvi):
613+
class Dvi(object):
614+
"""
615+
A representation of a dvi ("device-independent") file, as produced by TeX.
616+
617+
Parameters
618+
----------
619+
620+
filename : str
621+
dpi : float or None
622+
cache : TeXSupportCache, optional
623+
624+
Attributes
625+
----------
626+
627+
filename : str
628+
dpi : float or None
629+
cache : TeXSupportCache
630+
631+
632+
"""
633+
def __init__(self, filename, dpi, cache=None):
634+
if cache is None:
635+
cache = TeXSupportCache.get_cache()
636+
self.cache = cache
637+
self.filename = filename
638+
self.dpi = dpi
639+
self._filename_id = cache.dvi_id(filename)
640+
if self._filename_id is None:
641+
self._filename_id = _DviReader(filename, cache).store()
642+
self._fonts = cache.dvi_fonts(self._filename_id)
643+
644+
def __enter__(self):
645+
return self
646+
647+
def __exit__(self, etype, evalue, etrace):
648+
pass
649+
650+
def __getitem__(self, pageno):
651+
if self.cache.dvi_page_exists(self._filename_id, pageno):
652+
return self._output(pageno)
653+
raise IndexError
654+
655+
def _output(self, page):
656+
extrema = self.cache.dvi_page_boundingbox(self._filename_id, page)
657+
min_x, min_y, max_x, max_y, max_y_pure = (
658+
extrema[n] for n in ('min_x', 'min_y', 'max_x',
659+
'max_y', 'max_y_pure'))
660+
boxes = self.cache.dvi_page_boxes(self._filename_id, page)
661+
text = self.cache.dvi_page_text(self._filename_id, page)
662+
baseline = self.cache.dvi_get_baseline(self._filename_id, page)
663+
if self.dpi is None:
664+
return Page(text=[Text(x=row['x'], y=row['y'],
665+
font=self._fonts[(row['texname'],
666+
row['fontscale'])],
667+
glyph=row['glyph'], width=row['width'])
668+
for row in text],
669+
boxes=[Box(x=row['x'], y=row['y'],
670+
height=row['height'], width=row['width'])
671+
for row in boxes],
672+
width=max_x-min_x,
673+
height=max_y_pure-min_y,
674+
descent=max_y-max_y_pure)
675+
d = self.dpi / (72.27 * 2**16)
676+
descent = \
677+
baseline if baseline is not None else (max_y - max_y_pure) * d
678+
679+
return Page(text=[Text((row['x'] - min_x) * d,
680+
(max_y - row['y']) * d - descent,
681+
self._fonts[(row['texname'], row['fontscale'])],
682+
row['glyph'],
683+
row['width'] * d)
684+
for row in text],
685+
boxes=[Box((row['x'] - min_x) * d,
686+
(max_y - row['y']) * d - descent,
687+
row['height'] * d,
688+
row['width'] * d)
689+
for row in boxes],
690+
width=(max_x - min_x) * d,
691+
height=(max_y_pure - min_y) * d,
692+
descent=descent)
693+
694+
695+
class Vf(_DviReader):
673696
"""
674697
A virtual font (\\*.vf file) containing subroutines for dvi files.
675698
@@ -693,12 +716,12 @@ class Vf(Dvi):
693716
694717
The virtual font format is a derivative of dvi:
695718
http://mirrors.ctan.org/info/knuth/virtual-fonts
696-
This class reuses some of the machinery of `Dvi`
719+
This class reuses some of the machinery of `_DviReader`
697720
but replaces the `_read` loop and dispatch mechanism.
698721
"""
699722

700723
def __init__(self, filename, cache=None):
701-
Dvi.__init__(self, filename, dpi=0, cache=cache)
724+
_DviReader.__init__(self, filename, cache=cache)
702725
try:
703726
self._first_font = None
704727
self._chars = {}
@@ -723,6 +746,8 @@ def _read_fonts(self):
723746
_, _, _, a, length = [self._arg(x) for x in (4, 4, 4, 1, 1)]
724747
fontname = self.file.read(a + length)[-length:].decode('ascii')
725748
fonts.append(fontname)
749+
_log.debug('Vf._read_fonts(%s): encountered %s',
750+
self.file.name, fontname)
726751
elif byte == 247:
727752
_, k = self._arg(1), self._arg(1)
728753
_ = self.file.read(k)
@@ -752,7 +777,7 @@ def _read(self):
752777
if byte in (139, 140) or byte >= 243:
753778
raise ValueError(
754779
"Inappropriate opcode %d in vf file" % byte)
755-
Dvi._dtable[byte](self, byte)
780+
_DviReader._dtable[byte](self, byte)
756781
continue
757782

758783
# We are outside a packet

lib/matplotlib/tests/test_dviread.py

+5 -5
Original file line number | Diff line number | Diff line change
@@ -80,11 +80,11 @@ def test_dviread():
8080
@skip_if_command_unavailable(["kpsewhich", "-version"])
8181
def test_dviread_get_fonts():
8282
dir = os.path.join(os.path.dirname(__file__), 'baseline_images', 'dviread')
83-
with dr.Dvi(os.path.join(dir, 'test.dvi'), None) as dvi:
84-
assert dvi.fontnames == \
85-
['cmex10', 'cmmi10', 'cmmi5', 'cmr10', 'cmr5', 'cmr7']
86-
with dr.Vf(os.path.join(dir, 'virtual.vf')) as vf:
87-
assert vf.fontnames == ['cmex10', 'cmr10']
83+
dvi = dr._DviReader(os.path.join(dir, 'test.dvi'), None)
84+
assert dvi.fontnames == \
85+
{'cmex10', 'cmmi10', 'cmmi5', 'cmr10', 'cmr5', 'cmr7'}
86+
vf = dr.Vf(os.path.join(dir, 'virtual.vf'))
87+
assert vf.fontnames == {'cmex10', 'cmr10'}
8888

8989

9090
def test_dviread_get_fonts_error_handling():

0 commit comments

Comments
 (0)