20
20
from collections import namedtuple
21
21
import enum
22
22
from functools import lru_cache , partial , wraps
23
+ from itertools import chain
23
24
import logging
24
25
import os
25
26
import re
@@ -168,28 +169,23 @@ def wrapper(self, byte):
168
169
return decorate
169
170
170
171
171
- class Dvi ( object ):
172
- """
173
- A reader for a dvi ("device-independent") file, as produced by TeX.
174
- The current implementation can only iterate through pages in order.
172
+ def _keep ( func , keys ):
173
+ """Return mapping from each k in keys to func(k)
174
+ such that func(k) is not None"""
175
+ return dict (( k , v ) for k , v in zip ( keys , map ( func , keys )) if v is not None )
175
176
176
- This class can be used as a context manager to close the underlying
177
- file upon exit. Pages can be read via iteration. Here is an overly
178
- simple way to extract text without trying to detect whitespace::
179
177
180
- >>> with matplotlib.dviread.Dvi('input.dvi', 72) as dvi:
181
- >>> for page in dvi:
182
- >>> print(''.join(chr(t.glyph) for t in page.text))
178
+ class _DviReader (object ):
179
+ """
180
+ A reader for a dvi ("device-independent") file, as produced by TeX.
181
+ This implementation is only used to store the file in a cache, from
182
+ which it is read by Dvi.
183
183
184
184
Parameters
185
185
----------
186
186
187
187
filename : str
188
188
dvi file to read
189
- dpi : number or None
190
- Dots per inch, can be floating-point; this affects the
191
- coordinates returned. Use None to get TeX's internal units
192
- which are likely only useful for debugging.
193
189
cache : TeXSupportCache instance, optional
194
190
Support file cache instance, defaults to the TeXSupportCache
195
191
singleton.
@@ -198,28 +194,28 @@ class Dvi(object):
198
194
_dtable = [None ] * 256
199
195
_dispatch = partial (_dispatch , _dtable )
200
196
201
- def __init__ (self , filename , dpi , cache = None ):
202
- """
203
- Read the data from the file named *filename* and convert
204
- TeX's internal units to units of *dpi* per inch.
205
- *dpi* only sets the units and does not limit the resolution.
206
- Use None to return TeX's internal units.
207
- """
197
def __init__(self, filename, cache=None):
    """
    Open *filename* and collect the font information needed to read it.

    Parameters
    ----------
    filename : str
        dvi file to read; opened in binary mode.
    cache : TeXSupportCache instance, optional
        Support file cache; defaults to ``TeXSupportCache.get_cache()``.

    If any step after opening the file raises, the file is closed
    before the exception propagates.
    """
    _log.debug('Dvi: %s', filename)
    if cache is None:
        cache = TeXSupportCache.get_cache()
    self.cache = cache
    self.file = open(filename, 'rb')
    try:
        self.fonts = {}
        self.recursive_fonts = set()
        self.state = _dvistate.pre
        self.baseline = self._get_baseline(filename)
        self.fontnames = set(self._read_fonts())
        # populate kpsewhich cache with font pathnames
        find_tex_files([x + suffix for x in self.fontnames
                        for suffix in ('.tfm', '.vf', '.pfb')],
                       cache)
        # Keep only the fonts for which a tfm / vf object was found.
        self._tfm = _keep(_tfmfile, self.fontnames)
        self._vf = _keep(_vffile, self.fontnames)
        # Fonts referenced from the virtual fonts are part of this
        # file's font set too.
        for vf in self._vf.values():
            self.fontnames.update(vf.fontnames)
    except BaseException:
        # Do not leak the open file handle when initialization fails;
        # the caller never gets an object on which to call close().
        self.file.close()
        raise
216
+
217
def close(self):
    """Close the file object held in ``self.file``."""
    handle = self.file
    handle.close()
223
219
224
220
def _get_baseline (self , filename ):
225
221
if rcParams ['text.latex.preview' ]:
@@ -232,88 +228,32 @@ def _get_baseline(self, filename):
232
228
return float (depth )
233
229
return None
234
230
235
- def __enter__ (self ):
236
- """
237
- Context manager enter method, does nothing.
238
- """
239
- return self
240
-
241
- def __exit__ (self , etype , evalue , etrace ):
242
- """
243
- Context manager exit method, closes the underlying file if it is open.
244
- """
245
- self .close ()
246
-
247
- def __iter__ (self ):
248
- """
249
- Iterate through the pages of the file.
250
-
251
- Yields
252
- ------
253
- Page
254
- Details of all the text and box objects on the page.
255
- The Page tuple contains lists of Text and Box tuples and
256
- the page dimensions, and the Text and Box tuples contain
257
- coordinates transformed into a standard Cartesian
258
- coordinate system at the dpi value given when initializing.
259
- The coordinates are floating point numbers, but otherwise
260
- precision is not lost and coordinate values are not clipped to
261
- integers.
262
- """
263
- while True :
264
- have_page = self ._read ()
265
- if have_page :
266
- yield self ._output ()
267
- else :
268
- break
269
-
270
- def close (self ):
271
- """
272
- Close the underlying file if it is open.
273
- """
274
- if not self .file .closed :
275
- self .file .close ()
276
-
277
- def _output (self ):
278
- """
279
- Output the text and boxes belonging to the most recent page.
280
- page = dvi._output()
281
- """
282
- minx , miny , maxx , maxy = np .inf , np .inf , - np .inf , - np .inf
283
- maxy_pure = - np .inf
284
- for elt in self .text + self .boxes :
285
- if isinstance (elt , Box ):
286
- x , y , h , w = elt
287
- e = 0 # zero depth
288
- else : # glyph
289
- x , y , font , g , w = elt
290
- h , e = font ._height_depth_of (g )
291
- minx = min (minx , x )
292
- miny = min (miny , y - h )
293
- maxx = max (maxx , x + w )
294
- maxy = max (maxy , y + e )
295
- maxy_pure = max (maxy_pure , y )
296
-
297
- if self .dpi is None :
298
- # special case for ease of debugging: output raw dvi coordinates
299
- return Page (text = self .text , boxes = self .boxes ,
300
- width = maxx - minx , height = maxy_pure - miny ,
301
- descent = maxy - maxy_pure )
302
-
303
- # convert from TeX's "scaled points" to dpi units
304
- d = self .dpi / (72.27 * 2 ** 16 )
305
- if self .baseline is None :
306
- descent = (maxy - maxy_pure ) * d
307
- else :
308
- descent = self .baseline
309
-
310
- text = [Text ((x - minx )* d , (maxy - y )* d - descent , f , g , w * d )
311
- for (x , y , f , g , w ) in self .text ]
312
- boxes = [Box ((x - minx )* d , (maxy - y )* d - descent , h * d , w * d )
313
- for (x , y , h , w ) in self .boxes ]
314
-
315
- return Page (text = text , boxes = boxes , width = (maxx - minx )* d ,
316
- height = (maxy_pure - miny )* d , descent = descent )
231
def store(self):
    """
    Read every page of the file and record its contents in the cache.

    Boxes and text elements are added per page, then font metrics are
    synced for the fonts in ``self.recursive_fonts`` and ``self.fonts``,
    and the baseline (if any) is recorded for page 0.

    Returns
    -------
    The file id returned by ``cache.dvi_new_file``.
    """
    # NOTE(review): the extent of the ``with`` block is reconstructed from
    # which calls receive the transaction argument -- confirm.
    cache = self.cache
    with cache.connection as txn:
        fileid = cache.dvi_new_file(self.file.name, txn)
        _log.debug('fontnames is %s', self.fontnames)
        fontid = cache.dvi_font_sync_ids(self.fontnames, txn)

        pageno = 0
        # presumably _read() returns a false value once no page is left
        while self._read():
            for seq, elt in enumerate(self.text + self.boxes):
                if not isinstance(elt, Box):
                    # Text element: look up its font id by TeX name.
                    texname = elt.font.texname.decode('ascii')
                    cache.dvi_add_text(elt, fileid, pageno, seq,
                                       fontid[texname], txn)
                    continue
                cache.dvi_add_box(elt, fileid, pageno, seq, txn)
            pageno += 1

        for dvifont in chain(self.recursive_fonts, self.fonts.values()):
            cache.dvi_font_sync_metrics(dvifont, txn)
        if self.baseline is not None:
            cache.dvi_add_baseline(fileid, 0, self.baseline, txn)
    cache.optimize()
    return fileid
317
257
318
258
def _read_fonts (self ):
319
259
"""Read the postamble of the file and return a list of fonts used."""
@@ -360,6 +300,8 @@ def _read_fonts(self):
360
300
_arg (1 , False , self , None ),
361
301
_arg (1 , False , self , None ))
362
302
fontname = file .read (a + length )[- length :].decode ('ascii' )
303
+ _log .debug ('dvi._read_fonts(%s): encountered %s' ,
304
+ self .file .name , fontname )
363
305
fonts .append (fontname )
364
306
elif byte == 249 :
365
307
break
@@ -426,6 +368,7 @@ def _put_char_real(self, char):
426
368
for x , y , f , g , w in font ._vf [char ].text :
427
369
newf = DviFont (scale = _mul2012 (scale , f .scale ),
428
370
tfm = f ._tfm , texname = f .texname , vf = f ._vf )
371
+ self .recursive_fonts .add (newf )
429
372
self .text .append (Text (self .h + _mul2012 (x , scale ),
430
373
self .v + _mul2012 (y , scale ),
431
374
newf , g , newf ._width_of (g )))
@@ -522,14 +465,12 @@ def _fnt_def(self, k, c, s, d, a, l):
522
465
def _fnt_def_real (self , k , c , s , d , a , l ):
523
466
n = self .file .read (a + l )
524
467
fontname = n [- l :].decode ('ascii' )
525
- tfm = _tfmfile (fontname )
468
+ tfm = self . _tfm . get (fontname )
526
469
if tfm is None :
527
470
raise FileNotFoundError ("missing font metrics file: %s" % fontname )
528
471
if c != 0 and tfm .checksum != 0 and c != tfm .checksum :
529
472
raise ValueError ('tfm checksum mismatch: %s' % n )
530
-
531
- vf = _vffile (fontname )
532
-
473
+ vf = self ._vf .get (fontname )
533
474
self .fonts [k ] = DviFont (scale = s , tfm = tfm , texname = n , vf = vf )
534
475
535
476
@_dispatch (247 , state = _dvistate .pre , args = ('u1' , 'u4' , 'u4' , 'u4' , 'u1' ))
@@ -669,7 +610,89 @@ def _height_depth_of(self, char):
669
610
return result
670
611
671
612
672
- class Vf (Dvi ):
613
class Dvi(object):
    """
    A representation of a dvi ("device-independent") file, as produced by TeX.

    The file contents are served from a cache.  If the cache has no id
    for *filename*, the file is read once with `_DviReader` and stored.
    Pages are accessed by index (``dvi[0]``); indexing a page the cache
    does not have raises IndexError.

    Parameters
    ----------

    filename : str
        dvi file to represent.
    dpi : float or None
        Dots per inch used to scale the coordinates of returned pages.
        With None, the coordinates stored in the cache are returned
        unscaled.
    cache : TeXSupportCache, optional
        Defaults to ``TeXSupportCache.get_cache()``.

    Attributes
    ----------

    filename : str
    dpi : float or None
    cache : TeXSupportCache

    """
    def __init__(self, filename, dpi, cache=None):
        if cache is None:
            cache = TeXSupportCache.get_cache()
        self.cache = cache
        self.filename = filename
        self.dpi = dpi
        self._filename_id = cache.dvi_id(filename)
        if self._filename_id is None:
            # Not cached yet: parse the file and store it.  The reader
            # holds an open file, so close it whether or not storing
            # succeeds.
            reader = _DviReader(filename, cache)
            try:
                self._filename_id = reader.store()
            finally:
                reader.close()
        self._fonts = cache.dvi_fonts(self._filename_id)

    def __enter__(self):
        """Context manager enter method; returns this object."""
        return self

    def __exit__(self, etype, evalue, etrace):
        """Context manager exit method; this object holds no open file."""
        pass

    def __getitem__(self, pageno):
        """
        Return the Page for *pageno*.

        Raises
        ------
        IndexError
            If the cache reports that the page does not exist.
        """
        if self.cache.dvi_page_exists(self._filename_id, pageno):
            return self._output(pageno)
        raise IndexError(
            'no page %r in dvi file %s' % (pageno, self.filename))

    def _output(self, page):
        """
        Build the Page for page number *page* from the cached data.

        With ``self.dpi`` set, coordinates are shifted relative to the
        page's bounding box and scaled to dpi units; with None, the
        cached values are returned as stored.
        """
        extrema = self.cache.dvi_page_boundingbox(self._filename_id, page)
        min_x, min_y, max_x, max_y, max_y_pure = (
            extrema[n] for n in ('min_x', 'min_y', 'max_x',
                                 'max_y', 'max_y_pure'))
        boxes = self.cache.dvi_page_boxes(self._filename_id, page)
        text = self.cache.dvi_page_text(self._filename_id, page)
        baseline = self.cache.dvi_get_baseline(self._filename_id, page)
        if self.dpi is None:
            # Raw coordinates; the stored baseline is not applied here.
            return Page(text=[Text(x=row['x'], y=row['y'],
                                   font=self._fonts[(row['texname'],
                                                     row['fontscale'])],
                                   glyph=row['glyph'], width=row['width'])
                              for row in text],
                        boxes=[Box(x=row['x'], y=row['y'],
                                   height=row['height'], width=row['width'])
                               for row in boxes],
                        width=max_x - min_x,
                        height=max_y_pure - min_y,
                        descent=max_y - max_y_pure)

        # convert from TeX's "scaled points" to dpi units
        d = self.dpi / (72.27 * 2 ** 16)
        # A stored baseline takes precedence over the computed descent
        # and is used as is, without scaling.
        if baseline is not None:
            descent = baseline
        else:
            descent = (max_y - max_y_pure) * d

        return Page(text=[Text((row['x'] - min_x) * d,
                               (max_y - row['y']) * d - descent,
                               self._fonts[(row['texname'],
                                            row['fontscale'])],
                               row['glyph'],
                               row['width'] * d)
                          for row in text],
                    boxes=[Box((row['x'] - min_x) * d,
                               (max_y - row['y']) * d - descent,
                               row['height'] * d,
                               row['width'] * d)
                           for row in boxes],
                    width=(max_x - min_x) * d,
                    height=(max_y_pure - min_y) * d,
                    descent=descent)
693
+
694
+
695
+ class Vf (_DviReader ):
673
696
"""
674
697
A virtual font (\\ *.vf file) containing subroutines for dvi files.
675
698
@@ -693,12 +716,12 @@ class Vf(Dvi):
693
716
694
717
The virtual font format is a derivative of dvi:
695
718
http://mirrors.ctan.org/info/knuth/virtual-fonts
696
- This class reuses some of the machinery of `Dvi `
719
+ This class reuses some of the machinery of `_DviReader `
697
720
but replaces the `_read` loop and dispatch mechanism.
698
721
"""
699
722
700
723
def __init__ (self , filename , cache = None ):
701
- Dvi .__init__ (self , filename , dpi = 0 , cache = cache )
724
+ _DviReader .__init__ (self , filename , cache = cache )
702
725
try :
703
726
self ._first_font = None
704
727
self ._chars = {}
@@ -723,6 +746,8 @@ def _read_fonts(self):
723
746
_ , _ , _ , a , length = [self ._arg (x ) for x in (4 , 4 , 4 , 1 , 1 )]
724
747
fontname = self .file .read (a + length )[- length :].decode ('ascii' )
725
748
fonts .append (fontname )
749
+ _log .debug ('Vf._read_fonts(%s): encountered %s' ,
750
+ self .file .name , fontname )
726
751
elif byte == 247 :
727
752
_ , k = self ._arg (1 ), self ._arg (1 )
728
753
_ = self .file .read (k )
@@ -752,7 +777,7 @@ def _read(self):
752
777
if byte in (139 , 140 ) or byte >= 243 :
753
778
raise ValueError (
754
779
"Inappropriate opcode %d in vf file" % byte )
755
- Dvi ._dtable [byte ](self , byte )
780
+ _DviReader ._dtable [byte ](self , byte )
756
781
continue
757
782
758
783
# We are outside a packet
0 commit comments