-
-
Notifications
You must be signed in to change notification settings - Fork 7.9k
Handle dvi font names as ASCII bytestrings #6977
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
705b021
dbc8b9e
93fad55
4874e4e
a130ba7
0f0e41a
a7b5772
803a96e
ec5d80e
fe52808
aa8c4f6
9de07aa
c87b653
2e19a61
119934a
8fa303f
94587b1
254e3df
a8674b3
25a8fed
92e2c52
5ba21b0
10135bf
6de9813
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
Dvi is a binary format that includes some ASCII strings such as TeX names of some fonts. The associated files such as psfonts.map need to be ASCII too. This patch changes their handling to keep them as binary strings all the time. This avoids the ugly workaround try: result = some_mapping[texname] except KeyError: result = some_mapping[texname.decode('ascii')] which is essentially saying that texname is sometimes a string, sometimes a bytestring. The workaround masks real KeyErrors, leading to incomprehensible error messages such as in #6516.
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -510,7 +510,7 @@ class DviFont(object): | |
Name of the font as used internally by TeX and friends. This | ||
is usually very different from any external font names, and | ||
:class:`dviread.PsfontsMap` can be used to find the external | ||
name of the font. | ||
name of the font. ASCII bytestring. | ||
|
||
.. attribute:: size | ||
|
||
|
@@ -526,8 +526,7 @@ class DviFont(object): | |
__slots__ = ('texname', 'size', 'widths', '_scale', '_vf', '_tfm') | ||
|
||
def __init__(self, scale, tfm, texname, vf): | ||
if six.PY3 and isinstance(texname, bytes): | ||
texname = texname.decode('ascii') | ||
assert(isinstance(texname, bytes)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No parentheses here. (And in general I think we should avoid bare asserts in favor of raising explicit exceptions.) |
||
self._scale, self._tfm, self.texname, self._vf = \ | ||
scale, tfm, texname, vf | ||
self.size = scale * (72.0 / (72.27 * 2**16)) | ||
|
@@ -807,42 +806,42 @@ def __init__(self, filename): | |
self._parse(file) | ||
|
||
def __getitem__(self, texname): | ||
try: | ||
result = self._font[texname] | ||
except KeyError: | ||
result = self._font[texname.decode('ascii')] | ||
assert(isinstance(texname, bytes)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Parentheses removal, from @anntzer. |
||
result = self._font[texname] | ||
fn, enc = result.filename, result.encoding | ||
if fn is not None and not fn.startswith('/'): | ||
if fn is not None and not fn.startswith(b'/'): | ||
fn = find_tex_file(fn) | ||
if enc is not None and not enc.startswith('/'): | ||
if enc is not None and not enc.startswith(b'/'): | ||
enc = find_tex_file(result.encoding) | ||
return result._replace(filename=fn, encoding=enc) | ||
|
||
def _parse(self, file): | ||
"""Parse each line into words.""" | ||
"""Parse each line into words and process them.""" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This docstring seems rather pointless (it is no news that a method named |
||
|
||
for line in file: | ||
line = six.b(line) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why not open the file in binary mode in |
||
line = line.strip() | ||
if line == '' or line.startswith('%'): | ||
if line == b'' or line.startswith(b'%'): | ||
continue | ||
words, pos = [], 0 | ||
while pos < len(line): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If I understand the logic of this method correctly it should be possible to rewrite this loop simply as something like (untested)
right? (as |
||
if line[pos] == '"': # double quoted word | ||
if line[pos:pos+1] == b'"': # double quoted word | ||
pos += 1 | ||
end = line.index('"', pos) | ||
end = line.index(b'"', pos) | ||
words.append(line[pos:end]) | ||
pos = end + 1 | ||
else: # ordinary word | ||
end = line.find(' ', pos+1) | ||
end = line.find(b' ', pos+1) | ||
if end == -1: | ||
end = len(line) | ||
words.append(line[pos:end]) | ||
pos = end | ||
while pos < len(line) and line[pos] == ' ': | ||
while pos < len(line) and line[pos:pos+1] == b' ': | ||
pos += 1 | ||
self._register(words) | ||
|
||
def _register(self, words): | ||
"""Register a font described by "words". | ||
"""Register a font described by "words", a sequence of bytestrings. | ||
|
||
The format is, AFAIK: texname fontname [effects and filenames] | ||
Effects are PostScript snippets like ".177 SlantFont", | ||
|
@@ -861,19 +860,23 @@ def _register(self, words): | |
# http://tex.stackexchange.com/questions/10826/ | ||
# http://article.gmane.org/gmane.comp.tex.pdftex/4914 | ||
|
||
# input must be bytestrings (the file format is ASCII) | ||
for word in words: | ||
assert(isinstance(word, bytes)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same remark as above re: assertions. |
||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Again, the logic of this function may perhaps be more easily expressed using regexes. |
||
texname, psname = words[:2] | ||
effects, encoding, filename = '', None, None | ||
effects, encoding, filename = b'', None, None | ||
for word in words[2:]: | ||
if not word.startswith('<'): | ||
if not word.startswith(b'<'): | ||
effects = word | ||
else: | ||
word = word.lstrip('<') | ||
if word.startswith('[') or word.endswith('.enc'): | ||
word = word.lstrip(b'<') | ||
if word.startswith(b'[') or word.endswith(b'.enc'): | ||
if encoding is not None: | ||
matplotlib.verbose.report( | ||
'Multiple encodings for %s = %s' | ||
% (texname, psname), 'debug') | ||
if word.startswith('['): | ||
if word.startswith(b'['): | ||
encoding = word[1:] | ||
else: | ||
encoding = word | ||
|
@@ -884,11 +887,11 @@ def _register(self, words): | |
eff = effects.split() | ||
effects = {} | ||
try: | ||
effects['slant'] = float(eff[eff.index('SlantFont')-1]) | ||
effects['slant'] = float(eff[eff.index(b'SlantFont')-1]) | ||
except ValueError: | ||
pass | ||
try: | ||
effects['extend'] = float(eff[eff.index('ExtendFont')-1]) | ||
effects['extend'] = float(eff[eff.index(b'ExtendFont')-1]) | ||
except ValueError: | ||
pass | ||
|
||
|
@@ -927,26 +930,27 @@ def _parse(self, file): | |
|
||
state = 0 | ||
for line in file: | ||
comment_start = line.find('%') | ||
line = six.b(line) | ||
comment_start = line.find(b'%') | ||
if comment_start > -1: | ||
line = line[:comment_start] | ||
line = line.strip() | ||
|
||
if state == 0: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I tried googling without much success the format of enc files but it looks like this is looking for patterns for the form
and returning the slash-separated words within the brackets. At least the searching for brackets-surrounded fragments can clearly be rewritten using re.findall as above. |
||
# Expecting something like /FooEncoding [ | ||
if '[' in line: | ||
if b'[' in line: | ||
state = 1 | ||
line = line[line.index('[')+1:].strip() | ||
line = line[line.index(b'[')+1:].strip() | ||
|
||
if state == 1: | ||
if ']' in line: # ] def | ||
line = line[:line.index(']')] | ||
if b']' in line: # ] def | ||
line = line[:line.index(b']')] | ||
state = 2 | ||
words = line.split() | ||
for w in words: | ||
if w.startswith('/'): | ||
if w.startswith(b'/'): | ||
# Allow for /abc/def/ghi | ||
subwords = w.split('/') | ||
subwords = w.split(b'/') | ||
result.extend(subwords[1:]) | ||
else: | ||
raise ValueError("Broken name in encoding file: " + w) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,47 +18,50 @@ def test_PsfontsMap(monkeypatch): | |
fontmap = dr.PsfontsMap(filename) | ||
# Check all properties of a few fonts | ||
for n in [1, 2, 3, 4, 5]: | ||
key = 'TeXfont%d' % n | ||
key = b'TeXfont%d' % n | ||
entry = fontmap[key] | ||
assert entry.texname == key | ||
assert entry.psname == 'PSfont%d' % n | ||
assert entry.psname == b'PSfont%d' % n | ||
if n not in [3, 5]: | ||
assert entry.encoding == 'font%d.enc' % n | ||
assert entry.encoding == b'font%d.enc' % n | ||
elif n == 3: | ||
assert entry.encoding == 'enc3.foo' | ||
assert entry.encoding == b'enc3.foo' | ||
# We don't care about the encoding of TeXfont5, which specifies | ||
# multiple encodings. | ||
if n not in [1, 5]: | ||
assert entry.filename == 'font%d.pfa' % n | ||
assert entry.filename == b'font%d.pfa' % n | ||
else: | ||
assert entry.filename == 'font%d.pfb' % n | ||
assert entry.filename == b'font%d.pfb' % n | ||
if n == 4: | ||
assert entry.effects == {'slant': -0.1, 'extend': 2.2} | ||
else: | ||
assert entry.effects == {} | ||
# Some special cases | ||
entry = fontmap['TeXfont6'] | ||
entry = fontmap[b'TeXfont6'] | ||
assert entry.filename is None | ||
assert entry.encoding is None | ||
entry = fontmap['TeXfont7'] | ||
entry = fontmap[b'TeXfont7'] | ||
assert entry.filename is None | ||
assert entry.encoding == 'font7.enc' | ||
entry = fontmap['TeXfont8'] | ||
assert entry.filename == 'font8.pfb' | ||
assert entry.encoding == b'font7.enc' | ||
entry = fontmap[b'TeXfont8'] | ||
assert entry.filename == b'font8.pfb' | ||
assert entry.encoding is None | ||
entry = fontmap['TeXfont9'] | ||
assert entry.filename == '/absolute/font9.pfb' | ||
entry = fontmap[b'TeXfont9'] | ||
assert entry.filename == b'/absolute/font9.pfb' | ||
|
||
|
||
@skip_if_command_unavailable(["kpsewhich", "-version"]) | ||
def test_dviread(): | ||
dir = os.path.join(os.path.dirname(__file__), 'baseline_images', 'dviread') | ||
with open(os.path.join(dir, 'test.json')) as f: | ||
correct = json.load(f) | ||
for entry in correct: | ||
entry['text'] = [[a, b, c, six.b(d), e] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Replace |
||
for [a, b, c, d, e] in entry['text']] | ||
with dr.Dvi(os.path.join(dir, 'test.dvi'), None) as dvi: | ||
data = [{'text': [[t.x, t.y, | ||
six.unichr(t.glyph), | ||
six.text_type(t.font.texname), | ||
t.font.texname, | ||
round(t.font.size, 2)] | ||
for t in page.text], | ||
'boxes': [[b.x, b.y, b.height, b.width] for b in page.boxes]} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If we can't use numpydoc/napoleon-style docstrings I would at least put the type at a more prominent place.