-
-
Notifications
You must be signed in to change notification settings - Fork 7.9k
Handle dvi font names as ASCII bytestrings #6977
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
705b021
dbc8b9e
93fad55
4874e4e
a130ba7
0f0e41a
a7b5772
803a96e
ec5d80e
fe52808
aa8c4f6
9de07aa
c87b653
2e19a61
119934a
8fa303f
94587b1
254e3df
a8674b3
25a8fed
92e2c52
5ba21b0
10135bf
6de9813
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
Combine the word splitting and classification in one regex so we only have to scan each line once. Add some quotation marks in the test case to check that we are handling quoted words correctly (the behavior should always have matched this test case).
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -868,16 +868,8 @@ def __getitem__(self, texname): | |
return result._replace(filename=fn, encoding=enc) | ||
|
||
def _parse(self, file): | ||
for line in file: | ||
line = line.strip() | ||
if line == b'' or line.startswith(b'%'): | ||
continue | ||
words = [word.strip(b'"') for word in | ||
re.findall(b'("[^"]*"|[^ ]+)', line)] | ||
self._register(words) | ||
|
||
def _register(self, words): | ||
"""Register a font described by "words", a sequence of bytestrings. | ||
""" | ||
Parse the font mapping file. | ||
|
||
The format is, AFAIK: texname fontname [effects and filenames] | ||
Effects are PostScript snippets like ".177 SlantFont", | ||
|
@@ -889,52 +881,68 @@ def _register(self, words): | |
There is some difference between <foo.pfb and <<bar.pfb in | ||
subsetting, but I have no example of << in my TeX installation. | ||
""" | ||
|
||
# If the map file specifies multiple encodings for a font, we | ||
# follow pdfTeX in choosing the last one specified. Such | ||
# entries are probably mistakes but they have occurred. | ||
# http://tex.stackexchange.com/questions/10826/ | ||
# http://article.gmane.org/gmane.comp.tex.pdftex/4914 | ||
|
||
texname, psname = words[:2] | ||
words = words[2:] | ||
effects, encoding, filename = b'', None, None | ||
empty_re = re.compile(br'%|\s*$') | ||
word_re = re.compile( | ||
br'''(?x) (?: | ||
"<\[ (?P<enc1> [^"]+ )" | # quoted encoding marked by [ | ||
"< (?P<enc2> [^"]+.enc)" | # quoted encoding, ends in .enc | ||
"<<? (?P<file1> [^"]+ )" | # quoted font file name | ||
" (?P<eff1> [^"]+ )" | # quoted effects or font name | ||
<\[ (?P<enc3> \S+ ) | # encoding marked by [ | ||
< (?P<enc4> \S+ .enc) | # encoding, ends in .enc | ||
<<? (?P<file2> \S+ ) | # font file name | ||
(?P<eff2> \S+ ) # effects or font name | ||
)''') | ||
effects_re = re.compile( | ||
br'''(?x) (?P<slant> -?[0-9]*(?:\.[0-9]+)) \s* SlantFont | ||
| (?P<extend>-?[0-9]*(?:\.[0-9]+)) \s* ExtendFont''') | ||
|
||
lines = (line.strip() | ||
for line in file | ||
if not empty_re.match(line)) | ||
for line in lines: | ||
effects, encoding, filename = b'', None, None | ||
words = word_re.finditer(line) | ||
|
||
w = next(words) | ||
texname = w.group('eff2') or w.group('eff1') | ||
w = next(words) | ||
psname = w.group('eff2') or w.group('eff1') | ||
|
||
for w in words: | ||
eff = w.group('eff1') or w.group('eff2') | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Everywhere else these groups are listed in the reverse of the order in which they appear in the expression; does that matter? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The named groups are mutually exclusive, so the order doesn't matter for correctness, but there may be a very slight performance difference. I think I was listing the groups in an approximate order of probability of occurrence, e.g. effects are almost certainly quoted because they include arguments, but file and font names are almost certainly not quoted. I can see how this would be confusing; I'll add a comment. |
||
if eff: | ||
effects = eff | ||
continue | ||
enc = (w.group('enc4') or w.group('enc3') or | ||
w.group('enc2') or w.group('enc1')) | ||
if enc: | ||
if encoding is not None: | ||
matplotlib.verbose.report( | ||
'Multiple encodings for %s = %s' | ||
% (texname, psname), | ||
'debug') | ||
encoding = enc | ||
continue | ||
filename = w.group('file2') or w.group('file1') | ||
|
||
# pick the last non-filename word for effects | ||
effects_words = [word for word in words if not word.startswith(b'<')] | ||
if effects_words: | ||
effects = effects_words[-1] | ||
effects_dict = {} | ||
for match in effects_re.finditer(effects): | ||
slant = match.group('slant') | ||
if slant: | ||
effects_dict['slant'] = float(slant) | ||
else: | ||
effects_dict['extend'] = float(match.group('extend')) | ||
|
||
encoding_re = br'<<?(\[.*|.*\.enc)' | ||
encoding_files = [word.lstrip(b'<').lstrip(b'[') | ||
for word in words | ||
if re.match(encoding_re, word)] | ||
if len(encoding_files) > 1: | ||
matplotlib.verbose.report( | ||
'Multiple encodings for %s = %s' % (texname, psname), 'debug') | ||
if encoding_files: | ||
encoding = encoding_files[-1] | ||
|
||
font_files = [word.lstrip(b'<') | ||
for word in words | ||
if word.startswith(b'<') | ||
and not re.match(encoding_re, word)] | ||
if font_files: | ||
filename = font_files[-1] | ||
|
||
eff = {} | ||
for psword, keyword in ((b'SlantFont', 'slant'), | ||
(b'ExtendFont', 'extend')): | ||
match = re.search(b'([^ ]+) +' + psword, effects) | ||
if match: | ||
try: | ||
eff[keyword] = float(match.group(1)) | ||
except ValueError: | ||
pass | ||
|
||
self._font[texname] = PsFont( | ||
texname=texname, psname=psname, effects=eff, | ||
encoding=encoding, filename=filename) | ||
self._font[texname] = PsFont( | ||
texname=texname, psname=psname, effects=effects_dict, | ||
encoding=encoding, filename=filename) | ||
|
||
|
||
class Encoding(object): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I learned quite a bit about regular expressions while working through this pattern.
For example, you can make them readable and give the groups names!