Skip to content

Commit 0f6a627

Browse files
authored
Merge pull request #23295 from anntzer/re.sub-str.translate
Replace re.sub by the faster str.translate.
2 parents 7864e76 + ef16bfb commit 0f6a627

File tree

1 file changed

+17
-29
lines changed

1 file changed

+17
-29
lines changed

lib/matplotlib/backends/backend_pdf.py

+17 -29
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,6 @@
1313
import logging
1414
import math
1515
import os
16-
import re
1716
import string
1817
import struct
1918
import sys
@@ -119,25 +118,6 @@ def _fill(strings, linelen=75):
119118
result.append(b' '.join(strings[lasti:]))
120119
return b'\n'.join(result)
121120

122-
# PDF strings are supposed to be able to include any eight-bit data,
123-
# except that unbalanced parens and backslashes must be escaped by a
124-
# backslash. However, sf bug #2708559 shows that the carriage return
125-
# character may get read as a newline; these characters correspond to
126-
# \gamma and \Omega in TeX's math font encoding. Escaping them fixes
127-
# the bug.
128-
_string_escape_regex = re.compile(br'([\\()\r\n])')
129-
130-
131-
def _string_escape(match):
132-
m = match.group(0)
133-
if m in br'\()':
134-
return b'\\' + m
135-
elif m == b'\n':
136-
return br'\n'
137-
elif m == b'\r':
138-
return br'\r'
139-
assert False
140-
141121

142122
def _create_pdf_info_dict(backend, metadata):
143123
"""
@@ -267,6 +247,15 @@ def _get_link_annotation(gc, x, y, width, height):
267247
return link_annotation
268248

269249

250+
# PDF strings are supposed to be able to include any eight-bit data, except
251+
# that unbalanced parens and backslashes must be escaped by a backslash.
252+
# However, sf bug #2708559 shows that the carriage return character may get
253+
# read as a newline; these characters correspond to \gamma and \Omega in TeX's
254+
# math font encoding. Escaping them fixes the bug.
255+
_str_escapes = str.maketrans({
256+
'\\': '\\\\', '(': '\\(', ')': '\\)', '\n': '\\n', '\r': '\\r'})
257+
258+
270259
def pdfRepr(obj):
271260
"""Map Python objects to PDF syntax."""
272261

@@ -292,22 +281,21 @@ def pdfRepr(obj):
292281
elif isinstance(obj, (int, np.integer)):
293282
return b"%d" % obj
294283

295-
# Unicode strings are encoded in UTF-16BE with byte-order mark.
284+
# Non-ASCII Unicode strings are encoded in UTF-16BE with byte-order mark.
296285
elif isinstance(obj, str):
297-
try:
298-
# But maybe it's really ASCII?
299-
s = obj.encode('ASCII')
300-
return pdfRepr(s)
301-
except UnicodeEncodeError:
302-
s = codecs.BOM_UTF16_BE + obj.encode('UTF-16BE')
303-
return pdfRepr(s)
286+
return pdfRepr(obj.encode('ascii') if obj.isascii()
287+
else codecs.BOM_UTF16_BE + obj.encode('UTF-16BE'))
304288

305289
# Strings are written in parentheses, with backslashes and parens
306290
# escaped. Actually balanced parens are allowed, but it is
307291
# simpler to escape them all. TODO: cut long strings into lines;
308292
# I believe there is some maximum line length in PDF.
293+
# Despite the extra decode/encode, translate is faster than regex.
309294
elif isinstance(obj, bytes):
310-
return b'(' + _string_escape_regex.sub(_string_escape, obj) + b')'
295+
return (
296+
b'(' +
297+
obj.decode('latin-1').translate(_str_escapes).encode('latin-1')
298+
+ b')')
311299

312300
# Dictionaries. The keys must be PDF names, so if we find strings
313301
# there, we make Name objects from them. The values may be

0 commit comments

Comments
 (0)