|
24 | 24 | import os
|
25 | 25 | import re
|
26 | 26 | import struct
|
| 27 | +import sqlite3 |
27 | 28 | import sys
|
28 | 29 | import textwrap
|
29 | 30 |
|
30 | 31 | import numpy as np
|
31 | 32 |
|
32 |
| -from matplotlib import cbook, rcParams |
| 33 | +from matplotlib import cbook, get_cachedir, rcParams |
33 | 34 | from matplotlib.compat import subprocess
|
34 | 35 |
|
35 | 36 | _log = logging.getLogger(__name__)
|
@@ -980,45 +981,259 @@ def _parse(self, file):
|
980 | 981 | return re.findall(br'/([^][{}<>\s]+)', data)
|
981 | 982 |
|
982 | 983 |
|
983 |
| -def find_tex_file(filename, format=None): |
class TeXSupportCacheError(Exception):
    """Error raised by `TeXSupportCache`.

    Raised when an existing cache database reports a schema version that
    differs from the one this code expects.
    """
| 986 | + |
| 987 | + |
class TeXSupportCache:
    """A persistent cache of data about the support files needed by dvi
    files produced by TeX.

    Currently holds results from :program:`kpsewhich`; future versions
    could hold pre-parsed font data etc.

    Usage::

        # create or get the singleton instance
        cache = TeXSupportCache.get_cache()
        with cache.connection as transaction:
            cache.update_pathnames(
                {"pdftex.map": "/usr/local/pdftex.map",
                 "cmsy10.pfb": "/usr/local/fonts/cmsy10.pfb"},
                transaction)
        pathnames = cache.get_pathnames(["pdftex.map", "cmr10.pfb"])
        # now pathnames = {"pdftex.map": "/usr/local/pdftex.map"}

        # optional after inserting new data, may improve query performance:
        cache.optimize()

    Parameters
    ----------
    filename : str, optional
        File in which to store the cache. Defaults to `texsupport.N.db` in
        the standard cache directory where N is the current schema version.

    Attributes
    ----------
    connection
        This database connection object has a context manager to set up
        a transaction. Transactions are passed into methods that write to
        the database.

    Raises
    ------
    TeXSupportCacheError
        If the database file already exists with a different, nonzero
        schema version.
    """

    # A one-element tuple; the trailing comma matters, since a bare
    # parenthesized string is just a string.
    __slots__ = ('connection',)
    schema_version = 1  # written to PRAGMA user_version in _create
    instance = None
    # Upper bound on the number of "?" placeholders used in one statement.
    # SQLite limits the number of bound parameters per statement (999 in
    # older default builds), so larger lookups are split into batches.
    _max_query_params = 500

    @classmethod
    def get_cache(cls):
        "Return the singleton instance of the cache, at the default location"
        if cls.instance is None:
            cls.instance = cls()
        return cls.instance

    def __init__(self, filename=None):
        if filename is None:
            filename = os.path.join(get_cachedir(), 'texsupport.%d.db'
                                    % self.schema_version)

        self.connection = sqlite3.connect(
            filename, isolation_level="DEFERRED")
        with self.connection as conn:
            conn.execute("PRAGMA journal_mode=WAL;")
            version, = conn.execute("PRAGMA user_version;").fetchone()

        if version == 0:
            # user_version is 0 in a database that _create has not
            # (completely) initialized yet.
            self._create()
        elif version != self.schema_version:
            # Do not leave the connection open on an object that the
            # caller never receives.
            self.connection.close()
            raise TeXSupportCacheError(
                "support database %s has version %d, expected %d"
                % (filename, version, self.schema_version))

    def _create(self):
        """Create the database tables and stamp the schema version."""
        with self.connection as conn:
            # IF NOT EXISTS keeps this idempotent: if an earlier attempt
            # created the table but did not get as far as setting
            # user_version, a plain CREATE TABLE would fail on every
            # later start.
            conn.executescript(
                """
                PRAGMA page_size=4096;
                CREATE TABLE IF NOT EXISTS file_path(
                    filename TEXT PRIMARY KEY NOT NULL,
                    pathname TEXT
                ) WITHOUT ROWID;
                PRAGMA user_version=%d;
                """ % self.schema_version)

    def optimize(self):
        """Optional optimization phase after updating data.
        Executes sqlite's `PRAGMA optimize` statement, which can call
        `ANALYZE` or other functions that can improve future query performance
        by spending some time up-front."""
        with self.connection as conn:
            conn.execute("PRAGMA optimize;")

    def get_pathnames(self, filenames):
        """Query the cache for pathnames related to `filenames`.

        Parameters
        ----------
        filenames : iterable of str
            Any iterable is accepted, including one-shot iterators.

        Returns
        -------
        mapping from str to (str or None)
            For those filenames that exist in the cache, the mapping
            includes either the related pathname or None to indicate that
            the named file does not exist.
        """
        # Materialize once: the names are needed both to count the
        # placeholders and as the bound parameters.
        filenames = list(filenames)
        found = {}
        step = self._max_query_params
        for start in range(0, len(filenames), step):
            batch = filenames[start:start + step]
            rows = self.connection.execute(
                "SELECT filename, pathname FROM file_path WHERE filename IN "
                "(%s)"
                % ','.join('?' * len(batch)),
                batch).fetchall()
            found.update(rows)
        return found

    def update_pathnames(self, mapping, transaction):
        """Update the cache with the given filename-to-pathname mapping

        Parameters
        ----------
        mapping : mapping from str to (str or None)
            Mapping from filenames to the corresponding full pathnames
            or None to indicate that the named file does not exist.
        transaction : obtained via the context manager of self.connection
        """
        transaction.executemany(
            "INSERT OR REPLACE INTO file_path (filename, pathname) "
            "VALUES (?, ?)",
            mapping.items())
| 1109 | + |
| 1110 | + |
def find_tex_files(filenames, cache=None):
    """Find multiple files in the texmf tree. This can be more efficient
    than `find_tex_file` because it makes only one call to `kpsewhich`.

    Calls :program:`kpsewhich` which is an interface to the kpathsea
    library [1]_.  Most existing TeX distributions on Unix-like systems use
    kpathsea.  It is also available as part of MikTeX, a popular
    distribution on Windows.

    The results are cached into the TeX support database. In case of
    mistaken results, deleting the database resets the cache.

    Parameters
    ----------
    filenames : iterable of (string or bytestring)
        Names of the files to look up. Bytestrings are decoded as utf-8;
        repeated names are looked up only once.
    cache : TeXSupportCache, optional
        Cache instance to use, defaults to the singleton instance of the class.

    Returns
    -------
    dict mapping str to (str or None)
        One entry per distinct (decoded) filename. The value is the
        pathname found for that file, or None if neither the cache nor
        :program:`kpsewhich` produced one.

    References
    ----------

    .. [1] `Kpathsea documentation <http://www.tug.org/kpathsea/>`_
        The library that :program:`kpsewhich` is part of.

    """

    # we expect these to always be ascii encoded, but use utf-8
    # out of caution
    decoded = (f.decode('utf-8', errors='replace')
               if isinstance(f, bytes) else f
               for f in filenames)
    # Keep the first occurrence of each name: the result is keyed by name
    # anyway, so repeats would only lengthen the kpsewhich command line.
    seen = set()
    filenames = []
    for name in decoded:
        if name not in seen:
            seen.add(name)
            filenames.append(name)
    if not filenames:
        # Nothing to look up; avoid opening the cache at all.
        return {}

    if cache is None:
        cache = TeXSupportCache.get_cache()
    result = cache.get_pathnames(filenames)

    # Only names the cache knows nothing about are passed to kpsewhich;
    # a cached None ("file does not exist") counts as known.
    filenames = [f for f in filenames if f not in result]
    if not filenames:
        return result

    cmd = ['kpsewhich'] + list(filenames)
    _log.debug('find_tex_files: %s', cmd)
    pipe = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    output = pipe.communicate()[0].decode('ascii').splitlines()
    _log.debug('find_tex_files result: %s', output)
    # Output lines are paired back with the requested names; names
    # without an output line map to None.
    mapping = _match(filenames, output)
    with cache.connection as transaction:
        cache.update_pathnames(mapping, transaction)
    result.update(mapping)

    return result
| 1161 | + |
| 1162 | + |
| 1163 | +def _match(filenames, pathnames): |
| 1164 | + """ |
| 1165 | + Match filenames to pathnames in lists that are in matching order, |
| 1166 | + except that some filenames may lack pathnames. |
| 1167 | + """ |
| 1168 | + result = {f: None for f in filenames} |
| 1169 | + filenames, pathnames = iter(filenames), iter(pathnames) |
| 1170 | + try: |
| 1171 | + filename, pathname = next(filenames), next(pathnames) |
| 1172 | + while True: |
| 1173 | + if pathname.endswith(os.path.sep + filename): |
| 1174 | + result[filename] = pathname |
| 1175 | + pathname = next(pathnames) |
| 1176 | + filename = next(filenames) |
| 1177 | + except StopIteration: |
| 1178 | + return result |
| 1179 | + |
| 1180 | + |
def find_tex_file(filename, format=None, cache=None):
    """
    Find a file in the texmf tree.

    Calls :program:`kpsewhich` which is an interface to the kpathsea
    library [1]_.  Most existing TeX distributions on Unix-like systems use
    kpathsea.  It is also available as part of MikTeX, a popular
    distribution on Windows.

    Unless the deprecated `format` option is given, the results are cached
    into the TeX support database (see `TeXSupportCache`; by default a file
    named :file:`texsupport.N.db` in the cache directory, where N is the
    schema version).  In case of mistaken results, deleting this file
    resets the cache.

    Parameters
    ----------
    filename : string or bytestring
    format : string or bytestring, DEPRECATED
        Used as the value of the `--format` option to :program:`kpsewhich`.
        Could be e.g. 'tfm' or 'vf' to limit the search to that type of files.
        Deprecated to allow batching multiple filenames into one kpsewhich
        call, since any format option would apply to all filenames at once.
        When given, :program:`kpsewhich` is called directly and the cache
        is neither consulted nor updated.
    cache : TeXSupportCache, optional
        Cache instance to use, defaults to the singleton instance of the class.

    Returns
    -------
    str
        The pathname of the file, or an empty string if it was not found.

    References
    ----------

    .. [1] `Kpathsea documentation <http://www.tug.org/kpathsea/>`_
        The library that :program:`kpsewhich` is part of.
    """

    if format is not None:
        cbook.warn_deprecated(
            "3.0",
            "The format option to find_tex_file is deprecated "
            "to allow batching multiple filenames into one call. "
            "Omitting the option should not change the result, as "
            "kpsewhich uses the filename extension to choose the path.")
        # we expect these to always be ascii encoded, but use utf-8
        # out of caution
        if isinstance(filename, bytes):
            filename = filename.decode('utf-8', errors='replace')
        if isinstance(format, bytes):
            format = format.decode('utf-8', errors='replace')

        # A format restriction cannot be expressed in the cache, which is
        # keyed by filename only, so this path bypasses it.
        cmd = ['kpsewhich', '--format=' + format, filename]
        _log.debug('find_tex_file(%s): %s', filename, cmd)
        pipe = subprocess.Popen(cmd, stdout=subprocess.PIPE)
        result = pipe.communicate()[0].rstrip()
        _log.debug('find_tex_file result: %s', result)
        return result.decode('ascii')

    pathname = list(find_tex_files([filename], cache).values())[0]
    # find_tex_files reports a missing file as None, whereas the direct
    # kpsewhich call above yields an empty string; return '' in both cases
    # so that the result is always a str.
    return '' if pathname is None else pathname
1022 | 1237 |
|
1023 | 1238 |
|
1024 | 1239 | # With multiple text objects per figure (e.g., tick labels) we may end
|
|
0 commit comments