cache.py
from semmle.util import makedirs
import os
from collections import deque
from functools import total_ordering

'''
Least Recently Written Disk-based Cache

Implements an LRW disk cache for trap files and similar.

This cache relies on the following properties, which *must* hold:
Only one value can ever be associated with a key.
Keys should be ASCII strings and cannot start with '$' or include any file or path separator characters.
Values should be byte strings (with any contents).
The cache is robust against arbitrary levels of concurrency.
'''
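
# On-disk layout, as implemented below (the directory and key names here are
# illustrative, not taken from the original source):
#
#   <cachedir>/
#       1/            generation 1 (higher numbers are newer generations)
#           <key>     one file per cached value, named by its key
#           $keys     index of keys, written once the generation is full
#       2/
#           ...
#
# Values are first written to a temporary file named '$<pid><name>' and then
# renamed into place, so a partially written value is never visible.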

MAX_GENERATIONS = 50
MAX_FILES_PER_GENERATION = 200


def encode_keys(keys):
    'Convert a collection of keys to a byte string'
    return '\n'.join(keys).encode("ascii")


def decode_keys(data):
    'Convert a byte string into a set of keys'
    return set(data.decode("ascii").split('\n'))


@total_ordering
class Generation(object):

    def __init__(self, cachedir, age):
        self.cachedir = os.path.join(cachedir, str(age))
        self.age = age
        if not os.path.exists(self.cachedir):
            makedirs(self.cachedir)
        try:
            with open(os.path.join(self.cachedir, "$keys"), 'rb') as fd:
                self.keys = decode_keys(fd.read())
            self.full = True
        except Exception:
            self.keys = set()
            if os.path.isdir(self.cachedir):
                # Directory exists, but cannot read "$keys", so this is a non-full generation
                self.full = False
            else:
                self.full = True

    def get(self, key):
        if self.full and key not in self.keys:
            return None
        try:
            with open(os.path.join(self.cachedir, key), 'rb') as fd:
                return fd.read()
        except Exception:
            return None

    def set(self, key, value):
        '''Return True if this generation should be able to store (key, value), even if the
        write itself fails. In other words, return True iff the generation is not full.'''
        if self.full:
            return False
        if os.path.exists(os.path.join(self.cachedir, "$keys")):
            # Another process has completed this generation; pick up its key index.
            self.full = True
            try:
                with open(os.path.join(self.cachedir, "$keys"), 'rb') as fd:
                    self.keys = decode_keys(fd.read())
            except Exception:
                self.keys = set()
            return False
        self._try_atomic_write_file(key, value)
        if len(self._list_files()) >= MAX_FILES_PER_GENERATION:
            self.full = True
            self._write_keys()
        return True

    def _list_files(self):
        try:
            return os.listdir(self.cachedir)
        except Exception:
            # This probably means the directory has been deleted
            return []

    def _write_keys(self):
        keys = self._list_files()
        self._try_atomic_write_file("$keys", encode_keys(keys))
        self.keys = set(keys)

    def _try_atomic_write_file(self, name, contents):
        fullname = os.path.join(self.cachedir, name)
        tmpname = os.path.join(self.cachedir, '$%d%s' % (os.getpid(), name))
        try:
            with open(tmpname, 'wb') as tmp:
                tmp.write(contents)
            os.rename(tmpname, fullname)
        except Exception:
            # Failed for some reason. The folder may have been deleted, or on Windows,
            # the file may already exist. Attempt to tidy up.
            if os.path.exists(tmpname):
                try:
                    os.remove(tmpname)
                except Exception:
                    # Give up :(
                    pass

    def clear(self):
        try:
            filenames = os.listdir(self.cachedir)
        except Exception:
            # Can't do anything
            return
        for filename in filenames:
            try:
                os.remove(os.path.join(self.cachedir, filename))
            except Exception:
                # Can't delete. Maybe another process has deleted it or it is open (on Windows)
                pass
        try:
            os.rmdir(self.cachedir)
        except Exception:
            # Can't delete
            pass

    def __lt__(self, other):
        # Smaller ages are older; newer generations (higher ages) sort first
        return self.age > other.age


class Cache(object):

    cache_of_caches = {}

    def __init__(self, cachedir, verbose=False):
        self.cachedir = cachedir
        self.verbose = verbose
        self.generations = []
        if not os.path.exists(cachedir):
            makedirs(cachedir)
        generations = []
        for gen in os.listdir(self.cachedir):
            try:
                age = int(gen)
                generations.append(Generation(self.cachedir, age))
            except Exception:
                # gen might not be an int, or it may have been deleted
                pass
        if generations:
            generations.sort()
        else:
            generations = [Generation(self.cachedir, 1)]
        self.generations = deque(generations)
        while len(self.generations) > MAX_GENERATIONS:
            self.generations.pop().clear()

    def set(self, key, value):
        '''Add this (key, value) pair to the cache. Keys should not start with '$' or include
        file or path separators. Either adds the (key, value) pair atomically or does nothing.
        Partial keys or values are never visible.
        '''
        try:
            while not self.generations[0].set(key, value):
                self.generations.appendleft(Generation(self.cachedir, self.generations[0].age+1))
                if len(self.generations) > MAX_GENERATIONS:
                    self.generations.pop().clear()
        except Exception as ex:
            # It's OK to fail, but we must never raise
            if self.verbose:
                try:
                    print("Exception setting cache key '%s': %s" % (key, ex))
                except Exception:
                    # Just in case
                    pass

    def get(self, key):
        if key is None:
            return None
        try:
            for gen in self.generations:
                res = gen.get(key)
                if res is not None:
                    return res
        except Exception as ex:
            if self.verbose:
                try:
                    print("Exception getting cache key '%s': %s" % (key, ex))
                except Exception:
                    # Just in case
                    pass
        return None

    @staticmethod
    def for_directory(cachedir, verbose):
        '''Caches are relatively expensive objects, so we cache them.'''
        if (cachedir, verbose) not in Cache.cache_of_caches:
            Cache.cache_of_caches[(cachedir, verbose)] = Cache(cachedir, verbose)
        return Cache.cache_of_caches[(cachedir, verbose)]
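

# A minimal usage sketch (not part of the original module), assuming a writable
# scratch directory; the path and keys below are hypothetical.
if __name__ == '__main__':
    cache = Cache.for_directory('/tmp/example-trap-cache', verbose=False)
    cache.set('module.trap', b'trap file contents')    # values must be byte strings
    assert cache.get('module.trap') == b'trap file contents'
    assert cache.get('no-such-key') is None             # misses return None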