Skip to content

Commit 47254fc

Browse files
committed
get_sample_data improvements: remove old files from subdirectories and
not only the top-level directory; try to handle the disconnected use case; use the perhaps more stable svnroot URL instead of the viewvc one.

svn path=/trunk/matplotlib/; revision=7493
1 parent 6147c20 commit 47254fc

File tree

1 file changed

+50
-30
lines changed

1 file changed

+50
-30
lines changed

lib/matplotlib/cbook.py

Lines changed: 50 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,7 @@ def is_scalar_or_string(val):
355355
class ViewVCCachedServer(urllib2.BaseHandler):
356356
"""
357357
Urllib2 handler that takes care of caching files.
358-
The file cache.pck holds the directory of files to be cached.
358+
The file cache.pck holds the directory of files that have been cached.
359359
"""
360360
def __init__(self, cache_dir, baseurl):
361361
self.cache_dir = cache_dir
@@ -386,9 +386,14 @@ def read_cache(self):
386386
cache = cPickle.load(f)
387387
f.close()
388388

389+
# Earlier versions did not have the full paths in cache.pck
390+
for url, (fn, x, y) in cache.items():
391+
if not os.path.isabs(fn):
392+
cache[url] = (self.in_cache_dir(fn), x, y)
393+
389394
# If any files are deleted, drop them from the cache
390395
for url, (fn, _, _) in cache.items():
391-
if not os.path.exists(self.in_cache_dir(fn)):
396+
if not os.path.exists(fn):
392397
del cache[url]
393398

394399
self.cache = cache
@@ -398,14 +403,20 @@ def remove_stale_files(self):
398403
Remove files from the cache directory that are not listed in
399404
cache.pck.
400405
"""
401-
listed = set([fn for (_, (fn, _, _)) in self.cache.items()])
402-
for path in os.listdir(self.cache_dir):
403-
if path not in listed and path != 'cache.pck':
404-
thisfile = os.path.join(self.cache_dir, path)
405-
if not os.path.isdir(thisfile):
406-
matplotlib.verbose.report('ViewVCCachedServer:remove_stale_files: removing %s'%thisfile,
407-
level='debug')
408-
os.remove(thisfile)
406+
# TODO: remove empty subdirectories
407+
listed = set(fn for (_, (fn, _, _)) in self.cache.items())
408+
existing = reduce(set.union,
409+
(set(os.path.join(dirpath, fn) for fn in filenames)
410+
for (dirpath, _, filenames) in os.walk(self.cache_dir)))
411+
matplotlib.verbose.report(
412+
'ViewVCCachedServer: files listed in cache.pck: %s' % listed, 'debug')
413+
matplotlib.verbose.report(
414+
'ViewVCCachedServer: files in cache directory: %s' % existing, 'debug')
415+
416+
for path in existing - listed - set([self.in_cache_dir('cache.pck')]):
417+
matplotlib.verbose.report('ViewVCCachedServer:remove_stale_files: removing %s'%path,
418+
level='debug')
419+
os.remove(path)
409420

410421
def write_cache(self):
411422
"""
@@ -424,17 +435,12 @@ def cache_file(self, url, data, headers):
424435
fn = url[len(self.baseurl):]
425436
fullpath = self.in_cache_dir(fn)
426437

427-
#while os.path.exists(self.in_cache_dir(fn)):
428-
# fn = rightmost + '.' + str(random.randint(0,9999999))
429-
430-
431-
432-
f = open(self.in_cache_dir(fn), 'wb')
438+
f = open(fullpath, 'wb')
433439
f.write(data)
434440
f.close()
435441

436442
# Update the cache
437-
self.cache[url] = (fn, headers.get('ETag'), headers.get('Last-Modified'))
443+
self.cache[url] = (fullpath, headers.get('ETag'), headers.get('Last-Modified'))
438444
self.write_cache()
439445

440446
# These urllib2 entry points are used:
@@ -459,9 +465,9 @@ def http_error_304(self, req, fp, code, msg, hdrs):
459465
"""
460466
url = req.get_full_url()
461467
fn, _, _ = self.cache[url]
462-
cachefile = self.in_cache_dir(fn)
463-
matplotlib.verbose.report('ViewVCCachedServer: reading data file from cache file "%s"'%cachefile)
464-
file = open(cachefile, 'rb')
468+
matplotlib.verbose.report('ViewVCCachedServer: reading data file from cache file "%s"'
469+
%fn, 'debug')
470+
file = open(fn, 'rb')
465471
handle = urllib2.addinfourl(file, hdrs, url)
466472
handle.code = 304
467473
return handle
@@ -470,6 +476,8 @@ def http_response(self, req, response):
470476
"""
471477
Update the cache with the returned file.
472478
"""
479+
matplotlib.verbose.report('ViewVCCachedServer: received response %d: %s'
480+
% (response.code, response.msg), 'debug')
473481
if response.code != 200:
474482
return response
475483
else:
@@ -489,10 +497,10 @@ def get_sample_data(self, fname, asfileobj=True):
489497
store it in the cachedir.
490498
491499
If asfileobj is True, a file object will be returned. Else the
492-
path to the file as a string will be returned
493-
500+
path to the file as a string will be returned.
494501
"""
495-
502+
# TODO: time out if the connection takes forever
503+
# (may not be possible with urllib2 only - spawn a helper process?)
496504

497505
# quote is not in python2.4, so check for it and get it from
498506
# urllib if it is not available
@@ -501,12 +509,24 @@ def get_sample_data(self, fname, asfileobj=True):
501509
import urllib
502510
quote = urllib.quote
503511

512+
# retrieve the URL for the side effect of refreshing the cache
504513
url = self.baseurl + quote(fname)
505-
response = self.opener.open(url)
506-
507-
508-
relpath = self.cache[url][0]
509-
fname = self.in_cache_dir(relpath)
514+
error = 'unknown error'
515+
matplotlib.verbose.report('ViewVCCachedServer: retrieving %s'
516+
% url, 'debug')
517+
try:
518+
response = self.opener.open(url)
519+
except urllib2.URLError, e:
520+
# could be a missing network connection
521+
error = str(e)
522+
523+
cached = self.cache.get(url)
524+
if cached is None:
525+
msg = 'file %s not in cache; received %s when trying to retrieve' \
526+
% (fname, error)
527+
raise KeyError(msg)
528+
529+
fname = cached[0]
510530

511531
if asfileobj:
512532
return file(fname)
@@ -519,7 +539,7 @@ def get_sample_data(fname, asfileobj=True):
519539
Check the cachedirectory ~/.matplotlib/sample_data for a sample_data
520540
file. If it does not exist, fetch it with urllib from the mpl svn repo
521541
522-
http://matplotlib.svn.sourceforge.net/viewvc/matplotlib/trunk/sample_data/
542+
http://matplotlib.svn.sourceforge.net/svnroot/matplotlib/trunk/sample_data/
523543
524544
and store it in the cachedir.
525545
@@ -539,7 +559,7 @@ def get_sample_data(fname, asfileobj=True):
539559
if myserver is None:
540560
configdir = matplotlib.get_configdir()
541561
cachedir = os.path.join(configdir, 'sample_data')
542-
baseurl = 'http://matplotlib.svn.sourceforge.net/viewvc/matplotlib/trunk/sample_data/'
562+
baseurl = 'http://matplotlib.svn.sourceforge.net/svnroot/matplotlib/trunk/sample_data/'
543563
myserver = get_sample_data.myserver = ViewVCCachedServer(cachedir, baseurl)
544564

545565
return myserver.get_sample_data(fname, asfileobj=asfileobj)

0 commit comments

Comments
 (0)