Skip to content

Commit 2f04d73

Browse files
committed
new download function and dl function module move
1 parent eb20733 commit 2f04d73

File tree

4 files changed

+236
-76
lines changed

4 files changed

+236
-76
lines changed

devtests.ipynb

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -244,14 +244,45 @@
244244
},
245245
{
246246
"cell_type": "code",
247-
"execution_count": null,
247+
"execution_count": 1,
248248
"metadata": {
249249
"collapsed": false
250250
},
251-
"outputs": [],
251+
"outputs": [
252+
{
253+
"name": "stdout",
254+
"output_type": "stream",
255+
"text": [
256+
"Downloading files...\n",
257+
"Finished downloading files\n"
258+
]
259+
}
260+
],
261+
"source": [
262+
"import wfdb\n",
263+
"wfdb.dldatabase(pbdb = 'tmpdb', dlbasedir = '/home/chen/Downloads/dbfolder', annotators = 'all', keepsubdirs = True)\n",
264+
"#dldatabase(pbdb, dlbasedir, records = 'All', annotators = None , onlyanns = False, keepsubdirs = True, overwrite = False): "
265+
]
266+
},
267+
{
268+
"cell_type": "code",
269+
"execution_count": 2,
270+
"metadata": {
271+
"collapsed": false
272+
},
273+
"outputs": [
274+
{
275+
"name": "stdout",
276+
"output_type": "stream",
277+
"text": [
278+
"Downloading files...\n",
279+
"Finished downloading files\n"
280+
]
281+
}
282+
],
252283
"source": [
253284
"import wfdb\n",
254-
"wfdb.dldatabase(pbdb = 'fecgsyndb', dlbasedir = '/home/chen/Downloads/dbfolder')"
285+
"wfdb.dldatabasefiles(pbdb = 'tmpdb', dlbasedir = '/home/chen/Downloads/dbfolder', files = ['test01_00s.hea', 'test01_00s.dat', 'a/A/test02_45s.hea', 'a/A/test01_00s.hea'], keepsubdirs = True, overwrite = False)"
255286
]
256287
},
257288
{

wfdb/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from .records import Record, MultiRecord, rdheader, rdsamp, srdsamp, wrsamp
1+
from .records import Record, MultiRecord, rdheader, rdsamp, srdsamp, wrsamp, dldatabase, dldatabasefiles
22
from .annotations import Annotation, rdann, wrann, showanncodes
33
from .plots import plotrec, plotann
4-
from .downloads import dldatabase
4+
from .downloads import getdblist

wfdb/downloads.py

Lines changed: 88 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,7 @@
33
import os
44
import sys
55
import requests
6-
import multiprocessing
7-
from . import records
6+
from IPython.display import display
87

98
# Read a header file from physiobank
109
def streamheader(recordname, pbdir):
@@ -88,87 +87,109 @@ def streamannotation(filename, pbdir):
8887
return annbytes
8988

9089

91-
# Download all the WFDB files from a physiobank database
92-
def dldatabase(pbdb, dlbasedir, keepsubdirs = True, overwrite = False):
90+
# Return a list of all the physiobank databases available
91+
def getdblist():
    """
    Return a list of all the physiobank databases available.

    Retrieves the 'DBS' index file located under dbindexurl and splits
    every line of it into its tab separated fields.

    Usage:
    dblist = getdblist()

    Output:
    - dblist: a list with one element per line of the index file. Each
      element is itself the list of tab separated fields of that line.

    Raises requests.exceptions.HTTPError if the index file cannot be
    retrieved.
    """
    # URLs always use forward slashes, so join them with posixpath
    # rather than the platform dependent os.path
    import posixpath

    url = posixpath.join(dbindexurl, 'DBS')
    r = requests.get(url)
    # Fail loudly rather than parsing an error page as a database list
    r.raise_for_status()

    dblist = r.content.decode('ascii').splitlines()
    # Collapse runs of consecutive tabs into a single tab so that they
    # do not produce empty fields when splitting
    dblist = [re.sub('\t{2,}', '\t', line).split('\t') for line in dblist]

    return dblist
103+
104+
93105

106+
# Download specific files from a physiobank database
107+
def dldatabasefiles(pbdb, dlbasedir, files, keepsubdirs = True, overwrite = False):
    """
    Download specific files from a physiobank database.

    Usage:
    dldatabasefiles(pbdb, dlbasedir, files, keepsubdirs = True, overwrite = False)

    Input arguments:
    - pbdb (required): the physiobank database directory, appended to
      dbindexurl to form the database url.
    - dlbasedir (required): the local base directory to download the
      files into. It is created if it does not exist.
    - files (required): a list of the files to download, given relative
      to the database directory. eg. ['a/A/test01_00s.hea']
    - keepsubdirs (default=True): if True, each file is stored in the
      same relative subdirectory under dlbasedir. Otherwise all files
      are stored directly in dlbasedir.
    - overwrite (default=False): passed on to dlpbfile, which downloads
      again a file that already exists locally when this is True.

    Raises requests.exceptions.HTTPError if the database url is invalid.
    """
    # Imported locally because the module level import of
    # multiprocessing was removed while this function still needs it
    import multiprocessing
    # URLs always use forward slashes, so join them with posixpath
    # rather than the platform dependent os.path
    import posixpath

    # Full url physiobank database
    dburl = posixpath.join(dbindexurl, pbdb)
    # Check if the database is valid
    r = requests.get(dburl)
    r.raise_for_status()

    # Construct the inputs of dlpbfile, one tuple per file:
    # (base file name, subdirectory, pbdb, dlbasedir, keepsubdirs, overwrite)
    dlinputs = []
    for f in files:
        subdir, basefile = os.path.split(f)
        dlinputs.append((basefile, subdir, pbdb, dlbasedir, keepsubdirs, overwrite))

    # Make any required local directories. This is done here, before
    # the worker processes start, so they do not clash creating them.
    makelocaldirs(dlbasedir, dlinputs, keepsubdirs)

    print('Downloading files...')
    # Create multiple processes to download files.
    # Limit to 2 connections to avoid overloading the server
    pool = multiprocessing.Pool(processes=2)
    try:
        pool.map(dlpbfile, dlinputs)
    finally:
        # Always release the worker processes, even if a download fails
        pool.close()
        pool.join()
    print('Finished downloading files')

    return
164128

165129

166-
# Download selected WFDB files from a physiobank database
167-
# def dldatabaserecords(pbdb, dlbasedir, keepsubirs = True, overwrite = False):
130+
# ---- Helper functions for downloading physiobank files ------- #
131+
132+
def getrecordlist(dburl, records):
    """
    Get the list of records to download from a physiobank database.

    Input arguments:
    - dburl: the full url of the physiobank database.
    - records: either the string 'all', to use every line of the
      database's RECORDS file, or the record names input manually as a
      list. A single record name given as a string is wrapped in a list.

    Output:
    - recordlist: the list of record names.

    Exits via sys.exit if records is 'all' and the database has no
    RECORDS file. Raises requests.exceptions.HTTPError for any other
    failure to retrieve the RECORDS file.
    """
    # URLs always use forward slashes, so join them with posixpath
    # rather than the platform dependent os.path
    import posixpath

    # Check for a RECORDS file
    if records == 'all':
        r = requests.get(posixpath.join(dburl, 'RECORDS'))
        if r.status_code == 404:
            sys.exit('The database '+dburl+' has no WFDB files to download')
        # Do not parse the page of any other http error as record names
        r.raise_for_status()

        # Get each line as a string
        recordlist = r.content.decode('ascii').splitlines()
    # In case they didn't input a list
    elif isinstance(records, str):
        recordlist = [records]
    # Otherwise the records are input manually
    else:
        recordlist = records

    return recordlist
146+
147+
def getannotators(dburl, annotators):
    """
    Get the list of annotator extensions to download from a physiobank
    database, checked against the database's ANNOTATORS file.

    Input arguments:
    - dburl: the full url of the physiobank database.
    - annotators: None for no annotations, the string 'all' for every
      annotator listed in the ANNOTATORS file, or the annotator
      extensions input manually as a string or a list of strings.

    Output:
    - annotators: None if the input was None, otherwise the list of
      annotator extensions.

    Exits via sys.exit if the database has no ANNOTATORS file, or if a
    requested annotator is not listed in it. Raises
    requests.exceptions.HTTPError for any other failure to retrieve the
    ANNOTATORS file.
    """
    # URLs always use forward slashes, so join them with posixpath
    # rather than the platform dependent os.path
    import posixpath

    # No annotations requested. Nothing to look up.
    if annotators is None:
        return annotators

    # Check for an ANNOTATORS file
    r = requests.get(posixpath.join(dburl, 'ANNOTATORS'))
    if r.status_code == 404:
        sys.exit('The database '+dburl+' has no annotation files to download')
    # Do not parse the page of any other http error as annotator names
    r.raise_for_status()

    # The annotator extension is the first tab separated field of each line
    annlist = r.content.decode('ascii').splitlines()
    annlist = [a.split('\t')[0] for a in annlist]

    # Get the annotation file types required
    if annotators == 'all':
        # all possible ones
        return annlist

    # In case they didn't input a list
    if isinstance(annotators, str):
        annotators = [annotators]
    # user input ones. Check validity.
    for a in annotators:
        if a not in annlist:
            # sys.exit accepts a single argument, so build one message
            sys.exit('The database contains no annotators with extension: '+a)

    return annotators
172+
173+
# Make any required local directories
174+
def makelocaldirs(dlbasedir, dlinputs, keepsubdirs):
    """
    Make any local directories required for a download.

    Input arguments:
    - dlbasedir: the local base download directory. Created, with a
      printed notice, if it does not exist.
    - dlinputs: the list of input tuples destined for dlpbfile. Element
      1 of each tuple is the subdirectory of that file.
    - keepsubdirs: if True, every distinct subdirectory found in
      dlinputs is also created under dlbasedir.
    """
    base_missing = not os.path.isdir(dlbasedir)
    if base_missing:
        os.makedirs(dlbasedir)
        print("Created local base download directory: ", dlbasedir)

    # Flat download layout: the base directory is all that is needed
    if not keepsubdirs:
        return

    # The subdirectories are created here rather than in dlpbfile so
    # that the worker processes do not clash trying to create them
    targets = set()
    for entry in dlinputs:
        targets.add(os.path.join(dlbasedir, entry[1]))

    for target in targets:
        if os.path.isdir(target):
            continue
        os.makedirs(target)

    return
169189

170190

171191
# Download a file from physiobank
192+
# The input args are to be unpacked for the use of multiprocessing
172193
def dlpbfile(inputs):
173194

174195
basefile, subdir, pbdb, dlbasedir, keepsubdirs, overwrite = inputs
@@ -187,28 +208,27 @@ def dlpbfile(inputs):
187208
# Figure out where the file should be locally
188209
if keepsubdirs:
189210
dldir = os.path.join(dlbasedir, subdir)
190-
# Make the local download subdirectory if it doesn't exist
191-
if not os.path.isdir(dldir):
192-
os.makedirs(dldir)
193-
print("Created local download subdirectory: ", dldir)
194211
else:
195212
dldir = dlbasedir
196213

197214
localfile = os.path.join(dldir, basefile)
198215

199-
# The file exists. Process accordingly.
216+
# The file exists locally.
200217
if os.path.isfile(localfile):
201218
# Redownload regardless
202219
if overwrite:
203220
dlfullfile(url, localfile)
221+
# Process accordingly.
204222
else:
205223
localfilesize = os.path.getsize(localfile)
206224
# Local file is smaller than it should be. Append it.
207225
if localfilesize < onlinefilesize:
208226
print('Detected partially downloaded file: '+localfile+' Appending file...')
209227
headers = {"Range": "bytes="+str(localfilesize)+"-", 'Accept-Encoding': '*/*'}
210228
r = requests.get(url, headers=headers, stream=True)
211-
with open(localfile, "wb") as writefile:
229+
print('headers: ', headers)
230+
print('r content length: ', len(r.content))
231+
with open(localfile, "ba") as writefile:
212232
writefile.write(r.content)
213233
print('Done appending.')
214234
# Local file is larger than it should be. Redownload.
@@ -232,4 +252,5 @@ def dlfullfile(url, localfile):
232252

233253

234254

255+
235256
dbindexurl = 'http://physionet.org/physiobank/database/'

0 commit comments

Comments
 (0)