3
3
import os
4
4
import sys
5
5
import requests
6
- import multiprocessing
7
- from . import records
6
+ from IPython .display import display
8
7
9
8
# Read a header file from physiobank
10
9
def streamheader (recordname , pbdir ):
@@ -88,87 +87,109 @@ def streamannotation(filename, pbdir):
88
87
return annbytes
89
88
90
89
91
- # Download all the WFDB files from a physiobank database
92
- def dldatabase (pbdb , dlbasedir , keepsubdirs = True , overwrite = False ):
90
+ # Return a list of all the physiobank databases available
91
def getdblist():
    """
    Return a list of all the physiobank databases available.

    The 'DBS' index file under dbindexurl is fetched and every line is
    split on tabs, so each element of the result is the list of fields
    found on one line of that file.

    Raises requests.HTTPError (through raise_for_status) when the index
    cannot be fetched.

    Usage:
    dblist = getdblist()
    """
    # Function-scope imports keep this block self-contained.
    import posixpath
    import re

    # URLs always use forward slashes; os.path.join is platform dependent.
    url = posixpath.join(dbindexurl, 'DBS')
    r = requests.get(url)
    # Do not try to parse an error page as the database index
    r.raise_for_status()

    lines = r.content.decode('ascii').splitlines()

    # Fields may be separated by runs of tabs. Collapse each run into a
    # single tab before splitting so no empty fields are produced.
    dblist = [re.sub(r'\t{2,}', '\t', line).split('\t') for line in lines]

    return dblist
103
+
104
+
93
105
106
+ # Download specific files from a physiobank database
107
def dldatabasefiles(pbdb, dlbasedir, files, keepsubdirs=True, overwrite=False):
    """
    Download specific files from a physiobank database.

    Input arguments:
    - pbdb: the physiobank database directory, appended to dbindexurl.
    - dlbasedir: the local base directory to download the files into.
    - files: the files to download, as paths relative to the database's
      home directory.
    - keepsubdirs: if True, each file's subdirectory is recreated under
      dlbasedir. Otherwise every file goes directly into dlbasedir.
    - overwrite: passed on to dlpbfile, which re-downloads a file that
      already exists locally when this is True.

    Raises requests.HTTPError (through raise_for_status) when the
    database url cannot be fetched.
    """
    # Imported here because the module-level import of multiprocessing
    # is no longer present in this file.
    import multiprocessing
    import posixpath

    # Full url physiobank database.
    # URLs always use forward slashes; os.path.join is platform dependent.
    dburl = posixpath.join(dbindexurl, pbdb)

    # Check if the database is valid
    r = requests.get(dburl)
    r.raise_for_status()

    # Construct the inputs of dlpbfile, one tuple per file
    dlinputs = []
    for path in files:
        subdir, basefile = os.path.split(path)
        dlinputs.append((basefile, subdir, pbdb, dlbasedir, keepsubdirs, overwrite))

    # Make any required local directories
    makelocaldirs(dlbasedir, dlinputs, keepsubdirs)

    print('Downloading files...')
    # Create multiple processes to download files.
    # Limit to 2 connections to avoid overloading the server
    pool = multiprocessing.Pool(processes=2)
    try:
        pool.map(dlpbfile, dlinputs)
    finally:
        # Always release the worker processes, even if a download failed
        pool.close()
        pool.join()
    print('Finished downloading files')

    return
164
128
165
129
166
- # Download selected WFDB files from a physiobank database
167
- # def dldatabaserecords(pbdb, dlbasedir, keepsubirs = True, overwrite = False):
130
+ # ---- Helper functions for downloading physiobank files ------- #
131
+
132
def getrecordlist(dburl, records):
    """
    Get the list of records to download from a physiobank database.

    Input arguments:
    - dburl: the full url of the physiobank database.
    - records: 'all' to read the record names from the database's
      RECORDS file, otherwise the records themselves, which are
      returned unchanged.

    Exits through sys.exit when records is 'all' and the database has no
    RECORDS file. Raises requests.HTTPError (through raise_for_status)
    for any other failed request.
    """
    import posixpath

    # The records were input manually
    if records != 'all':
        return records

    # Check for a RECORDS file.
    # URLs always use forward slashes; os.path.join is platform dependent.
    r = requests.get(posixpath.join(dburl, 'RECORDS'))
    if r.status_code == 404:
        sys.exit('The database ' + dburl + ' has no WFDB files to download')
    # Do not treat any other error page as a list of records
    r.raise_for_status()

    # Get each line as a string
    recordlist = r.content.decode('ascii').splitlines()

    return recordlist
146
+
147
def getannotators(dburl, annotators):
    """
    Get the annotation file extensions to download from a physiobank
    database.

    Input arguments:
    - dburl: the full url of the physiobank database.
    - annotators: None for no annotations, 'all' for every annotator
      listed in the database's ANNOTATORS file, or a single extension
      or list of extensions to validate against that file.

    Returns None if annotators is None, otherwise a list of annotator
    extensions.

    Exits through sys.exit when the database has no ANNOTATORS file, or
    when a requested annotator is not listed in it. Raises
    requests.HTTPError (through raise_for_status) for any other failed
    request.
    """
    import posixpath

    # Nothing to look up
    if annotators is None:
        return annotators

    # Check for an ANNOTATORS file.
    # URLs always use forward slashes; os.path.join is platform dependent.
    r = requests.get(posixpath.join(dburl, 'ANNOTATORS'))
    if r.status_code == 404:
        sys.exit('The database ' + dburl + ' has no annotation files to download')
    # Do not treat any other error page as a list of annotators
    r.raise_for_status()

    # The first tab-separated field of every line is the extension
    annlist = [line.split('\t')[0] for line in r.content.decode('ascii').splitlines()]

    # All possible ones
    if annotators == 'all':
        return annlist

    # In case they didn't input a list
    if isinstance(annotators, str):
        annotators = [annotators]

    # User input ones. Check validity.
    for a in annotators:
        if a not in annlist:
            # sys.exit accepts a single argument, so build one message
            sys.exit('The database contains no annotators with extension: ' + str(a))

    return annotators
172
+
173
+ # Make any required local directories
174
def makelocaldirs(dlbasedir, dlinputs, keepsubdirs):
    """
    Make any required local directories before downloading.

    Input arguments:
    - dlbasedir: the local base download directory. It is created, and a
      message is printed, if it does not exist yet.
    - dlinputs: the dlpbfile input tuples. Element 1 of each tuple is the
      subdirectory of the file.
    - keepsubdirs: if True, every distinct subdirectory is also created
      under dlbasedir.
    """
    base_missing = not os.path.isdir(dlbasedir)
    if base_missing:
        os.makedirs(dlbasedir)
        print("Created local base download directory: ", dlbasedir)

    # Flat layout: only the base directory is needed
    if not keepsubdirs:
        return

    # The subdirectories are created here rather than in dlpbfile so
    # that the download processes do not clash creating the same one.
    targets = {os.path.join(dlbasedir, entry[1]) for entry in dlinputs}
    for target in targets:
        if os.path.isdir(target):
            continue
        os.makedirs(target)
    return
169
189
170
190
171
191
# Download a file from physiobank
192
+ # The input args are to be unpacked for the use of multiprocessing
172
193
def dlpbfile (inputs ):
173
194
174
195
basefile , subdir , pbdb , dlbasedir , keepsubdirs , overwrite = inputs
@@ -187,28 +208,27 @@ def dlpbfile(inputs):
187
208
# Figure out where the file should be locally
188
209
if keepsubdirs :
189
210
dldir = os .path .join (dlbasedir , subdir )
190
- # Make the local download subdirectory if it doesn't exist
191
- if not os .path .isdir (dldir ):
192
- os .makedirs (dldir )
193
- print ("Created local download subdirectory: " , dldir )
194
211
else :
195
212
dldir = dlbasedir
196
213
197
214
localfile = os .path .join (dldir , basefile )
198
215
199
- # The file exists. Process accordingly.
216
+ # The file exists locally.
200
217
if os .path .isfile (localfile ):
201
218
# Redownload regardless
202
219
if overwrite :
203
220
dlfullfile (url , localfile )
221
+ # Process accordingly.
204
222
else :
205
223
localfilesize = os .path .getsize (localfile )
206
224
# Local file is smaller than it should be. Append it.
207
225
if localfilesize < onlinefilesize :
208
226
print ('Detected partially downloaded file: ' + localfile + ' Appending file...' )
209
227
headers = {"Range" : "bytes=" + str (localfilesize )+ "-" , 'Accept-Encoding' : '*/*' }
210
228
r = requests .get (url , headers = headers , stream = True )
211
- with open (localfile , "wb" ) as writefile :
229
+ print ('headers: ' , headers )
230
+ print ('r content length: ' , len (r .content ))
231
+ with open (localfile , "ba" ) as writefile :
212
232
writefile .write (r .content )
213
233
print ('Done appending.' )
214
234
# Local file is larger than it should be. Redownload.
@@ -232,4 +252,5 @@ def dlfullfile(url, localfile):
232
252
233
253
234
254
255
+
235
256
# Base url of the physiobank database index. Database directory names and
# the 'DBS' index file name are joined onto it to build request urls.
dbindexurl = 'http://physionet.org/physiobank/database/'
0 commit comments