diff --git a/MANIFEST.in b/MANIFEST.in index 3759a060..0ff3f716 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -2,7 +2,7 @@ include LICENSE # Include readme file -include README.md +include README.rst # Include the data files # recursive-include data * diff --git a/README.rst b/README.rst index 9d155590..f8ac39d2 100644 --- a/README.rst +++ b/README.rst @@ -30,7 +30,8 @@ the metadata as a dictionary. :: - sig, fields = rdsamp(recordname, sampfrom=0, sampto=[], channels=[], physical=1, stacksegments=1, pbdl=0, dldir=os.cwd(), keepfiles=0) + sig, fields = rdsamp(recordname, sampfrom=0, sampto=[], channels=[], physical=1, + stacksegments=1, pbdl=0, dldir=os.cwd()) Example Usage: @@ -62,8 +63,6 @@ Input Arguments: than a local directory. - ``dldir`` (default=os.getcwd()): The directory to download physiobank files to. -- ``keepfiles`` (default=0): Flag specifying whether to keep physiobank - files newly downloaded through the function call. Output Arguments: @@ -96,7 +95,8 @@ the fields as lists or arrays. :: - annsamp, anntype, subtype, chan, num, aux, annfs = wfdb.rdann(recordname, annot, sampfrom=0, sampto=[], anndisp=1) + annsamp, anntype, subtype, chan, num, aux, annfs = wfdb.rdann(recordname, + annot, sampfrom=0, sampto=[], anndisp=1) Example Usage: diff --git a/devtests.ipynb b/devtests.ipynb index 5e33f0ce..7198a983 100644 --- a/devtests.ipynb +++ b/devtests.ipynb @@ -803,28 +803,11 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": { "collapsed": false }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Created local directory: /home/cx1111/Downloads/wfdbrecords/macecgdb\n", - "Downloading missing file(s) into directory: /home/cx1111/Downloads/wfdbrecords/macecgdb\n", - "Downloaded all missing files for record.\n", - "[[ 1.00000000e-01 -3.05110000e+02 -2.87190000e+02 -2.66720000e+02]\n", - " [ -2.87190000e+02 -2.71830000e+02 -2.87190000e+02 1.10000000e-01]\n", - " [ -2.99990000e+02 -2.87190000e+02 -2.64160000e+02 -2.87190000e+02]\n", - " ..., \n", - " [ -2.79520000e+02 -2.87190000e+02 -2.66710000e+02 -2.87190000e+02]\n", - " [ 1.20000000e-01 -2.00160000e+02 -2.87190000e+02 -2.79520000e+02]\n", - " [ -2.87190000e+02 -2.84640000e+02 -2.87190000e+02 1.20000000e-01]]\n" - ] - } - ], + "outputs": [], "source": [ "import wfdb\n", "sig, fields=wfdb.rdsamp('macecgdb/test01_00s', pbdl=1, dldir='/home/cx1111/Downloads/wfdbrecords/macecgdb')\n", @@ -848,109 +831,11 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": { "collapsed": false }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Help on built-in function array in module numpy.core.multiarray:\n", - "\n", - "array(...)\n", - " array(object, dtype=None, copy=True, order=None, subok=False, ndmin=0)\n", - " \n", - " Create an array.\n", - " \n", - " Parameters\n", - " ----------\n", - " object : array_like\n", - " An array, any object exposing the array interface, an\n", - " object whose __array__ method returns an array, or any\n", - " (nested) sequence.\n", - " dtype : data-type, optional\n", - " The desired data-type for the array. If not given, then\n", - " the type will be determined as the minimum type required\n", - " to hold the objects in the sequence. This argument can only\n", - " be used to 'upcast' the array. For downcasting, use the\n", - " .astype(t) method.\n", - " copy : bool, optional\n", - " If true (default), then the object is copied. 
Otherwise, a copy\n", - " will only be made if __array__ returns a copy, if obj is a\n", - " nested sequence, or if a copy is needed to satisfy any of the other\n", - " requirements (`dtype`, `order`, etc.).\n", - " order : {'C', 'F', 'A'}, optional\n", - " Specify the order of the array. If order is 'C', then the array\n", - " will be in C-contiguous order (last-index varies the fastest).\n", - " If order is 'F', then the returned array will be in\n", - " Fortran-contiguous order (first-index varies the fastest).\n", - " If order is 'A' (default), then the returned array may be\n", - " in any order (either C-, Fortran-contiguous, or even discontiguous),\n", - " unless a copy is required, in which case it will be C-contiguous.\n", - " subok : bool, optional\n", - " If True, then sub-classes will be passed-through, otherwise\n", - " the returned array will be forced to be a base-class array (default).\n", - " ndmin : int, optional\n", - " Specifies the minimum number of dimensions that the resulting\n", - " array should have. Ones will be pre-pended to the shape as\n", - " needed to meet this requirement.\n", - " \n", - " Returns\n", - " -------\n", - " out : ndarray\n", - " An array object satisfying the specified requirements.\n", - " \n", - " See Also\n", - " --------\n", - " empty, empty_like, zeros, zeros_like, ones, ones_like, fill\n", - " \n", - " Examples\n", - " --------\n", - " >>> np.array([1, 2, 3])\n", - " array([1, 2, 3])\n", - " \n", - " Upcasting:\n", - " \n", - " >>> np.array([1, 2, 3.0])\n", - " array([ 1., 2., 3.])\n", - " \n", - " More than one dimension:\n", - " \n", - " >>> np.array([[1, 2], [3, 4]])\n", - " array([[1, 2],\n", - " [3, 4]])\n", - " \n", - " Minimum dimensions 2:\n", - " \n", - " >>> np.array([1, 2, 3], ndmin=2)\n", - " array([[1, 2, 3]])\n", - " \n", - " Type provided:\n", - " \n", - " >>> np.array([1, 2, 3], dtype=complex)\n", - " array([ 1.+0.j, 2.+0.j, 3.+0.j])\n", - " \n", - " Data-type consisting of more than one element:\n", - " \n", - " >>> x = np.array([(1,2),(3,4)],dtype=[('a','>> x['a']\n", - " array([1, 3])\n", - " \n", - " Creating an array from sub-classes:\n", - " \n", - " >>> np.array(np.mat('1 2; 3 4'))\n", - " array([[1, 2],\n", - " [3, 4]])\n", - " \n", - " >>> np.array(np.mat('1 2; 3 4'), subok=True)\n", - " matrix([[1, 2],\n", - " [3, 4]])\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "import numpy as np\n", "\n", @@ -959,64 +844,11 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": { "collapsed": false }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Downloading missing file(s) into directory: /home/cx1111/PhysionetProjects/wfdb-python\n", - "Downloaded all missing files for record.\n" - ] - }, - { - "data": { - "text/plain": [ - "(array([[ 1.00000000e-01, -3.05110000e+02, -2.87190000e+02,\n", - " -2.66720000e+02],\n", - " [ -2.87190000e+02, -2.71830000e+02, -2.87190000e+02,\n", - " 1.10000000e-01],\n", - " [ -2.99990000e+02, -2.87190000e+02, -2.64160000e+02,\n", - " -2.87190000e+02],\n", - " ..., \n", - " [ -2.79520000e+02, -2.87190000e+02, -2.66710000e+02,\n", - " -2.87190000e+02],\n", - " [ 1.20000000e-01, -2.00160000e+02, -2.87190000e+02,\n", - " -2.79520000e+02],\n", - " [ -2.87190000e+02, -2.84640000e+02, -2.87190000e+02,\n", - " 1.20000000e-01]]),\n", - " {'basedate': '',\n", - " 'baseline': [0, 0, 0, 0],\n", - " 'basetime': '',\n", - " 'byteoffset': [0, 0, 0, 0],\n", - " 'comments': [' : 25 : M : (none) : (none)'],\n", - " 'filename': 
['test01_00s.dat',\n", - " 'test01_00s.dat',\n", - " 'test01_00s.dat',\n", - " 'test01_00s.dat'],\n", - " 'fmt': ['16', '16', '16', '16'],\n", - " 'fs': 500.0,\n", - " 'gain': [100.0, 100.0, 100.0, 100.0],\n", - " 'initvalue': [10, -8, -57, -66],\n", - " 'nsamp': 4000,\n", - " 'nsampseg': [],\n", - " 'nseg': 1,\n", - " 'nsig': 4,\n", - " 'sampsperframe': [1, 1, 1, 1],\n", - " 'signame': ['ECG', 'ECG', 'ECG', 'ECG'],\n", - " 'skew': [0, 0, 0, 0],\n", - " 'units': ['mV', 'mV', 'mV', 'mV']})" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import wfdb\n", "\n", diff --git a/wfdb/__init__.py b/wfdb/__init__.py index 4b23dacb..0cfb27f4 100644 --- a/wfdb/__init__.py +++ b/wfdb/__init__.py @@ -1,3 +1,5 @@ from ._rdsamp import rdsamp from ._rdann import rdann -from ._plotwfdb import plotwfdb \ No newline at end of file +from ._rdheader import rdheader +from ._plotwfdb import plotwfdb +from ._downloadsamp import downloadsamp diff --git a/wfdb/_downloadsamp.py b/wfdb/_downloadsamp.py new file mode 100644 index 00000000..7ffda5c6 --- /dev/null +++ b/wfdb/_downloadsamp.py @@ -0,0 +1,124 @@ +import numpy as np +import re +import os +import sys +import requests +from ._rdheader import rdheader + +def downloadsamp(pbrecname, targetdir): + """Check a specified local directory for all necessary files required to read a Physiobank + record, and download any missing files into the same directory. Returns a list of files + downloaded, or exits with error if an invalid Physiobank record is specified. + + Usage: dledfiles = dlrecordfiles(pbrecname, targetdir) + + Input arguments: + - pbrecname (required): The name of the MIT format Physiobank record to be read, prepended + with the Physiobank subdirectory the file is contained in (without any file extensions). + eg. pbrecname=prcp/12726 to download files http://physionet.org/physiobank/database/prcp/12726.hea + and 12727.dat + - targetdir (required): The local directory to check for files required to read the record, + in which missing files are also downloaded. + + Output arguments: + - dledfiles: The list of files downloaded from PhysioBank. + + """ + + physioneturl = "http://physionet.org/physiobank/database/" + pbdir, baserecname = os.path.split(pbrecname) + displaydlmsg=1 + dledfiles = [] + + if not os.path.isdir(targetdir): # Make the target dir if it doesn't exist + os.makedirs(targetdir) + print("Created local directory: ", targetdir) + + # For any missing file, check if the input physiobank record name is + # valid, ie whether the file exists on physionet. Download if valid, exit + # if invalid. + dledfiles, displaydlmsg = dlifmissing(physioneturl+pbdir+"/"+baserecname+".hea", os.path.join(targetdir, + baserecname+".hea"), dledfiles, displaydlmsg, targetdir) + + fields = rdheader(os.path.join(targetdir, baserecname)) + + # Need to check validity of link if ANY file is missing. + if fields["nseg"] == 1: # Single segment. Check for all the required dat files + for f in set(fields["filename"]): + # Missing dat file + dledfiles, displaydlmsg = dlifmissing(physioneturl+pbdir+"/"+f, os.path.join(targetdir, f), + dledfiles, displaydlmsg, targetdir) + else: # Multi segment. 
Check for all segment headers and their dat files + for segment in fields["filename"]: + if segment != '~': + # Check the segment header + dledfiles, displaydlmsg = dlifmissing(physioneturl+pbdir+"/"+segment+".hea", + os.path.join(targetdir, segment+".hea"), dledfiles, displaydlmsg, targetdir) + segfields = rdheader(os.path.join(targetdir, segment)) + for f in set(segfields["filename"]): + if f != '~': + # Check the segment's dat file + dledfiles, displaydlmsg = dlifmissing(physioneturl+pbdir+"/"+f, + os.path.join(targetdir, f), dledfiles, displaydlmsg, targetdir) + + if dledfiles: + print('Downloaded all missing files for record.') + return dledfiles # downloaded files + + +# Download a file if it is missing. Also error check 0 byte files. +def dlifmissing(url, filename, dledfiles, displaydlmsg, targetdir): + fileexists = os.path.isfile(filename) + if fileexists: + # Likely interrupted download + if os.path.getsize(filename)==0: + try: + input = raw_input + except NameError: + pass + userresponse=input("Warning: File "+filename+" is 0 bytes.\n" + "Likely interrupted download. Remove file and redownload? [y/n]: ") + # override input for python 2 compatibility + while userresponse not in ['y','n']: + userresponse=input("Remove file and redownload? [y/n]: ") + if userresponse=='y': + os.remove(filename) + dledfiles.append(dlorexit(url, filename, displaydlmsg, targetdir)) + displaydlmsg=0 + else: + print("Skipping download.") + # File is already present. + else: + print("File "+filename+" is already present.") + else: + dledfiles.append(dlorexit(url, filename, displaydlmsg, targetdir)) + displaydlmsg=0 + + # If a file gets downloaded, displaydlmsg is set to 0. No need to print the message more than once. + return dledfiles, displaydlmsg + + +# Download the file from the specified 'url' as the 'filename', or exit with warning. +def dlorexit(url, filename, displaydlmsg=0, targetdir=[]): + if displaydlmsg: # We want this message to be called once for all files downloaded. + print('Downloading missing file(s) into directory: {}'.format(targetdir)) + try: + r = requests.get(url) + with open(filename, "wb") as writefile: + writefile.write(r.content) + return filename + except requests.HTTPError: + sys.exit("Attempted to download invalid target file: " + url) + + +# Download files required to read a wfdb annotation. +def dlannfiles(): + return dledfiles + + +# Download all the records in a physiobank database. +def dlPBdatabase(database, targetdir): + return dledfiles + + + diff --git a/wfdb/_rdheader.py b/wfdb/_rdheader.py new file mode 100644 index 00000000..445d864e --- /dev/null +++ b/wfdb/_rdheader.py @@ -0,0 +1,186 @@ +import numpy as np +import re +import os +import sys +import requests + +def rdheader(recordname): # For reading signal headers + + # To do: Allow exponential input format for some fields + + # Output dictionary + fields = { + 'nseg': [], + 'nsig': [], + 'fs': [], + 'nsamp': [], + 'basetime': [], + 'basedate': [], + 'filename': [], + 'fmt': [], + 'sampsperframe': [], + 'skew': [], + 'byteoffset': [], + 'gain': [], + 'units': [], + 'baseline': [], + 'initvalue': [], + 'signame': [], + 'nsampseg': [], + 'comments': []} + # filename stores file names for both multi and single segment headers. 
+ # nsampseg is only for multi-segment + + + # RECORD LINE fields (o means optional, delimiter is space or tab unless specified): + # record name, nsegments (o, delim=/), nsignals, fs (o), counter freq (o, delim=/, needs fs), + # base counter (o, delim=(), needs counter freq), nsamples (o, needs fs), base time (o), + # base date (o needs base time). + + # Regexp object for record line + rxRECORD = re.compile( + ''.join( + [ + "(?P[\w]+)/?(?P\d*)[ \t]+", + "(?P\d+)[ \t]*", + "(?P\d*\.?\d*)/*(?P\d*\.?\d*)\(?(?P\d*\.?\d*)\)?[ \t]*", + "(?P\d*)[ \t]*", + "(?P\d*:?\d{,2}:?\d{,2}\.?\d*)[ \t]*", + "(?P\d{,2}/?\d{,2}/?\d{,4})"])) + # Watch out for potential floats: fs (and also exponent notation...), + # counterfs, basecounter + + # SIGNAL LINE fields (o means optional, delimiter is space or tab unless specified): + # file name, format, samplesperframe(o, delim=x), skew(o, delim=:), byteoffset(o,delim=+), + # ADCgain(o), baseline(o, delim=(), requires ADCgain), units(o, delim=/, requires baseline), + # ADCres(o, requires ADCgain), ADCzero(o, requires ADCres), initialvalue(o, requires ADCzero), + # checksum(o, requires initialvalue), blocksize(o, requires checksum), + # signame(o, requires block) + + # Regexp object for signal lines. Consider flexible filenames, and also ~ + rxSIGNAL = re.compile( + ''.join( + [ + "(?P[\w]*\.?[\w]*~?)[ \t]+(?P\d+)x?" + "(?P\d*):?(?P\d*)\+?(?P\d*)[ \t]*", + "(?P-?\d*\.?\d*e?[\+-]?\d*)\(?(?P-?\d*)\)?/?(?P[\w\^/-]*)[ \t]*", + "(?P\d*)[ \t]*(?P-?\d*)[ \t]*(?P-?\d*)[ \t]*", + "(?P-?\d*)[ \t]*(?P\d*)[ \t]*(?P[\S]*)"])) + + # Units characters: letters, numbers, /, ^, -, + # Watch out for potentially negative fields: baseline, ADCzero, initialvalue, checksum, + # Watch out for potential float: ADCgain. + + # Read the header file and get the comment and non-comment lines + headerlines, commentlines = getheaderlines(recordname) + + # Get record line parameters + (_, nseg, nsig, fs, counterfs, basecounter, nsamp, + basetime, basedate) = rxRECORD.findall(headerlines[0])[0] + + # These fields are either mandatory or set to defaults. + if not nseg: + nseg = '1' + if not fs: + fs = '250' + + fields['nseg'] = int(nseg) + fields['fs'] = float(fs) + fields['nsig'] = int(nsig) + + # These fields might by empty + if nsamp: + fields['nsamp'] = int(nsamp) + fields['basetime'] = basetime + fields['basedate'] = basedate + + + # Signal or Segment line paramters + # Multi segment header - Process segment spec lines in current master + # header. + if int(nseg) > 1: + for i in range(0, int(nseg)): + (filename, nsampseg) = re.findall( + '(?P\w*~?)[ \t]+(?P\d+)', headerlines[i + 1])[0] + fields["filename"].append(filename) + fields["nsampseg"].append(int(nsampseg)) + # Single segment header - Process signal spec lines in regular header. + else: + for i in range(0, int(nsig)): # will not run if nsignals=0 + # get signal line parameters + (filename, + fmt, + sampsperframe, + skew, + byteoffset, + adcgain, + baseline, + units, + adcres, + adczero, + initvalue, + checksum, + blocksize, + signame) = rxSIGNAL.findall(headerlines[i + 1])[0] + + # Setting defaults + if not sampsperframe: + # Setting strings here so we can always convert strings case + # below. 
+ sampsperframe = '1' + if not skew: + skew = '0' + if not byteoffset: + byteoffset = '0' + if not adcgain: + adcgain = '200' + if not baseline: + if not adczero: + baseline = '0' + else: + baseline = adczero # missing baseline actually takes adczero value if present + if not units: + units = 'mV' + if not initvalue: + initvalue = '0' + if not signame: + signame = "ch" + str(i + 1) + if not initvalue: + initvalue = '0' + + fields["filename"].append(filename) + fields["fmt"].append(fmt) + fields["sampsperframe"].append(int(sampsperframe)) + fields["skew"].append(int(skew)) + fields['byteoffset'].append(int(byteoffset)) + fields["gain"].append(float(adcgain)) + fields["baseline"].append(int(baseline)) + fields["units"].append(units) + fields["initvalue"].append(int(initvalue)) + fields["signame"].append(signame) + + for comment in commentlines: + fields["comments"].append(comment.strip('\s#')) + + return fields + + +# Read header file to get comment and non-comment lines +def getheaderlines(recordname): + with open(recordname + ".hea", 'r') as fp: + headerlines = [] # Store record line followed by the signal lines if any + commentlines = [] # Comments + for line in fp: + line = line.strip() + if line.startswith('#'): # comment line + commentlines.append(line) + elif line: # Non-empty non-comment line = header line. + ci = line.find('#') + if ci > 0: + headerlines.append(line[:ci]) # header line + # comment on same line as header line + commentlines.append(line[ci:]) + else: + headerlines.append(line) + return headerlines, commentlines + diff --git a/wfdb/_rdsamp.py b/wfdb/_rdsamp.py index 0431a5d8..795097e5 100644 --- a/wfdb/_rdsamp.py +++ b/wfdb/_rdsamp.py @@ -3,294 +3,12 @@ import os import sys import requests +from ._rdheader import rdheader +from ._downloadsamp import downloadsamp -def dlrecordfiles(pbrecname, targetdir): - """Check a specified local directory for all necessary files required to read a Physiobank - record, and download any missing files into the same directory. Returns a list of files - downloaded, or exits with error if an invalid Physiobank record is specified. - Usage: dledfiles = dlrecordfiles(pbrecname, targetdir) - - Input arguments: - - pbrecname (required): The name of the MIT format Physiobank record to be read, prepended - with the Physiobank subdirectory the file is contained in (without any file extensions). - eg. pbrecname=prcp/12726 to download files http://physionet.org/physiobank/database/prcp/12726.hea - and 12727.dat - - targetdir (required): The local directory to check for files required to read the record, - in which missing files are also downloaded. - - Output arguments: - - dledfiles: The list of files downloaded from PhysioBank. - - """ - - physioneturl = "http://physionet.org/physiobank/database/" - pbdir, baserecname = os.path.split(pbrecname) - displaydlmsg=1 - dledfiles = [] - - if not os.path.isdir(targetdir): # Make the target dir if it doesn't exist - os.makedirs(targetdir) - print("Created local directory: ", targetdir) - - # For any missing file, check if the input physiobank record name is - # valid, ie whether the file exists on physionet. Download if valid, exit - # if invalid. - dledfiles, displaydlmsg = dlifmissing(physioneturl+pbdir+"/"+baserecname+".hea", os.path.join(targetdir, baserecname+".hea"), dledfiles, displaydlmsg, targetdir) - - fields = readheader(os.path.join(targetdir, baserecname)) - - # Need to check validity of link if ANY file is missing. - if fields["nseg"] == 1: # Single segment. 
Check for all the required dat files - for f in set(fields["filename"]): - # Missing dat file - dledfiles, displaydlmsg = dlifmissing(physioneturl+pbdir+"/"+f, os.path.join(targetdir, f), dledfiles, displaydlmsg, targetdir) - else: # Multi segment. Check for all segment headers and their dat files - for segment in fields["filename"]: - if segment != '~': - # Check the segment header - dledfiles, displaydlmsg = dlifmissing(physioneturl+pbdir+"/"+segment+".hea", os.path.join(targetdir, segment+".hea"), dledfiles, displaydlmsg, targetdir) - segfields = readheader(os.path.join(targetdir, segment)) - for f in set(segfields["filename"]): - if f != '~': - # Check the segment's dat file - dledfiles, displaydlmsg = dlifmissing(physioneturl+pbdir+"/"+f, os.path.join(targetdir, f), dledfiles, displaydlmsg, targetdir) - - if dledfiles: - print('Downloaded all missing files for record.') - return dledfiles # downloaded files - - -# Download a file if it is missing. Also error check 0 byte files. -def dlifmissing(url, filename, dledfiles, displaydlmsg, targetdir): - - if os.path.isfile(filename): - # Likely interrupted download - if os.path.getsize(filename)==0: - userresponse=input("Warning - File "+filename+" is 0 bytes. Likely interrupted download.\nRemove file and redownload? [y/n] - ") - while userresponse not in ['y','n']: - userresponse=input("Remove file and redownload? [y/n] - ") - if userresponse=='y': - os.remove(filename) - dledfiles.append(dlorexit(url, filename, displaydlmsg, targetdir)) - displaydlmsg=0 - else: - print("Skipping download.") - # File is already present. - else: - print("File "+filename+" is already present.") - else: - dledfiles.append(dlorexit(url, filename, displaydlmsg, targetdir)) - displaydlmsg=0 - - # If a file gets downloaded, displaydlmsg is set to 0. No need to print the message more than once. - return dledfiles, displaydlmsg - - -# Download the file from the specified 'url' as the 'filename', or exit with warning. -def dlorexit(url, filename, displaydlmsg=0, targetdir=[]): - if displaydlmsg: # We want this message to be called once for all files downloaded. - print('Downloading missing file(s) into directory: {}'.format(targetdir)) - try: - r = requests.get(url) - with open(filename, "wb") as writefile: - writefile.write(r.content) - return filename - except requests.HTTPError: - sys.exit("Attempted to download invalid target file: " + url) - - -# Download files required to read a wfdb annotation. -def dlannfiles(): - return dledfiles - - -# Download all the records in a physiobank database. -def dlPBdatabase(database, targetdir): - return dledfiles - -# Read header file to get comment and non-comment lines -def getheaderlines(recordname): - with open(recordname + ".hea", 'r') as fp: - headerlines = [] # Store record line followed by the signal lines if any - commentlines = [] # Comments - for line in fp: - line = line.strip() - if line.startswith('#'): # comment line - commentlines.append(line) - elif line: # Non-empty non-comment line = header line. 
- ci = line.find('#') - if ci > 0: - headerlines.append(line[:ci]) # header line - # comment on same line as header line - commentlines.append(line[ci:]) - else: - headerlines.append(line) - return headerlines, commentlines - -def readheader(recordname): # For reading signal headers - - # To do: Allow exponential input format for some fields - - # Output dictionary - fields = { - 'nseg': [], - 'nsig': [], - 'fs': [], - 'nsamp': [], - 'basetime': [], - 'basedate': [], - 'filename': [], - 'fmt': [], - 'sampsperframe': [], - 'skew': [], - 'byteoffset': [], - 'gain': [], - 'units': [], - 'baseline': [], - 'initvalue': [], - 'signame': [], - 'nsampseg': [], - 'comments': []} - # filename stores file names for both multi and single segment headers. - # nsampseg is only for multi-segment - - - # RECORD LINE fields (o means optional, delimiter is space or tab unless specified): - # record name, nsegments (o, delim=/), nsignals, fs (o), counter freq (o, delim=/, needs fs), - # base counter (o, delim=(), needs counter freq), nsamples (o, needs fs), base time (o), - # base date (o needs base time). - - # Regexp object for record line - rxRECORD = re.compile( - ''.join( - [ - "(?P[\w]+)/?(?P\d*)[ \t]+", - "(?P\d+)[ \t]*", - "(?P\d*\.?\d*)/*(?P\d*\.?\d*)\(?(?P\d*\.?\d*)\)?[ \t]*", - "(?P\d*)[ \t]*", - "(?P\d*:?\d{,2}:?\d{,2}\.?\d*)[ \t]*", - "(?P\d{,2}/?\d{,2}/?\d{,4})"])) - # Watch out for potential floats: fs (and also exponent notation...), - # counterfs, basecounter - - # SIGNAL LINE fields (o means optional, delimiter is space or tab unless specified): - # file name, format, samplesperframe(o, delim=x), skew(o, delim=:), byteoffset(o,delim=+), - # ADCgain(o), baseline(o, delim=(), requires ADCgain), units(o, delim=/, requires baseline), - # ADCres(o, requires ADCgain), ADCzero(o, requires ADCres), initialvalue(o, requires ADCzero), - # checksum(o, requires initialvalue), blocksize(o, requires checksum), - # signame(o, requires block) - - # Regexp object for signal lines. Consider flexible filenames, and also ~ - rxSIGNAL = re.compile( - ''.join( - [ - "(?P[\w]*\.?[\w]*~?)[ \t]+(?P\d+)x?" - "(?P\d*):?(?P\d*)\+?(?P\d*)[ \t]*", - "(?P-?\d*\.?\d*e?[\+-]?\d*)\(?(?P-?\d*)\)?/?(?P[\w\^/-]*)[ \t]*", - "(?P\d*)[ \t]*(?P-?\d*)[ \t]*(?P-?\d*)[ \t]*", - "(?P-?\d*)[ \t]*(?P\d*)[ \t]*(?P[\S]*)"])) - - # Units characters: letters, numbers, /, ^, -, - # Watch out for potentially negative fields: baseline, ADCzero, initialvalue, checksum, - # Watch out for potential float: ADCgain. - - # Read the header file and get the comment and non-comment lines - headerlines, commentlines = getheaderlines(recordname) - - # Get record line parameters - (_, nseg, nsig, fs, counterfs, basecounter, nsamp, - basetime, basedate) = rxRECORD.findall(headerlines[0])[0] - - # These fields are either mandatory or set to defaults. - if not nseg: - nseg = '1' - if not fs: - fs = '250' - - fields['nseg'] = int(nseg) - fields['fs'] = float(fs) - fields['nsig'] = int(nsig) - - # These fields might by empty - if nsamp: - fields['nsamp'] = int(nsamp) - fields['basetime'] = basetime - fields['basedate'] = basedate - - - # Signal or Segment line paramters - # Multi segment header - Process segment spec lines in current master - # header. - if int(nseg) > 1: - for i in range(0, int(nseg)): - (filename, nsampseg) = re.findall( - '(?P\w*~?)[ \t]+(?P\d+)', headerlines[i + 1])[0] - fields["filename"].append(filename) - fields["nsampseg"].append(int(nsampseg)) - # Single segment header - Process signal spec lines in regular header. 
- else: - for i in range(0, int(nsig)): # will not run if nsignals=0 - # get signal line parameters - (filename, - fmt, - sampsperframe, - skew, - byteoffset, - adcgain, - baseline, - units, - adcres, - adczero, - initvalue, - checksum, - blocksize, - signame) = rxSIGNAL.findall(headerlines[i + 1])[0] - - # Setting defaults - if not sampsperframe: - # Setting strings here so we can always convert strings case - # below. - sampsperframe = '1' - if not skew: - skew = '0' - if not byteoffset: - byteoffset = '0' - if not adcgain: - adcgain = '200' - if not baseline: - if not adczero: - baseline = '0' - else: - baseline = adczero # missing baseline actually takes adczero value if present - if not units: - units = 'mV' - if not initvalue: - initvalue = '0' - if not signame: - signame = "ch" + str(i + 1) - if not initvalue: - initvalue = '0' - - fields["filename"].append(filename) - fields["fmt"].append(fmt) - fields["sampsperframe"].append(int(sampsperframe)) - fields["skew"].append(int(skew)) - fields['byteoffset'].append(int(byteoffset)) - fields["gain"].append(float(adcgain)) - fields["baseline"].append(int(baseline)) - fields["units"].append(units) - fields["initvalue"].append(int(initvalue)) - fields["signame"].append(signame) - - for comment in commentlines: - fields["comments"].append(comment.strip('\s#')) - - return fields - - - -def skewsignal(sig, skew, fp, nsig, fmt, siglen, sampfrom, sampto, startbyte, nbytesread, byteoffset, sampsperframe, tsampsperframe): +def skewsignal(sig, skew, fp, nsig, fmt, siglen, sampfrom, sampto, startbyte, + nbytesread, byteoffset, sampsperframe, tsampsperframe): if max(skew) > 0: # Array of samples to fill in the final samples of the skewed channels. extrasig = np.empty([max(skew), nsig]) @@ -382,7 +100,8 @@ def readdat( fp, fmt, sampto - sampfrom, nsig, sampsperframe, floorsamp) # Shift the samples in the channels with skew if any - sig=skewsignal(sig, skew, fp, nsig, fmt, siglen, sampfrom, sampto, startbyte, nbytesread, byteoffset, sampsperframe, tsampsperframe) + sig=skewsignal(sig, skew, fp, nsig, fmt, siglen, sampfrom, sampto, startbyte, + nbytesread, byteoffset, sampsperframe, tsampsperframe) fp.close() @@ -823,7 +542,7 @@ def fixedorvariable(fields, dirname): 0] == 0: # variable layout - first segment is layout specification file startseg = 1 # Store the layout header info. - layoutfields = readheader( + layoutfields = rdheader( os.path.join( dirname, fields["filename"][0])) @@ -910,7 +629,7 @@ def getsegmentchannels(startseg, segrecordname, dirname, layoutfields, channels) segchannels = channels else: # Variable layout signal. Work out which channels from the segment to load if any. if segrecordname != '~': - sfields = readheader(os.path.join(dirname, segrecordname)) + sfields = rdheader(os.path.join(dirname, segrecordname)) wantsignals = [layoutfields["signame"][c] for c in channels] # Signal names of wanted channels segchannels = [] # The channel numbers wanted that are contained in the segment returninds = [] # 1 and 0 marking channels of the numpy array to be filled by @@ -946,16 +665,15 @@ def expandfields(segmentfields, segnum, startseg, readsegs, channels, returninds return segmentfields -def checkrecordfiles(recordname, pbdl, dldir, keepfiles): +def checkrecordfiles(recordname, pbdl, dldir): """Figure out the directory in which to process record files and download missing files if specified. *If you wish to directly download files for a record, call - 'dlrecordfiles'. This is a helper function for rdsamp. + 'downloadsamp'. 
This is a helper function for rdsamp. Input arguments: - recordname: name of the record - pbdl: flag specifying whether a physiobank record should be downloaded - dldir: directory in which to download physiobank files - - keepfiles: flag specifying whether to keep downloaded files Output arguments: - dirname: the directory name from where the data files will be read @@ -967,16 +685,15 @@ def checkrecordfiles(recordname, pbdl, dldir, keepfiles): # Download physiobank files if specified if pbdl == 1: - dledfiles = dlrecordfiles(recordname, dldir) - if keepfiles==0: - filestoremove = dledfiles + dledfiles = downloadsamp(recordname, dldir) + # The directory to read the files from is the downloaded directory dirname = dldir (_, baserecordname)= os.path.split(recordname) else: dirname, baserecordname = os.path.split(recordname) - return dirname, baserecordname, filestoremove + return dirname, baserecordname @@ -988,23 +705,31 @@ def rdsamp( physical=1, stacksegments=1, pbdl=0, - dldir=os.getcwd(), - keepfiles=0): + dldir=os.getcwd()): """Read a WFDB record and return the signal as a numpy array and the metadata as a dictionary. Usage: - sig, fields = rdsamp(recordname, sampfrom=0, sampto=[], channels=[], physical=1, stacksegments=1, pbdl=0, dldir=os.cwd(), keepfiles=0) + sig, fields = rdsamp(recordname, sampfrom=0, sampto=[], channels=[], physical=1, stacksegments=1, + pbdl=0, dldir=os.cwd()) Input arguments: - - recordname (required): The name of the WFDB record to be read (without any file extensions). If the argument contains any path delimiter characters, the argument will be interpreted as PATH/baserecord and the data files will be searched for in the local path. If the pbdownload flag is set to 1, recordname will be interpreted as a physiobank record name including the database subdirectory. + - recordname (required): The name of the WFDB record to be read (without any file extensions). + If the argument contains any path delimiter characters, the argument will be interpreted as + PATH/baserecord and the data files will be searched for in the local path. If the pbdownload + flag is set to 1, recordname will be interpreted as a physiobank record name including the + database subdirectory. - sampfrom (default=0): The starting sample number to read for each channel. - sampto (default=length of entire signal): The final sample number to read for each channel. - channels (default=all channels): Indices specifying the channel to be returned. - - physical (default=1): Flag that specifies whether to return signals in physical (1) or digital (0) units. - - stacksegments (default=1): Flag used only for multi-segment files. Specifies whether to return the signal as a single stacked/concatenated numpy array (1) or as a list of one numpy array for each segment (0). - - pbdl (default=0): If this argument is set, the function will assume that the user is trying to download a physiobank file. Therefore the 'recordname' argument will be interpreted as a physiobank record name including the database subdirectory, rather than a local directory. + - physical (default=1): Flag that specifies whether to return signals in physical (1) or + digital (0) units. + - stacksegments (default=1): Flag used only for multi-segment files. Specifies whether to + return the signal as a single stacked/concatenated numpy array (1) or as a list of one + numpy array for each segment (0). + - pbdl (default=0): If this argument is set, the function will assume that the user is trying + to download a physiobank file. 
Therefore the 'recordname' argument will be interpreted as + a physiobank record name including the database subdirectory, rather than a local directory. - dldir (default=os.getcwd()): The directory to download physiobank files to. - - keepfiles (default=0): Flag specifying whether to keep physiobank files newly downloaded through the function call. Output variables: - sig: An nxm numpy array where n is the signal length and m is the number of channels. @@ -1020,7 +745,8 @@ def rdsamp( : The last list element will be a list of dictionaries of metadata for each segment. For empty segments, the dictionary will be replaced by a single string: 'Empty Segment' - Example: sig, fields = wfdb.rdsamp('macecgdb/test01_00s', sampfrom=800, pbdl=1, dldir='/home/username/Downloads/wfdb') + Example: sig, fields = wfdb.rdsamp('macecgdb/test01_00s', sampfrom=800, pbdl=1, + dldir='/home/username/Downloads/wfdb') """ if sampfrom < 0: @@ -1028,9 +754,9 @@ def rdsamp( if channels and min(channels) < 0: sys.exit("input channels must be non-negative") - dirname, baserecordname, filestoremove = checkrecordfiles(recordname, pbdl, dldir, keepfiles) + dirname, baserecordname = checkrecordfiles(recordname, pbdl, dldir) - fields = readheader(os.path.join(dirname, baserecordname)) + fields = rdheader(os.path.join(dirname, baserecordname)) if fields["nsig"] == 0: sys.exit("This record has no signals. Use rdann to read annotations") @@ -1039,7 +765,8 @@ def rdsamp( # Single segment file if fields["nseg"] == 1: - sig, fields = processsegment(fields, dirname, baserecordname, sampfrom, sampto, channels, physical) + sig, fields = processsegment(fields, dirname, baserecordname, sampfrom, sampto, + channels, physical) # Multi-segment file. Preprocess and recursively call rdsamp on segments else: @@ -1051,7 +778,8 @@ def rdsamp( readsegs, readsamps, sampto = requiredsections(fields, sampfrom, sampto, startseg) # Preprocess/preallocate according to the chosen output format - sig, channels, nsamp, segmentfields, indstart= allocateoutput(fields, channels, stacksegments, sampfrom, sampto, physical, startseg, readsegs) + sig, channels, nsamp, segmentfields, indstart= allocateoutput(fields, channels, + stacksegments, sampfrom, sampto, physical, startseg, readsegs) # Read and store segments one at a time. # segnum (the segment number) accounts for the layout record if exists @@ -1061,7 +789,8 @@ def rdsamp( segrecordname = fields["filename"][segnum] # Work out the relative channels to return from this segment - segchannels, returninds, emptyinds = getsegmentchannels(startseg, segrecordname, dirname, layoutfields, channels) + segchannels, returninds, emptyinds = getsegmentchannels(startseg, segrecordname, + dirname, layoutfields, channels) if stacksegments == 0: # Return list of np arrays # Empty segment or no desired channels in segment. Store indicator and segment @@ -1123,9 +852,6 @@ def rdsamp( else: # Fixed layout format. fields = [fields, segmentfields] - for fr in filestoremove: - os.remove(fr) - return (sig, fields)