Produces WAV file from WFDB format

Lucas-Mc · Lucas-Mc · commit 545d31340e52 · 2020-07-24T01:06:10.000-04:00
Introduces the conversion of WFDB files to WAV format. All data is written to the WAV file using struct.pack() and numpy.ndarray.tofile(), so no outside packages are introduced. This implementation preserves the -h option, whose help text is now available through help(wfdb.mit2wav). The -o option, which specifies the desired WAV file name, is preserved as the parameter output_filename. Finally, the -n option, which specifies whether or not to write an accompanying header file, is preserved as the parameter write_header.
diff --git a/wfdb/__init__.py b/wfdb/__init__.py
@@ -1,5 +1,6 @@
 from wfdb.io.record import (Record, MultiRecord, rdheader, rdrecord, rdsamp,
-                            wrsamp, dl_database, edf2mit, mit2edf, wav2mit, wfdb2mat, sampfreq, signame)
+                            wrsamp, dl_database, edf2mit, mit2edf, wav2mit, mit2wav,
+                            wfdb2mat, sampfreq, signame)
 from wfdb.io.annotation import (Annotation, rdann, wrann, show_ann_labels,
                                 show_ann_classes, ann2rr)
 from wfdb.io.download import get_dbs, get_record_list, dl_files, set_db_index_url
diff --git a/wfdb/io/__init__.py b/wfdb/io/__init__.py
@@ -1,5 +1,6 @@
 from wfdb.io.record import (Record, MultiRecord, rdheader, rdrecord, rdsamp, wrsamp,
-                            dl_database, edf2mit, mit2edf, wav2mit, wfdb2mat, sampfreq, signame, SIGNAL_CLASSES)
+                            dl_database, edf2mit, mit2edf, wav2mit, mit2wav, wfdb2mat,
+                            sampfreq, signame, SIGNAL_CLASSES)
 from wfdb.io._signal import est_res, wr_dat_file
 from wfdb.io.annotation import (Annotation, rdann, wrann, show_ann_labels,
                                 show_ann_classes, ann2rr)
diff --git a/wfdb/io/record.py b/wfdb/io/record.py
@@ -1891,6 +1891,160 @@ def mit2edf(record_name, pn_dir=None, sampfrom=0, sampto=None, channels=None,
             print('WARNING: output contains an invalid character, {}, at byte {}'.format(val, i))
 
 
+def mit2wav(record_name, pn_dir=None, sampfrom=0, sampto=None, channels=None,
+            output_filename='', write_header=False):
+    """
+    This program converts a WFDB record into .wav format (format 16, multiplexed
+    signals, with embedded header information).  Use 'wav2mit' to perform the
+    reverse conversion.
+
+    Parameters
+    ----------
+    record_name : str
+        The name of the input WFDB record to be read. Can also work with both
+        EDF and WAV files.
+    pn_dir : str, optional
+        Option used to stream data from Physionet. The Physionet
+        database directory from which to find the required record files.
+        eg. For record '100' in 'http://physionet.org/content/mitdb'
+        pn_dir='mitdb'.
+    sampfrom : int, optional
+        The starting sample number to read for all channels.
+    sampto : int, 'end', optional
+        The sample number at which to stop reading for all channels.
+        Reads the entire duration by default.
+    channels : list, optional
+        List of integer indices specifying the channels to be read.
+        Reads all channels by default.
+    output_filename : str, optional
+        The desired name of the output file. If this value set to the
+        default value of '', then the output filename will be 'REC.wav'.
+    write_header : bool, optional
+        Whether to write (True) or not to write (False) a header file to
+        accompany the generated WAV file. The default value is 'False'.
+
+    Returns
+    -------
+    N/A
+
+    Notes
+    -----
+    Files that can be processed successfully using `wav2mit` always have exactly
+    three chunks (a header chunk, a format chunk, and a data chunk).  In .wav
+    files, binary data are always written in little-endian format (least
+    significant byte first). The format of `wav2mit`'s input files is as follows:
+
+    [Header chunk]
+    Bytes  0 -  3: "RIFF" [4 ASCII characters]
+    Bytes  4 -  7: L-8 (number of bytes to follow in the file, excluding bytes 0-7)
+    Bytes  8 - 11: "WAVE" [4 ASCII characters]
+
+    [Format chunk]
+    Bytes 12 - 15: "fmt " [4 ASCII characters, note trailing space]
+    Bytes 16 - 19: 16 (format chunk length in bytes, excluding bytes 12-19)
+    Bytes 20 - 35: format specification, consisting of:
+    Bytes 20 - 21: 1 (format tag, indicating no compression is used)
+    Bytes 22 - 23: number of signals (1 - 65535)
+    Bytes 24 - 27: sampling frequency in Hz (per signal)
+                   Note that the sampling frequency in a .wav file must be an
+                   integer multiple of 1 Hz, a restriction that is not imposed
+                   by MIT (WFDB) format.
+    Bytes 28 - 31: bytes per second (sampling frequency * frame size in bytes)
+    Bytes 32 - 33: frame size in bytes
+    Bytes 34 - 35: bits per sample (ADC resolution in bits)
+                   Note that the actual ADC resolution (e.g., 12) is written in
+                   this field, although each output sample is right-padded to fill
+                   a full (16-bit) word. (.wav format allows for 8, 16, 24, and
+                   32 bits per sample)
+
+    [Data chunk]
+    Bytes 36 - 39: "data" [4 ASCII characters]
+    Bytes 40 - 43: L-44 (number of bytes to follow in the data chunk)
+    Bytes 44 - L-1: sample data, consisting of:
+    Bytes 44 - 45: sample 0, channel 0
+    Bytes 46 - 47: sample 0, channel 1
+    ... etc. (same order as in a multiplexed WFDB signal file)
+
+    Examples
+    --------
+    >>> wfdb.mit2wav('100', pn_dir='pwave')
+
+    The output file name is '100.wav'
+
+    """
+    record = rdrecord(record_name, pn_dir=pn_dir, sampfrom=sampfrom,
+                      sampto=sampto, smooth_frames=False)
+    record_name_out = record_name.split(os.sep)[-1].replace('-','_')
+
+    # Get information needed for the header and format chunks
+    num_samps = record.sig_len
+    samps_per_second = record.fs
+    frame_length = record.n_sig * 2
+    chunk_bytes = num_samps * frame_length
+    file_bytes = chunk_bytes + 36
+    bits_per_sample = max(record.adc_res)
+    offset = record.adc_zero
+    shift = [(16 - v) for v in record.adc_res]
+
+    # Start writing the file
+    if output_filename != '':
+        if not output_filename.endswith('.wav'):
+            raise Exception("Name of output file must end in '.wav'")
+    else:
+        output_filename = record_name_out + '.wav'
+
+    with open(output_filename, 'wb') as f:
+        # Write the WAV file identifier
+        f.write(struct.pack('>4s', b'RIFF'))
+        # Write the number of bytes to follow in the file
+        # (num_samps*frame_length) sample bytes, and 36 more bytes of miscellaneous embedded header
+        f.write(struct.pack('<I', file_bytes))
+        # Descriptor for the format of the file
+        f.write(struct.pack('>8s', b'WAVEfmt '))
+        # Number of bytes to follow in the format chunk
+        f.write(struct.pack('<I', 16))
+        # The format tag
+        f.write(struct.pack('<H', 1))
+        # The number of signals
+        f.write(struct.pack('<H', record.n_sig))
+        # The samples per second
+        f.write(struct.pack('<I', samps_per_second))
+        # The number of bytes per second
+        f.write(struct.pack('<I', samps_per_second * frame_length))
+        # The length of each frame
+        f.write(struct.pack('<H', frame_length))
+        # The number of bits per samples
+        f.write(struct.pack('<H', bits_per_sample))
+        # The descriptor to indicate that the data information is next
+        f.write(struct.pack('>4s', b'data'))
+        # The number of bytes in the signal data chunk
+        f.write(struct.pack('<I', chunk_bytes))
+        # Write the signal data... the closest I can get to the original implementation
+        # Mismatched elements: 723881 / 15400000 (4.7%)
+        # Max absolute difference: 2
+        # Max relative difference: 0.00444444
+        #  x: array([ -322,  3852, -9246, ...,     0,     0,     0], dtype=int16)
+        #  y: array([ -322,  3852, -9246, ...,     0,     0,     0], dtype=int16)
+        sig_data = np.left_shift(np.subtract(record.adc(), offset), shift).reshape((1, -1)).astype(np.int16)
+        sig_data.tofile(f)
+
+    # If asked to write the accompanying header file
+    if write_header:
+        record.adc_zero = record.n_sig * [0]
+        record.adc_res = record.n_sig * [16]
+        record.adc_gain = [(r * (1 << shift[i])) for i,r in enumerate(record.adc_gain)]
+        record.baseline = [(b - offset[i]) for i,b in enumerate(record.baseline)]
+        record.baseline = [(b * (1 << shift[i])) for i,b in enumerate(record.baseline)]
+        record.file_name = record.n_sig * [record_name_out + '.wav']
+        record.block_size = record.n_sig * [0]
+        record.fmt = record.n_sig * ['16']
+        record.samps_per_fram = record.n_sig * [1]
+        record.init_value = sig_data[0][:record.n_sig].tolist()
+        record.byte_offset = record.n_sig * [44]
+        # Write the header file
+        record.wrheader()
+
+
 def wav2mit(record_name, pn_dir=None, delete_file=True, record_only=False):
     """
     Convert .wav (format 16, multiplexed signals, with embedded header