Skip to content

Commit 9b902c6

Browse files
committed
Produces MAT file from WFDB format
1 parent 89bc840 commit 9b902c6

File tree

3 files changed

+274
-2
lines changed

3 files changed

+274
-2
lines changed

wfdb/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from wfdb.io.record import (Record, MultiRecord, rdheader, rdrecord, rdsamp,
2-
wrsamp, dl_database, edf2mit, wav2mit, sampfreq, signame)
2+
wrsamp, dl_database, edf2mit, wav2mit, wfdb2mat, sampfreq, signame)
33
from wfdb.io.annotation import (Annotation, rdann, wrann, show_ann_labels,
44
show_ann_classes, ann2rr)
55
from wfdb.io.download import get_dbs, get_record_list, dl_files, set_db_index_url

wfdb/io/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from wfdb.io.record import (Record, MultiRecord, rdheader, rdrecord, rdsamp, wrsamp,
2-
dl_database, edf2mit, wav2mit, sampfreq, signame, SIGNAL_CLASSES)
2+
dl_database, edf2mit, wav2mit, wfdb2mat, sampfreq, signame, SIGNAL_CLASSES)
33
from wfdb.io._signal import est_res, wr_dat_file
44
from wfdb.io.annotation import (Annotation, rdann, wrann, show_ann_labels,
55
show_ann_classes, ann2rr)

wfdb/io/record.py

Lines changed: 272 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1706,6 +1706,278 @@ def wav2mit(record_name, pn_dir=None, delete_file=True, record_only=False):
17061706
pass
17071707

17081708

1709+
def wfdb2mat(record_name, pn_dir=None, sampfrom=0, sampto=None, channels=None):
    """
    Convert the signals of a WFDB record into a MATLAB version 5 .mat file
    and write a matching WFDB header file.

    If the input record name is REC, the output files are RECm.mat and
    RECm.hea, both written to the current working directory (any '-' in
    the record name is replaced by '_'). The .hea file describes the .mat
    file (format, 192 byte offset, gains, baselines, units, sampling
    frequency, and start time/date if known) so that the pair can also be
    read by WFDB applications as record RECm.

    This program does not convert annotation files; for that task, 'rdann'
    is recommended.

    The output .mat file contains a single matrix named `val` containing
    raw (unscaled) samples from the selected record, with `adc_zero`
    subtracted from every signal. `val` has one row per signal and one
    column per sample: since .mat files are stored in column-major order
    (all of column n precedes all of column n+1), each vector of
    simultaneous samples is written as a column, so the column number
    equals the sample number in the input record minus `sampfrom`. If this
    seems odd, transpose your matrix after reading it!

    The samples are written in little-endian order as:

    - 32-bit signed integers (miINT32) if any signal has an ADC resolution
      above 16 bits, or has no ADC resolution specified and is stored in
      format 24 or 32,
    - 8-bit unsigned integers (miUINT8, offset binary with an offset of
      128) if every signal fits in 8 bits,
    - 16-bit signed integers (miINT16) otherwise.

    The maximum size of the output variable is 2^31 bytes.

    Parameters
    ----------
    record_name : str
        The name of the input WFDB record to be read.
    pn_dir : str, optional
        Option used to stream data from Physionet. The Physionet
        database directory from which to find the required record files.
        eg. For record '100' in 'http://physionet.org/content/mitdb'
        pn_dir='mitdb'.
    sampfrom : int, optional
        The starting sample number to read for all channels.
    sampto : int, optional
        The sample number at which to stop reading for all channels.
        Reads the entire duration by default.
    channels : list, optional
        List of integer indices specifying the channels to be read.
        Reads all channels by default.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If the selected data would exceed the 2^31 byte limit of a
        single .mat variable.

    Notes
    -----
    The entire file is composed of:

    Bytes   0 - 127: descriptive text, version and endian indicator
    Bytes 128 - 131: master tag (data type = matrix)
    Bytes 132 - 135: master tag (data size)
    Bytes 136 - 151: array flags (4 byte tag with data type, 4 byte
                     tag with subelement size, 8 bytes of content)
    Bytes 152 - 167: array dimension (4 byte tag with data type, 4
                     byte tag with subelement size, 8 bytes of content)
    Bytes 168 - 183: array name (4 byte tag with data type, 4 byte
                     tag with subelement size, 8 bytes of content)
    Bytes 184 - ...: array content (4 byte tag with data type, 4 byte
                     tag with subelement size, ... bytes of content,
                     zero-padded to a multiple of 8 bytes)

    Examples
    --------
    >>> wfdb.wfdb2mat('100', pn_dir='pwave')

    The output file name is 100m.mat and 100m.hea

    """
    # `channels` is forwarded so that the samples written to the .mat file
    # and the header written afterwards describe the same set of signals.
    record = rdrecord(record_name, pn_dir=pn_dir, sampfrom=sampfrom,
                      sampto=sampto, channels=channels)
    record_name_out = record_name.split(os.sep)[-1].replace('-', '_') + 'm'
    output_file = record_name_out + '.mat'

    # Constants describing the layout of a version 5 MAT-file
    field_version = 256        # 0x0100
    endian_indicator = b'IM'   # little endian
    master_type = 14           # miMATRIX
    flags_type = 6             # miUINT32
    dims_type = 5              # miINT32
    name_type = 1              # miINT8
    array_class = 6            # double precision array
    header_bytes = 192         # 128 byte header + 8 byte master tag
                               # + 56 bytes of subelement tags/contents

    # Determine if we can write 8-bit unsigned samples, or if 16 or 32 bits
    # are needed per sample. The widest signal decides for the whole file.
    bytes_per_element = 1
    for resolution, fmt in zip(record.adc_res, record.fmt):
        if resolution > 0:
            if resolution > 16:
                bytes_per_element = 4
            elif resolution > 8 and bytes_per_element < 2:
                bytes_per_element = 2
        # adc_res not specified.. try to guess from format
        elif fmt in ('24', '32'):
            bytes_per_element = 4
        elif fmt != '80' and bytes_per_element < 2:
            bytes_per_element = 2

    # data_type: MAT-file type of the stored samples
    # out_type:  matching little-endian numpy dtype
    # wfdb_type: WFDB format announced in the output header
    # offset:    difference between sample values and the raw values as
    #            interpreted by Matlab/Octave (format 80 is offset binary)
    if bytes_per_element == 1:
        data_type, out_type, wfdb_type, offset = 2, '<u1', '80', 128
    elif bytes_per_element == 2:
        data_type, out_type, wfdb_type, offset = 3, '<i2', '16', 0
    else:
        data_type, out_type, wfdb_type, offset = 5, '<i4', '32', 0

    n_sig = record.n_sig
    # Use the number of samples that were actually read: it is what gets
    # written below, whatever combination of sampfrom/sampto was given.
    desired_length = record.p_signal.shape[0]

    # Ensure the signal size does not exceed the 2^31 byte limit
    max_length = int((2**31) / bytes_per_element / n_sig)
    if desired_length > max_length:
        raise ValueError("Can't write .mat file: data size exceeds 2GB limit")

    # Every data element must end on an 8 byte boundary
    bytes_of_data = bytes_per_element * n_sig * desired_length
    padding = -bytes_of_data % 8
    # master_bytes = (8 + 8) + (8 + 8) + (8 + 8) + (8 + bytes_of_data) + padding
    master_bytes = 56 + bytes_of_data + padding

    # Convert the physical signal back to raw sample values before the
    # output file is opened, so a failure here leaves no partial file.
    out_data = (record.p_signal * record.adc_gain
                + record.baseline - record.adc_zero)
    # The offset is applied before the cast so that format 80 samples land
    # in the unsigned 0..255 range instead of wrapping around.
    out_data = (np.around(out_data) + offset).astype(out_type)
    # Row-major flattening of the (samples, signals) array yields
    # [s0c0, s0c1, ..., s1c0, s1c1, ...], i.e. the column-major layout of
    # the (signals, samples) matrix `val`.
    out_data = out_data.flatten()

    with open(output_file, 'wb') as f:
        # Descriptive text (124 bytes), version (2 bytes) and endian
        # indicator (2 bytes)
        f.write(struct.pack('<124sH2s', b'MATLAB 5.0', field_version,
                            endian_indicator))
        # Master tag: data type and number of bytes (4 bytes each)
        f.write(struct.pack('<II', master_type, master_bytes))

        # Matrix data has 4 subelements (5 if imaginary):
        #   Array flags, dimensions array, array name, real part
        # Each subelement has its own tag (type + size) and contents

        # Subelement 1: array flags
        f.write(struct.pack('<IIQ', flags_type, 8, array_class))
        # Subelement 2: dimensions (rows = signals, columns = samples)
        f.write(struct.pack('<IIII', dims_type, 8, n_sig, desired_length))
        # Subelement 3: array name, 3 characters padded to 8 bytes
        f.write(struct.pack('<II8s', name_type, 3, b'val'))
        # Subelement 4: signal data, written with the element size that
        # was announced in its tag, then padded to the 8 byte boundary
        # already accounted for in master_bytes
        f.write(struct.pack('<II', data_type, bytes_of_data))
        f.write(out_data.tobytes())
        f.write(b'\x00' * padding)

    # Display some useful information
    start_parts = []
    if record.base_time is not None:
        start_parts.append(record.base_time.strftime('%H:%M:%S.%f'))
    if record.base_date is not None:
        start_parts.append(record.base_date.strftime('%d/%m/%Y'))
    if start_parts:
        datetime_string = '[{}]'.format(' '.join(start_parts))
    else:
        datetime_string = '[None]'

    print('Source: record {}\t\tStart: {}'.format(record_name, datetime_string))
    print('val has {} rows (signals) and {} columns (samples/signal)'.format(
        n_sig, desired_length))
    duration_string = str(datetime.timedelta(seconds=desired_length/record.fs))
    print('Duration: {}'.format(duration_string))
    print('Sampling frequency: {} Hz\tSampling interval: {} sec'.format(
        record.fs, 1/record.fs))
    row_template = '{:<7}{:<20}{:<17}{:<10}{:<10}'
    print(row_template.format('Row', 'Signal', 'Gain', 'Base', 'Units'))
    record.sig_name = [s.replace(' ', '_') for s in record.sig_name]
    for i in range(n_sig):
        print(row_template.format(i,
                                  record.sig_name[i],
                                  record.adc_gain[i],
                                  record.baseline[i]-record.adc_zero[i]+offset,
                                  record.units[i]))

    # Modify the record to reflect the new data file
    record.record_name = record_name_out
    record.n_sig = n_sig
    record.samps_per_frame = n_sig * [1]
    record.file_name = n_sig * [output_file]
    record.fmt = n_sig * [wfdb_type]
    # The samples start right after the MAT-file tags
    record.byte_offset = n_sig * [header_bytes]
    # adc_zero was subtracted from the stored samples, so it is folded
    # into the baseline and reset to zero
    record.baseline = [b - z for b, z in zip(record.baseline, record.adc_zero)]
    record.adc_zero = n_sig * [0]
    # Initial values are sample values, i.e. the stored values without
    # the storage offset
    record.init_value = (out_data[:n_sig].astype('int64') - offset).tolist()

    # Write the header file RECm.hea
    record.wrheader()
    # Append the following lines to create a signature
    with open(record_name_out+'.hea', 'a') as f:
        f.write('#Creator: wfdb2mat\n')
        f.write('#Source: record {}\n'.format(record_name))
1979+
1980+
17091981
#------------------------- Reading Records --------------------------- #
17101982

17111983

0 commit comments

Comments
 (0)