Skip to content

Handle smooth_frames=False consistently for single and multi-frequency records #313

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Mar 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 20 additions & 11 deletions tests/test_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,19 +26,22 @@ def test_1a(self):
Target file created with:
rdsamp -r sample-data/test01_00s | cut -f 2- > record-1a
"""
record = wfdb.rdrecord('sample-data/test01_00s', physical=False)
record = wfdb.rdrecord('sample-data/test01_00s',
physical=False, return_res=16)
sig = record.d_signal
sig_target = np.genfromtxt('tests/target-output/record-1a')

# Compare data streaming from Physionet
record_pn = wfdb.rdrecord('test01_00s', physical=False,
pn_dir='macecgdb')
record_pn = wfdb.rdrecord('test01_00s', pn_dir='macecgdb',
physical=False, return_res=16)

# Test file writing
record_2 = wfdb.rdrecord('sample-data/test01_00s', physical=False)
record_2 = wfdb.rdrecord('sample-data/test01_00s',
physical=False, return_res=16)
record_2.sig_name = ['ECG_1', 'ECG_2', 'ECG_3', 'ECG_4']
record_2.wrsamp()
record_write = wfdb.rdrecord('test01_00s', physical=False)
record_write = wfdb.rdrecord('test01_00s',
physical=False, return_res=16)

assert np.array_equal(sig, sig_target)
assert record.__eq__(record_pn)
Expand Down Expand Up @@ -75,24 +78,30 @@ def test_1b(self):
def test_1c(self):
"""
Format 16, byte offset, selected duration, selected channels,
digital.
digital, expanded format.

Target file created with:
rdsamp -r sample-data/a103l -f 80 -s 0 1 | cut -f 2- > record-1c
"""
record = wfdb.rdrecord('sample-data/a103l',
sampfrom=20000, channels=[0, 1], physical=False)
sig = record.d_signal
sampfrom=20000, channels=[0, 1], physical=False,
smooth_frames=False)
# convert expanded to uniform array
sig = np.zeros((record.sig_len, record.n_sig))
for i in range(record.n_sig):
sig[:,i] = record.e_d_signal[i]

sig_target = np.genfromtxt('tests/target-output/record-1c')

# Compare data streaming from Physionet
record_pn = wfdb.rdrecord('a103l', pn_dir='challenge-2015/training',
sampfrom=20000, channels=[0, 1],
physical=False)
physical=False, smooth_frames=False)

# Test file writing
record.wrsamp()
record_write = wfdb.rdrecord('a103l', physical=False)
record.wrsamp(expanded=True)
record_write = wfdb.rdrecord('a103l', physical=False,
smooth_frames=False)

assert np.array_equal(sig, sig_target)
assert record.__eq__(record_pn)
Expand Down
237 changes: 96 additions & 141 deletions wfdb/io/_signal.py
Original file line number Diff line number Diff line change
Expand Up @@ -750,7 +750,7 @@ def calc_checksum(self, expanded=False):

"""
if expanded:
cs = [int(np.sum(self.e_d_signal[ch]) % 65536) for ch in range(self.n_sig)]
cs = [int(np.sum(s) % 65536) for s in self.e_d_signal]
else:
cs = np.sum(self.d_signal, 0) % 65536
cs = [int(c) for c in cs]
Expand Down Expand Up @@ -827,37 +827,61 @@ def smooth_frames(self, sigtype='physical'):
if spf[ch] is None:
spf[ch] = 1

# Total samples per frame
tspf = sum(spf)
# The output data type should be the smallest type that can
# represent any input sample value. The intermediate data type
# must be able to represent the sum of spf[ch] sample values.

if sigtype == 'physical':
n_sig = len(self.e_p_signal)
sig_len = int(len(self.e_p_signal[0])/spf[0])
signal = np.zeros((sig_len, n_sig), dtype='float64')

for ch in range(n_sig):
if spf[ch] == 1:
signal[:, ch] = self.e_p_signal[ch]
else:
for frame in range(spf[ch]):
signal[:, ch] += self.e_p_signal[ch][frame::spf[ch]]
signal[:, ch] = signal[:, ch] / spf[ch]

expanded_signal = self.e_p_signal
intermediate_dtype = np.dtype('float64')
allowed_dtypes = [
np.dtype('float32'),
np.dtype('float64'),
]
elif sigtype == 'digital':
n_sig = len(self.e_d_signal)
sig_len = int(len(self.e_d_signal[0])/spf[0])
signal = np.zeros((sig_len, n_sig), dtype='int64')

for ch in range(n_sig):
if spf[ch] == 1:
signal[:, ch] = self.e_d_signal[ch]
else:
for frame in range(spf[ch]):
signal[:, ch] += self.e_d_signal[ch][frame::spf[ch]]
signal[:, ch] = signal[:, ch] / spf[ch]
expanded_signal = self.e_d_signal
intermediate_dtype = np.dtype('int64')
allowed_dtypes = [
np.dtype('int8'),
np.dtype('int16'),
np.dtype('int32'),
np.dtype('int64'),
]
else:
raise ValueError("sigtype must be 'physical' or 'digital'")

n_sig = len(expanded_signal)
sig_len = len(expanded_signal[0]) // spf[0]
input_dtypes = set()
for ch in range(n_sig):
if len(expanded_signal[ch]) != sig_len * spf[ch]:
raise ValueError("length mismatch: signal %d has %d samples,"
" expected %dx%d"
% (ch, len(expanded_signal),
sig_len, spf[ch]))
input_dtypes.add(expanded_signal[ch].dtype)

for output_dtype in allowed_dtypes:
if all(dt <= output_dtype for dt in input_dtypes):
break

signal = np.empty((sig_len, n_sig), dtype=output_dtype)

# Large input arrays will be processed in chunks to avoid the need
# to allocate a single huge temporary array.
CHUNK_SIZE = 65536

for ch in range(n_sig):
if spf[ch] == 1:
signal[:, ch] = expanded_signal[ch]
else:
frames = expanded_signal[ch].reshape(-1, spf[ch])
for chunk_start in range(0, sig_len, CHUNK_SIZE):
chunk_end = chunk_start + CHUNK_SIZE
signal_sum = np.sum(frames[chunk_start:chunk_end, :],
axis=1, dtype=intermediate_dtype)
signal[chunk_start:chunk_end, ch] = signal_sum / spf[ch]

return signal


Expand All @@ -866,7 +890,7 @@ def smooth_frames(self, sigtype='physical'):

def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset,
samps_per_frame, skew, init_value, sampfrom, sampto, channels,
smooth_frames, ignore_skew, no_file=False, sig_data=None, return_res=64):
ignore_skew, no_file=False, sig_data=None, return_res=64):
"""
Read the digital samples from a single segment record's associated
dat file(s).
Expand Down Expand Up @@ -899,8 +923,6 @@ def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset,
The starting sample number to be read from the signals.
sampto : int
The final sample number to be read from the signals.
smooth_frames : bool
Whether to smooth channels with multiple samples/frame.
ignore_skew : bool
Used when reading records with at least one skewed signal.
Specifies whether to apply the skew to align the signals in the
Expand All @@ -920,17 +942,14 @@ def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset,

Returns
-------
signals : ndarray, list
The signals read from the dat file(s). A 2d numpy array is
returned if the signals have uniform samples/frame or if
`smooth_frames` is True. Otherwise a list of 1d numpy arrays
is returned.
signals : list
The signals read from the dat file(s). Each signal is returned as a
one-dimensional numpy array.

Notes
-----
'channels', 'sampfrom', 'sampto', 'smooth_frames', and 'ignore_skew'
are user desired input fields. All other parameters are
specifications of the segment.
'channels', 'sampfrom', 'sampto', and 'ignore_skew' are user desired
input fields. All other parameters are specifications of the segment.

"""
# Check for valid inputs
Expand Down Expand Up @@ -994,69 +1013,38 @@ def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset,
r_w_channel[fn] = [c - min(datchannel[fn]) for c in w_channel[fn]]
out_dat_channel[fn] = [channels.index(c) for c in w_channel[fn]]

# Signals with multiple samples/frame are smoothed, or all signals have 1 sample/frame.
# Return uniform numpy array
if smooth_frames or sum(samps_per_frame) == n_sig:
# Figure out the largest required dtype for the segment to minimize memory usage
max_dtype = _np_dtype(_fmt_res(fmt, max_res=True), discrete=True)
# Allocate signal array. Minimize dtype
signals = np.zeros([sampto-sampfrom, len(channels)], dtype=max_dtype)

# Read each wanted dat file and store signals
for fn in w_file_name:
datsignals = _rd_dat_signals(
file_name=fn,
dir_name=dir_name,
pn_dir=pn_dir,
fmt=w_fmt[fn],
n_sig=len(datchannel[fn]),
sig_len=sig_len,
byte_offset=w_byte_offset[fn],
samps_per_frame=w_samps_per_frame[fn],
skew=w_skew[fn],
init_value=w_init_value[fn],
sampfrom=sampfrom,
sampto=sampto,
smooth_frames=smooth_frames,
no_file=no_file,
sig_data=sig_data)
signals[:, out_dat_channel[fn]] = datsignals[:, r_w_channel[fn]]

# Return each sample in signals with multiple samples/frame, without smoothing.
# Return a list of numpy arrays for each signal.
else:
signals = [None] * len(channels)

for fn in w_file_name:
# Get the list of all signals contained in the dat file
datsignals = _rd_dat_signals(
file_name=fn,
dir_name=dir_name,
pn_dir=pn_dir,
fmt=w_fmt[fn],
n_sig=len(datchannel[fn]),
sig_len=sig_len,
byte_offset=w_byte_offset[fn],
samps_per_frame=w_samps_per_frame[fn],
skew=w_skew[fn],
init_value=w_init_value[fn],
sampfrom=sampfrom,
sampto=sampto,
smooth_frames=smooth_frames,
no_file=no_file,
sig_data=sig_data)

# Copy over the wanted signals
for cn in range(len(out_dat_channel[fn])):
signals[out_dat_channel[fn][cn]] = datsignals[r_w_channel[fn][cn]]
signals = [None] * len(channels)

for fn in w_file_name:
# Get the list of all signals contained in the dat file
datsignals = _rd_dat_signals(
file_name=fn,
dir_name=dir_name,
pn_dir=pn_dir,
fmt=w_fmt[fn],
n_sig=len(datchannel[fn]),
sig_len=sig_len,
byte_offset=w_byte_offset[fn],
samps_per_frame=w_samps_per_frame[fn],
skew=w_skew[fn],
init_value=w_init_value[fn],
sampfrom=sampfrom,
sampto=sampto,
no_file=no_file,
sig_data=sig_data)

# Copy over the wanted signals
for cn in range(len(out_dat_channel[fn])):
signals[out_dat_channel[fn][cn]] = datsignals[r_w_channel[fn][cn]]

return signals


def _rd_dat_signals(file_name, dir_name, pn_dir, fmt, n_sig, sig_len,
byte_offset, samps_per_frame, skew, init_value,
sampfrom, sampto, smooth_frames,
no_file=False, sig_data=None):
sampfrom, sampto, no_file=False, sig_data=None):
"""
Read all signals from a WFDB dat file.

Expand Down Expand Up @@ -1088,8 +1076,6 @@ def _rd_dat_signals(file_name, dir_name, pn_dir, fmt, n_sig, sig_len,
The starting sample number to be read from the signals.
sampto : int
The final sample number to be read from the signals.
smooth_frames : bool
Whether to smooth channels with multiple samples/frame.
no_file : bool, optional
Used when using this function with just an array of signal data
and no associated file to read the data from.
Expand All @@ -1100,16 +1086,13 @@ def _rd_dat_signals(file_name, dir_name, pn_dir, fmt, n_sig, sig_len,
Returns
-------
signal : ndarray, list
The signals read from the dat file(s). A 2d numpy array is
returned if the signals have uniform samples/frame or if
`smooth_frames` is True. Otherwise a list of 1d numpy arrays
is returned.
The signals read from the dat file(s). Each signal is returned as a
one-dimensional numpy array.

Notes
-----
'channels', 'sampfrom', 'sampto', 'smooth_frames', and 'ignore_skew'
are user desired input fields. All other parameters are
specifications of the segment.
'channels', 'sampfrom', 'sampto', and 'ignore_skew' are user desired
input fields. All other parameters are specifications of the segment.

"""
# Check for valid inputs
Expand Down Expand Up @@ -1211,46 +1194,18 @@ def _rd_dat_signals(file_name, dir_name, pn_dir, fmt, n_sig, sig_len,
# At this point, dtype of sig_data is the minimum integer format
# required for storing the final digital samples.

# No extra samples/frame. Obtain original uniform numpy array
if tsamps_per_frame == n_sig:
# Reshape into multiple channels
signal = sig_data.reshape(-1, n_sig)
# Skew the signal
signal = _skew_sig(signal, skew, n_sig, read_len, fmt, nan_replace)
# Extra frames present to be smoothed. Obtain averaged uniform numpy array
elif smooth_frames:
# Allocate memory for smoothed signal.
signal = np.zeros((int(len(sig_data) / tsamps_per_frame) , n_sig),
dtype=sig_data.dtype)

# Transfer and average samples
for ch in range(n_sig):
if samps_per_frame[ch] == 1:
signal[:, ch] = sig_data[sum(([0] + samps_per_frame)[:ch + 1])::tsamps_per_frame]
else:
if ch == 0:
startind = 0
else:
startind = np.sum(samps_per_frame[:ch])
signal[:,ch] = [np.average(sig_data[ind:ind+samps_per_frame[ch]]) for ind in range(startind,len(sig_data),tsamps_per_frame)]
# Skew the signal
signal = _skew_sig(signal, skew, n_sig, read_len, fmt, nan_replace)

# Extra frames present without wanting smoothing. Return all
# expanded samples.
else:
# List of 1d numpy arrays
signal = []
# Transfer over samples
sig_frames = sig_data.reshape(-1, tsamps_per_frame)
ch_start = 0
for ch in range(n_sig):
ch_end = ch_start + samps_per_frame[ch]
ch_signal = sig_frames[:, ch_start:ch_end].reshape(-1)
signal.append(ch_signal)
ch_start = ch_end
# Skew the signal
signal = _skew_sig(signal, skew, n_sig, read_len, fmt, nan_replace, samps_per_frame)
# List of 1d numpy arrays
signal = []
# Transfer over samples
sig_frames = sig_data.reshape(-1, tsamps_per_frame)
ch_start = 0
for ch in range(n_sig):
ch_end = ch_start + samps_per_frame[ch]
ch_signal = sig_frames[:, ch_start:ch_end].reshape(-1)
signal.append(ch_signal)
ch_start = ch_end
# Skew the signal
signal = _skew_sig(signal, skew, n_sig, read_len, fmt, nan_replace, samps_per_frame)

# Integrity check of signal shape after reading
_check_sig_dims(signal, read_len, n_sig, samps_per_frame)
Expand Down
Loading