diff --git a/tests/test_record.py b/tests/test_record.py index ccdad08c..b592ef7c 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -26,19 +26,22 @@ def test_1a(self): Target file created with: rdsamp -r sample-data/test01_00s | cut -f 2- > record-1a """ - record = wfdb.rdrecord('sample-data/test01_00s', physical=False) + record = wfdb.rdrecord('sample-data/test01_00s', + physical=False, return_res=16) sig = record.d_signal sig_target = np.genfromtxt('tests/target-output/record-1a') # Compare data streaming from Physionet - record_pn = wfdb.rdrecord('test01_00s', physical=False, - pn_dir='macecgdb') + record_pn = wfdb.rdrecord('test01_00s', pn_dir='macecgdb', + physical=False, return_res=16) # Test file writing - record_2 = wfdb.rdrecord('sample-data/test01_00s', physical=False) + record_2 = wfdb.rdrecord('sample-data/test01_00s', + physical=False, return_res=16) record_2.sig_name = ['ECG_1', 'ECG_2', 'ECG_3', 'ECG_4'] record_2.wrsamp() - record_write = wfdb.rdrecord('test01_00s', physical=False) + record_write = wfdb.rdrecord('test01_00s', + physical=False, return_res=16) assert np.array_equal(sig, sig_target) assert record.__eq__(record_pn) @@ -75,24 +78,30 @@ def test_1b(self): def test_1c(self): """ Format 16, byte offset, selected duration, selected channels, - digital. + digital, expanded format. Target file created with: rdsamp -r sample-data/a103l -f 80 -s 0 1 | cut -f 2- > record-1c """ record = wfdb.rdrecord('sample-data/a103l', - sampfrom=20000, channels=[0, 1], physical=False) - sig = record.d_signal + sampfrom=20000, channels=[0, 1], physical=False, + smooth_frames=False) + # convert expanded to uniform array + sig = np.zeros((record.sig_len, record.n_sig)) + for i in range(record.n_sig): + sig[:,i] = record.e_d_signal[i] + sig_target = np.genfromtxt('tests/target-output/record-1c') # Compare data streaming from Physionet record_pn = wfdb.rdrecord('a103l', pn_dir='challenge-2015/training', sampfrom=20000, channels=[0, 1], - physical=False) + physical=False, smooth_frames=False) # Test file writing - record.wrsamp() - record_write = wfdb.rdrecord('a103l', physical=False) + record.wrsamp(expanded=True) + record_write = wfdb.rdrecord('a103l', physical=False, + smooth_frames=False) assert np.array_equal(sig, sig_target) assert record.__eq__(record_pn) diff --git a/wfdb/io/_signal.py b/wfdb/io/_signal.py index 868d3596..ededa1b9 100644 --- a/wfdb/io/_signal.py +++ b/wfdb/io/_signal.py @@ -750,7 +750,7 @@ def calc_checksum(self, expanded=False): """ if expanded: - cs = [int(np.sum(self.e_d_signal[ch]) % 65536) for ch in range(self.n_sig)] + cs = [int(np.sum(s) % 65536) for s in self.e_d_signal] else: cs = np.sum(self.d_signal, 0) % 65536 cs = [int(c) for c in cs] @@ -827,37 +827,61 @@ def smooth_frames(self, sigtype='physical'): if spf[ch] is None: spf[ch] = 1 - # Total samples per frame - tspf = sum(spf) + # The output data type should be the smallest type that can + # represent any input sample value. The intermediate data type + # must be able to represent the sum of spf[ch] sample values. if sigtype == 'physical': - n_sig = len(self.e_p_signal) - sig_len = int(len(self.e_p_signal[0])/spf[0]) - signal = np.zeros((sig_len, n_sig), dtype='float64') - - for ch in range(n_sig): - if spf[ch] == 1: - signal[:, ch] = self.e_p_signal[ch] - else: - for frame in range(spf[ch]): - signal[:, ch] += self.e_p_signal[ch][frame::spf[ch]] - signal[:, ch] = signal[:, ch] / spf[ch] - + expanded_signal = self.e_p_signal + intermediate_dtype = np.dtype('float64') + allowed_dtypes = [ + np.dtype('float32'), + np.dtype('float64'), + ] elif sigtype == 'digital': - n_sig = len(self.e_d_signal) - sig_len = int(len(self.e_d_signal[0])/spf[0]) - signal = np.zeros((sig_len, n_sig), dtype='int64') - - for ch in range(n_sig): - if spf[ch] == 1: - signal[:, ch] = self.e_d_signal[ch] - else: - for frame in range(spf[ch]): - signal[:, ch] += self.e_d_signal[ch][frame::spf[ch]] - signal[:, ch] = signal[:, ch] / spf[ch] + expanded_signal = self.e_d_signal + intermediate_dtype = np.dtype('int64') + allowed_dtypes = [ + np.dtype('int8'), + np.dtype('int16'), + np.dtype('int32'), + np.dtype('int64'), + ] else: raise ValueError("sigtype must be 'physical' or 'digital'") + n_sig = len(expanded_signal) + sig_len = len(expanded_signal[0]) // spf[0] + input_dtypes = set() + for ch in range(n_sig): + if len(expanded_signal[ch]) != sig_len * spf[ch]: + raise ValueError("length mismatch: signal %d has %d samples," + " expected %dx%d" + % (ch, len(expanded_signal), + sig_len, spf[ch])) + input_dtypes.add(expanded_signal[ch].dtype) + + for output_dtype in allowed_dtypes: + if all(dt <= output_dtype for dt in input_dtypes): + break + + signal = np.empty((sig_len, n_sig), dtype=output_dtype) + + # Large input arrays will be processed in chunks to avoid the need + # to allocate a single huge temporary array. + CHUNK_SIZE = 65536 + + for ch in range(n_sig): + if spf[ch] == 1: + signal[:, ch] = expanded_signal[ch] + else: + frames = expanded_signal[ch].reshape(-1, spf[ch]) + for chunk_start in range(0, sig_len, CHUNK_SIZE): + chunk_end = chunk_start + CHUNK_SIZE + signal_sum = np.sum(frames[chunk_start:chunk_end, :], + axis=1, dtype=intermediate_dtype) + signal[chunk_start:chunk_end, ch] = signal_sum / spf[ch] + return signal @@ -866,7 +890,7 @@ def smooth_frames(self, sigtype='physical'): def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset, samps_per_frame, skew, init_value, sampfrom, sampto, channels, - smooth_frames, ignore_skew, no_file=False, sig_data=None, return_res=64): + ignore_skew, no_file=False, sig_data=None, return_res=64): """ Read the digital samples from a single segment record's associated dat file(s). @@ -899,8 +923,6 @@ def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset, The starting sample number to be read from the signals. sampto : int The final sample number to be read from the signals. - smooth_frames : bool - Whether to smooth channels with multiple samples/frame. ignore_skew : bool Used when reading records with at least one skewed signal. Specifies whether to apply the skew to align the signals in the @@ -920,17 +942,14 @@ def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset, Returns ------- - signals : ndarray, list - The signals read from the dat file(s). A 2d numpy array is - returned if the signals have uniform samples/frame or if - `smooth_frames` is True. Otherwise a list of 1d numpy arrays - is returned. + signals : list + The signals read from the dat file(s). Each signal is returned as a + one-dimensional numpy array. Notes ----- - 'channels', 'sampfrom', 'sampto', 'smooth_frames', and 'ignore_skew' - are user desired input fields. All other parameters are - specifications of the segment. + 'channels', 'sampfrom', 'sampto', and 'ignore_skew' are user desired + input fields. All other parameters are specifications of the segment. """ # Check for valid inputs @@ -994,69 +1013,38 @@ def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset, r_w_channel[fn] = [c - min(datchannel[fn]) for c in w_channel[fn]] out_dat_channel[fn] = [channels.index(c) for c in w_channel[fn]] - # Signals with multiple samples/frame are smoothed, or all signals have 1 sample/frame. - # Return uniform numpy array - if smooth_frames or sum(samps_per_frame) == n_sig: - # Figure out the largest required dtype for the segment to minimize memory usage - max_dtype = _np_dtype(_fmt_res(fmt, max_res=True), discrete=True) - # Allocate signal array. Minimize dtype - signals = np.zeros([sampto-sampfrom, len(channels)], dtype=max_dtype) - - # Read each wanted dat file and store signals - for fn in w_file_name: - datsignals = _rd_dat_signals( - file_name=fn, - dir_name=dir_name, - pn_dir=pn_dir, - fmt=w_fmt[fn], - n_sig=len(datchannel[fn]), - sig_len=sig_len, - byte_offset=w_byte_offset[fn], - samps_per_frame=w_samps_per_frame[fn], - skew=w_skew[fn], - init_value=w_init_value[fn], - sampfrom=sampfrom, - sampto=sampto, - smooth_frames=smooth_frames, - no_file=no_file, - sig_data=sig_data) - signals[:, out_dat_channel[fn]] = datsignals[:, r_w_channel[fn]] - # Return each sample in signals with multiple samples/frame, without smoothing. # Return a list of numpy arrays for each signal. - else: - signals = [None] * len(channels) - - for fn in w_file_name: - # Get the list of all signals contained in the dat file - datsignals = _rd_dat_signals( - file_name=fn, - dir_name=dir_name, - pn_dir=pn_dir, - fmt=w_fmt[fn], - n_sig=len(datchannel[fn]), - sig_len=sig_len, - byte_offset=w_byte_offset[fn], - samps_per_frame=w_samps_per_frame[fn], - skew=w_skew[fn], - init_value=w_init_value[fn], - sampfrom=sampfrom, - sampto=sampto, - smooth_frames=smooth_frames, - no_file=no_file, - sig_data=sig_data) - - # Copy over the wanted signals - for cn in range(len(out_dat_channel[fn])): - signals[out_dat_channel[fn][cn]] = datsignals[r_w_channel[fn][cn]] + signals = [None] * len(channels) + + for fn in w_file_name: + # Get the list of all signals contained in the dat file + datsignals = _rd_dat_signals( + file_name=fn, + dir_name=dir_name, + pn_dir=pn_dir, + fmt=w_fmt[fn], + n_sig=len(datchannel[fn]), + sig_len=sig_len, + byte_offset=w_byte_offset[fn], + samps_per_frame=w_samps_per_frame[fn], + skew=w_skew[fn], + init_value=w_init_value[fn], + sampfrom=sampfrom, + sampto=sampto, + no_file=no_file, + sig_data=sig_data) + + # Copy over the wanted signals + for cn in range(len(out_dat_channel[fn])): + signals[out_dat_channel[fn][cn]] = datsignals[r_w_channel[fn][cn]] return signals def _rd_dat_signals(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset, samps_per_frame, skew, init_value, - sampfrom, sampto, smooth_frames, - no_file=False, sig_data=None): + sampfrom, sampto, no_file=False, sig_data=None): """ Read all signals from a WFDB dat file. @@ -1088,8 +1076,6 @@ def _rd_dat_signals(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, The starting sample number to be read from the signals. sampto : int The final sample number to be read from the signals. - smooth_frames : bool - Whether to smooth channels with multiple samples/frame. no_file : bool, optional Used when using this function with just an array of signal data and no associated file to read the data from. @@ -1100,16 +1086,13 @@ def _rd_dat_signals(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, Returns ------- signal : ndarray, list - The signals read from the dat file(s). A 2d numpy array is - returned if the signals have uniform samples/frame or if - `smooth_frames` is True. Otherwise a list of 1d numpy arrays - is returned. + The signals read from the dat file(s). Each signal is returned as a + one-dimensional numpy array. Notes ----- - 'channels', 'sampfrom', 'sampto', 'smooth_frames', and 'ignore_skew' - are user desired input fields. All other parameters are - specifications of the segment. + 'channels', 'sampfrom', 'sampto', and 'ignore_skew' are user desired + input fields. All other parameters are specifications of the segment. """ # Check for valid inputs @@ -1211,46 +1194,18 @@ def _rd_dat_signals(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, # At this point, dtype of sig_data is the minimum integer format # required for storing the final digital samples. - # No extra samples/frame. Obtain original uniform numpy array - if tsamps_per_frame == n_sig: - # Reshape into multiple channels - signal = sig_data.reshape(-1, n_sig) - # Skew the signal - signal = _skew_sig(signal, skew, n_sig, read_len, fmt, nan_replace) - # Extra frames present to be smoothed. Obtain averaged uniform numpy array - elif smooth_frames: - # Allocate memory for smoothed signal. - signal = np.zeros((int(len(sig_data) / tsamps_per_frame) , n_sig), - dtype=sig_data.dtype) - - # Transfer and average samples - for ch in range(n_sig): - if samps_per_frame[ch] == 1: - signal[:, ch] = sig_data[sum(([0] + samps_per_frame)[:ch + 1])::tsamps_per_frame] - else: - if ch == 0: - startind = 0 - else: - startind = np.sum(samps_per_frame[:ch]) - signal[:,ch] = [np.average(sig_data[ind:ind+samps_per_frame[ch]]) for ind in range(startind,len(sig_data),tsamps_per_frame)] - # Skew the signal - signal = _skew_sig(signal, skew, n_sig, read_len, fmt, nan_replace) - - # Extra frames present without wanting smoothing. Return all - # expanded samples. - else: - # List of 1d numpy arrays - signal = [] - # Transfer over samples - sig_frames = sig_data.reshape(-1, tsamps_per_frame) - ch_start = 0 - for ch in range(n_sig): - ch_end = ch_start + samps_per_frame[ch] - ch_signal = sig_frames[:, ch_start:ch_end].reshape(-1) - signal.append(ch_signal) - ch_start = ch_end - # Skew the signal - signal = _skew_sig(signal, skew, n_sig, read_len, fmt, nan_replace, samps_per_frame) + # List of 1d numpy arrays + signal = [] + # Transfer over samples + sig_frames = sig_data.reshape(-1, tsamps_per_frame) + ch_start = 0 + for ch in range(n_sig): + ch_end = ch_start + samps_per_frame[ch] + ch_signal = sig_frames[:, ch_start:ch_end].reshape(-1) + signal.append(ch_signal) + ch_start = ch_end + # Skew the signal + signal = _skew_sig(signal, skew, n_sig, read_len, fmt, nan_replace, samps_per_frame) # Integrity check of signal shape after reading _check_sig_dims(signal, read_len, n_sig, samps_per_frame) diff --git a/wfdb/io/record.py b/wfdb/io/record.py index bc51cda8..9d0ff047 100644 --- a/wfdb/io/record.py +++ b/wfdb/io/record.py @@ -625,9 +625,13 @@ def __eq__(self, other, verbose=False): print('Mismatch in attribute: %s' % k, v1, v2) return False - if type(v1) == np.ndarray: + if isinstance(v1, np.ndarray): # Necessary for nans np.testing.assert_array_equal(v1, v2) + elif (isinstance(v1, list) and len(v1) == len(v2) + and all(isinstance(e, np.ndarray) for e in v1)): + for (e1, e2) in zip(v1, v2): + np.testing.assert_array_equal(e1, e2) else: if v1 != v2: if verbose: @@ -664,7 +668,7 @@ def wrsamp(self, expanded=False, write_dir=''): self.wr_dats(expanded=expanded, write_dir=write_dir) - def _arrange_fields(self, channels, sampfrom=0, expanded=False): + def _arrange_fields(self, channels, sampfrom, smooth_frames): """ Arrange/edit object fields to reflect user channel and/or signal range input. @@ -673,10 +677,11 @@ def _arrange_fields(self, channels, sampfrom=0, expanded=False): ---------- channels : list List of channel numbers specified. - sampfrom : int, optional + sampfrom : int Starting sample number read. - expanded : bool, optional - Whether the record was read in expanded mode. + smooth_frames : bool + Whether to convert the expanded signal array (e_d_signal) into + a smooth signal array (d_signal). Returns ------- @@ -689,11 +694,11 @@ def _arrange_fields(self, channels, sampfrom=0, expanded=False): setattr(self, field, [item[c] for c in channels]) # Expanded signals - multiple samples per frame. - if expanded: + if not smooth_frames: # Checksum and init_value to be updated if present # unless the whole signal length was input if self.sig_len != int(len(self.e_d_signal[0]) / self.samps_per_frame[0]): - self.checksum = self.calc_checksum(expanded) + self.checksum = self.calc_checksum(True) self.init_value = [s[0] for s in self.e_d_signal] self.n_sig = len(channels) @@ -701,6 +706,9 @@ def _arrange_fields(self, channels, sampfrom=0, expanded=False): # MxN numpy array d_signal else: + self.d_signal = self.smooth_frames('digital') + self.e_d_signal = None + # Checksum and init_value to be updated if present # unless the whole signal length was input if self.sig_len != self.d_signal.shape[0]: @@ -3363,10 +3371,9 @@ def rdrecord(record_name, sampfrom=0, sampto=None, channels=None, directly return a WFDB MultiRecord object (False), or to convert it into and return a WFDB Record object (True). smooth_frames : bool, optional - Used when reading records with signals having multiple samples - per frame. Specifies whether to smooth the samples in signals - with more than one sample per frame and return an (MxN) uniform - numpy array as the `d_signal` or `p_signal` field (True), or to + Specifies whether to smooth the samples in signals with more + than one sample per frame and return an (MxN) uniform numpy + array as the `d_signal` or `p_signal` field (True), or to return a list of 1d numpy arrays containing every expanded sample as the `e_d_signal` or `e_p_signal` field (False). ignore_skew : bool, optional @@ -3514,7 +3521,7 @@ def rdrecord(record_name, sampfrom=0, sampto=None, channels=None, no_file = False sig_data = None - signals = _signal._rd_segment( + record.e_d_signal = _signal._rd_segment( file_name=record.file_name, dir_name=dir_name, pn_dir=pn_dir, @@ -3528,22 +3535,17 @@ def rdrecord(record_name, sampfrom=0, sampto=None, channels=None, sampfrom=sampfrom, sampto=sampto, channels=channels, - smooth_frames=smooth_frames, ignore_skew=ignore_skew, no_file=no_file, sig_data=sig_data, return_res=return_res) # Only 1 sample/frame, or frames are smoothed. Return uniform numpy array - if smooth_frames or max([record.samps_per_frame[c] for c in channels]) == 1: - # Read signals from the associated dat files that contain - # wanted channels - record.d_signal = signals - + if smooth_frames: # Arrange/edit the object fields to reflect user channel # and/or signal range input record._arrange_fields(channels=channels, sampfrom=sampfrom, - expanded=False) + smooth_frames=True) if physical: # Perform inplace dac to get physical signal @@ -3551,12 +3553,10 @@ def rdrecord(record_name, sampfrom=0, sampto=None, channels=None, # Return each sample of the signals with multiple samples per frame else: - record.e_d_signal = signals - # Arrange/edit the object fields to reflect user channel # and/or signal range input record._arrange_fields(channels=channels, sampfrom=sampfrom, - expanded=True) + smooth_frames=False) if physical: # Perform dac to get physical signal