From 8f4fd87ef2eca3522f63059998f4f36ec9e8090a Mon Sep 17 00:00:00 2001 From: Benjamin Moody Date: Thu, 8 Jul 2021 13:58:24 -0400 Subject: [PATCH 01/15] _rd_dat_signals: always honor the smooth_frames parameter. This function can return signal data in either of two formats: - "smooth" (a two-dimensional array, where x[t,s] is sample t of signal s) - "non-smooth" (a list of one-dimensional arrays, where x[s][t] is sample t of signal s) Previously, _rd_dat_signals would use "smooth" format if 'sum(samps_per_frame) == n_sig', regardless of 'smooth_frames'. This makes little sense, since the caller needs to know what type of return value to expect. Instead, the format should be determined solely by the 'smooth_frames' parameter. (In this case, the only caller of _rd_dat_signals is _rd_segment, and the only caller of _rd_segment is wfdb.io.record.rdrecord.) --- wfdb/io/_signal.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/wfdb/io/_signal.py b/wfdb/io/_signal.py index 868d3596..f7a68b3a 100644 --- a/wfdb/io/_signal.py +++ b/wfdb/io/_signal.py @@ -1089,7 +1089,7 @@ def _rd_dat_signals(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, sampto : int The final sample number to be read from the signals. smooth_frames : bool - Whether to smooth channels with multiple samples/frame. + Whether to return the result as a two-dimensional array. no_file : bool, optional Used when using this function with just an array of signal data and no associated file to read the data from. @@ -1101,9 +1101,8 @@ def _rd_dat_signals(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, ------- signal : ndarray, list The signals read from the dat file(s). A 2d numpy array is - returned if the signals have uniform samples/frame or if - `smooth_frames` is True. Otherwise a list of 1d numpy arrays - is returned. + returned if `smooth_frames` is True. Otherwise a list of 1d + numpy arrays is returned. Notes ----- @@ -1212,7 +1211,7 @@ def _rd_dat_signals(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, # required for storing the final digital samples. # No extra samples/frame. Obtain original uniform numpy array - if tsamps_per_frame == n_sig: + if smooth_frames and tsamps_per_frame == n_sig: # Reshape into multiple channels signal = sig_data.reshape(-1, n_sig) # Skew the signal From 3e84119d37c66deceaa2359c1a294717ac99075b Mon Sep 17 00:00:00 2001 From: Benjamin Moody Date: Thu, 8 Jul 2021 14:16:29 -0400 Subject: [PATCH 02/15] _rd_segment: always honor the smooth_frames parameter. This function can return signal data in either of two formats: - "smooth" (a two-dimensional array, where x[t,s] is sample t of signal s) - "non-smooth" (a list of one-dimensional arrays, where x[s][t] is sample t of signal s) Previously, _rd_segment would use "smooth" format if 'sum(samps_per_frame) == n_sig', regardless of 'smooth_frames'. This makes little sense, since the caller needs to know what type of return value to expect. Instead, the format should be determined solely by the 'smooth_frames' parameter. (In this case, the only caller of _rd_segment is wfdb.io.record.rdrecord.) --- wfdb/io/_signal.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/wfdb/io/_signal.py b/wfdb/io/_signal.py index f7a68b3a..c660269b 100644 --- a/wfdb/io/_signal.py +++ b/wfdb/io/_signal.py @@ -900,7 +900,7 @@ def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset, sampto : int The final sample number to be read from the signals. smooth_frames : bool - Whether to smooth channels with multiple samples/frame. + Whether to return the result as a two-dimensional array. ignore_skew : bool Used when reading records with at least one skewed signal. Specifies whether to apply the skew to align the signals in the @@ -922,9 +922,8 @@ def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset, ------- signals : ndarray, list The signals read from the dat file(s). A 2d numpy array is - returned if the signals have uniform samples/frame or if - `smooth_frames` is True. Otherwise a list of 1d numpy arrays - is returned. + returned if `smooth_frames` is True. Otherwise a list of 1d + numpy arrays is returned. Notes ----- @@ -996,7 +995,7 @@ def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset, # Signals with multiple samples/frame are smoothed, or all signals have 1 sample/frame. # Return uniform numpy array - if smooth_frames or sum(samps_per_frame) == n_sig: + if smooth_frames: # Figure out the largest required dtype for the segment to minimize memory usage max_dtype = _np_dtype(_fmt_res(fmt, max_res=True), discrete=True) # Allocate signal array. Minimize dtype From 1b9c90c3e001bcaf96cdad9ce03cd1ccd36fc303 Mon Sep 17 00:00:00 2001 From: Benjamin Moody Date: Thu, 8 Jul 2021 14:20:48 -0400 Subject: [PATCH 03/15] rdrecord: handle smooth_frames when maximum spf is 1. rdrecord can return signal data in any of four formats: - by resampling all signals to a uniform rate ("smoothing"), and setting d_signal to a two-dimensional array, where d_signal[t,s] is sample t of signal s - by resampling to a uniform rate, and setting p_signal to a two-dimensional array, where p_signal[t,s] is sample t of signal s converted into physical units - by setting e_d_signal to a list of one-dimensional arrays, where e_d_signal[s][t] is sample t of signal s - by setting e_p_signal to a list of one-dimensional arrays, where e_p_signal[s][t] is sample t of signal s converted into physical units If the selected signals contain multiple samples per frame, the behavior of rdrecord is consistent: - If smooth_frames is True, the selected signals are resampled to the frame rate and stored as d_signal or p_signal. - If smooth_frames is False, the selected signals are stored in their original form as e_d_signal or e_p_signal. However, if each of the selected signals contains only one sample per frame, rdrecord would previously behave inconsistently: - If all signals in the record contain only one sample per frame, and smooth_frames is False and physical is True, the selected signals would be stored as p_signal (not e_p_signal as the caller would expect.) - If all signals in the record contain only one sample per frame, and smooth_frames is False and physical is False, rdrecord would crash with a TypeError in convert_dtype. - If some signals in the record contain multiple samples per frame (but the selected signals don't), rdrecord would crash with an AttributeError in _arrange_fields. Change this behavior so that if smooth_frames is false, rdrecord will always store the signals as e_d_signal or e_p_signal, regardless of the underlying number of samples per frame. --- wfdb/io/record.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/wfdb/io/record.py b/wfdb/io/record.py index bc51cda8..0c772403 100644 --- a/wfdb/io/record.py +++ b/wfdb/io/record.py @@ -3363,10 +3363,9 @@ def rdrecord(record_name, sampfrom=0, sampto=None, channels=None, directly return a WFDB MultiRecord object (False), or to convert it into and return a WFDB Record object (True). smooth_frames : bool, optional - Used when reading records with signals having multiple samples - per frame. Specifies whether to smooth the samples in signals - with more than one sample per frame and return an (MxN) uniform - numpy array as the `d_signal` or `p_signal` field (True), or to + Specifies whether to smooth the samples in signals with more + than one sample per frame and return an (MxN) uniform numpy + array as the `d_signal` or `p_signal` field (True), or to return a list of 1d numpy arrays containing every expanded sample as the `e_d_signal` or `e_p_signal` field (False). ignore_skew : bool, optional @@ -3535,7 +3534,7 @@ def rdrecord(record_name, sampfrom=0, sampto=None, channels=None, return_res=return_res) # Only 1 sample/frame, or frames are smoothed. Return uniform numpy array - if smooth_frames or max([record.samps_per_frame[c] for c in channels]) == 1: + if smooth_frames: # Read signals from the associated dat files that contain # wanted channels record.d_signal = signals From d0173b4683cd9ddbd5504a6385c6b6270d29af46 Mon Sep 17 00:00:00 2001 From: Benjamin Moody Date: Thu, 8 Jul 2021 15:21:38 -0400 Subject: [PATCH 04/15] calc_checksum: handle non-smooth partial records. calc_checksum is called in order to calculate checksums of the signal data (d_signal or e_d_signal) in a record. In particular, if rdrecord is used to read only part of a record, it will call calc_checksum to determine the checksums of that part of the record. However, at that time, self.n_sig is still equal to the total number of signals in the input record (not the number of signals stored in d_signal or e_d_signal, which might be different if a subset of channels are selected). Thus, if expanded is true and self.n_sig > len(self.e_d_signal), this would crash. For simplicity, and consistency with the expanded=False case, ignore n_sig and simply calculate the checksums of e_d_signal. --- wfdb/io/_signal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wfdb/io/_signal.py b/wfdb/io/_signal.py index c660269b..902ce867 100644 --- a/wfdb/io/_signal.py +++ b/wfdb/io/_signal.py @@ -750,7 +750,7 @@ def calc_checksum(self, expanded=False): """ if expanded: - cs = [int(np.sum(self.e_d_signal[ch]) % 65536) for ch in range(self.n_sig)] + cs = [int(np.sum(s) % 65536) for s in self.e_d_signal] else: cs = np.sum(self.d_signal, 0) % 65536 cs = [int(c) for c in cs] From 7008e47a0fb089ee040b61887e5f637eb4c9ac32 Mon Sep 17 00:00:00 2001 From: Benjamin Moody Date: Thu, 8 Jul 2021 16:39:10 -0400 Subject: [PATCH 05/15] Record.__eq__: allow checking equality of lists of arrays. The Record.__eq__ function is currently used in the test suite to check whether two Record objects have the same contents. (This function doesn't actually conform to the standard Python API, but it works for this purpose.) In the case of "expanded"-format records, the e_p_signal and/or e_d_signal attributes are lists of numpy.ndarray objects, not numpy.ndarray objects themselves. In order to compare these for equality we must compare each element separately. --- wfdb/io/record.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/wfdb/io/record.py b/wfdb/io/record.py index 0c772403..ab705621 100644 --- a/wfdb/io/record.py +++ b/wfdb/io/record.py @@ -628,6 +628,10 @@ def __eq__(self, other, verbose=False): if type(v1) == np.ndarray: # Necessary for nans np.testing.assert_array_equal(v1, v2) + elif (type(v1) == list and len(v1) == len(v2) + and all(type(e) == np.ndarray for e in v1)): + for (e1, e2) in zip(v1, v2): + np.testing.assert_array_equal(e1, e2) else: if v1 != v2: if verbose: From 68bb86d42305d1fd46b40c47c10fd5f58fefdcdf Mon Sep 17 00:00:00 2001 From: Benjamin Moody Date: Thu, 8 Jul 2021 15:01:14 -0400 Subject: [PATCH 06/15] test_1c: test reading single-frequency record in non-smooth mode. When smooth_frames is False, rdrecord should store the signal data as a list of 1D arrays (each signal at its original sampling frequency) rather than as a "smooth" 2D array. Previously, this would fail if the input record contained only one sample per frame. Modify the existing test case to check that this works. --- tests/test_record.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/tests/test_record.py b/tests/test_record.py index ccdad08c..dc879a91 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -75,24 +75,30 @@ def test_1b(self): def test_1c(self): """ Format 16, byte offset, selected duration, selected channels, - digital. + digital, expanded format. Target file created with: rdsamp -r sample-data/a103l -f 80 -s 0 1 | cut -f 2- > record-1c """ record = wfdb.rdrecord('sample-data/a103l', - sampfrom=20000, channels=[0, 1], physical=False) - sig = record.d_signal + sampfrom=20000, channels=[0, 1], physical=False, + smooth_frames=False) + # convert expanded to uniform array + sig = np.zeros((record.sig_len, record.n_sig)) + for i in range(record.n_sig): + sig[:,i] = record.e_d_signal[i] + sig_target = np.genfromtxt('tests/target-output/record-1c') # Compare data streaming from Physionet record_pn = wfdb.rdrecord('a103l', pn_dir='challenge-2015/training', sampfrom=20000, channels=[0, 1], - physical=False) + physical=False, smooth_frames=False) # Test file writing - record.wrsamp() - record_write = wfdb.rdrecord('a103l', physical=False) + record.wrsamp(expanded=True) + record_write = wfdb.rdrecord('a103l', physical=False, + smooth_frames=False) assert np.array_equal(sig, sig_target) assert record.__eq__(record_pn) From 9d77075ebff003222c7edab84c94e6e6289481fc Mon Sep 17 00:00:00 2001 From: Benjamin Moody Date: Tue, 16 Nov 2021 12:58:21 -0500 Subject: [PATCH 07/15] Record.__eq__: replace exact type comparisons with isinstance. When comparing the attributes of two records, if a value is an instance of a class derived from 'np.ndarray' or a class derived from 'list', it should be treated the same way as an 'np.ndarray' or a 'list'. Note that there currently are no such classes defined or used by this package. Note also that in any case, the respective types of the two attributes must be equal in order for the values to be considered equal. --- wfdb/io/record.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/wfdb/io/record.py b/wfdb/io/record.py index ab705621..5ffeea80 100644 --- a/wfdb/io/record.py +++ b/wfdb/io/record.py @@ -625,11 +625,11 @@ def __eq__(self, other, verbose=False): print('Mismatch in attribute: %s' % k, v1, v2) return False - if type(v1) == np.ndarray: + if isinstance(v1, np.ndarray): # Necessary for nans np.testing.assert_array_equal(v1, v2) - elif (type(v1) == list and len(v1) == len(v2) - and all(type(e) == np.ndarray for e in v1)): + elif (isinstance(v1, list) and len(v1) == len(v2) + and all(isinstance(e, np.ndarray) for e in v1)): for (e1, e2) in zip(v1, v2): np.testing.assert_array_equal(e1, e2) else: From ca20b1a360b10315f2ec5eab6d2df382497b8583 Mon Sep 17 00:00:00 2001 From: Benjamin Moody Date: Tue, 15 Mar 2022 11:31:01 -0400 Subject: [PATCH 08/15] rdrecord: smooth frames by invoking smooth_frames(). To simplify the implementation of _rd_segment and _rd_dat_signals, we want to eliminate the smooth_frames argument, so that the return values of these two functions will always have the same type (a list of numpy arrays.) Therefore, if the application requested frame smoothing, then instead of calling _rd_segment with smooth_frames=True, we will call _rd_segment with smooth_frames=False, and post-process the result by calling Record.smooth_frames. Record.smooth_frames (SignalMixin.smooth_frames) will give a result equivalent to what _rd_segment gives with smooth_frames=True, but there are likely differences in performance: - Record.smooth_frames performs the computation by slicing along the "long" axis and storing the intermediate results in an int64 numpy array. _rd_dat_signals slices along the "short" axis and stores the intermediate results in a Python list. Record.smooth_frames should therefore be faster for large inputs. - Record.smooth_frames only operates on the channels present in e_d_signal, whereas _rd_dat_signals smooths all of the signals in the input file. Record.smooth_frames therefore saves memory and time when reading a subset of channels. - Record.smooth_frames always returns an int64 array, whereas _rd_dat_signals returns an array of the same type as the original data. Record.smooth_frames therefore uses more memory in many cases. (Note that rdrecord will post-process the result in any case, making this change invisible to applications; the issue of increased temporary memory usage can be addressed separately.) - If there are multiple channels in a signal file, then calling _rd_dat_signals with smooth_frames=False requires making an extra copy of each signal that has multiple samples per frame (because of the "reshape(-1)".) (This could be addressed in the future by allowing _rd_segment, or at least _rd_dat_signals, to return a list of *two-dimensional* arrays instead.) In order for this to work correctly, Record.smooth_frames must be called after setting both e_d_signal and samps_per_frame. In particular, it must be done after Record._arrange_fields "rearranges" samps_per_frame according to channels. On the other hand, _arrange_fields is expected to set checksum and init_value in different ways depending on whether the result is to be smoothed. (This use of checksum and init_value is somewhat dubious.) Therefore, smooth_frames is now invoked as part of _arrange_fields, after setting channel-specific metadata and before setting checksum and init_value. _arrange_fields should never be invoked other than by rdrecord; it doesn't make any sense to call this function at other times. Change the signature of this function to reflect the fact that it actively transforms the signal array, and make all arguments mandatory. --- wfdb/io/record.py | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/wfdb/io/record.py b/wfdb/io/record.py index 5ffeea80..38b8ae39 100644 --- a/wfdb/io/record.py +++ b/wfdb/io/record.py @@ -668,7 +668,7 @@ def wrsamp(self, expanded=False, write_dir=''): self.wr_dats(expanded=expanded, write_dir=write_dir) - def _arrange_fields(self, channels, sampfrom=0, expanded=False): + def _arrange_fields(self, channels, sampfrom, smooth_frames): """ Arrange/edit object fields to reflect user channel and/or signal range input. @@ -677,10 +677,11 @@ def _arrange_fields(self, channels, sampfrom=0, expanded=False): ---------- channels : list List of channel numbers specified. - sampfrom : int, optional + sampfrom : int Starting sample number read. - expanded : bool, optional - Whether the record was read in expanded mode. + smooth_frames : bool + Whether to convert the expanded signal array (e_d_signal) into + a smooth signal array (d_signal). Returns ------- @@ -693,11 +694,11 @@ def _arrange_fields(self, channels, sampfrom=0, expanded=False): setattr(self, field, [item[c] for c in channels]) # Expanded signals - multiple samples per frame. - if expanded: + if not smooth_frames: # Checksum and init_value to be updated if present # unless the whole signal length was input if self.sig_len != int(len(self.e_d_signal[0]) / self.samps_per_frame[0]): - self.checksum = self.calc_checksum(expanded) + self.checksum = self.calc_checksum(True) self.init_value = [s[0] for s in self.e_d_signal] self.n_sig = len(channels) @@ -705,6 +706,9 @@ def _arrange_fields(self, channels, sampfrom=0, expanded=False): # MxN numpy array d_signal else: + self.d_signal = self.smooth_frames('digital') + self.e_d_signal = None + # Checksum and init_value to be updated if present # unless the whole signal length was input if self.sig_len != self.d_signal.shape[0]: @@ -3517,7 +3521,7 @@ def rdrecord(record_name, sampfrom=0, sampto=None, channels=None, no_file = False sig_data = None - signals = _signal._rd_segment( + record.e_d_signal = _signal._rd_segment( file_name=record.file_name, dir_name=dir_name, pn_dir=pn_dir, @@ -3531,7 +3535,7 @@ def rdrecord(record_name, sampfrom=0, sampto=None, channels=None, sampfrom=sampfrom, sampto=sampto, channels=channels, - smooth_frames=smooth_frames, + smooth_frames=False, ignore_skew=ignore_skew, no_file=no_file, sig_data=sig_data, @@ -3539,14 +3543,10 @@ def rdrecord(record_name, sampfrom=0, sampto=None, channels=None, # Only 1 sample/frame, or frames are smoothed. Return uniform numpy array if smooth_frames: - # Read signals from the associated dat files that contain - # wanted channels - record.d_signal = signals - # Arrange/edit the object fields to reflect user channel # and/or signal range input record._arrange_fields(channels=channels, sampfrom=sampfrom, - expanded=False) + smooth_frames=True) if physical: # Perform inplace dac to get physical signal @@ -3554,12 +3554,10 @@ def rdrecord(record_name, sampfrom=0, sampto=None, channels=None, # Return each sample of the signals with multiple samples per frame else: - record.e_d_signal = signals - # Arrange/edit the object fields to reflect user channel # and/or signal range input record._arrange_fields(channels=channels, sampfrom=sampfrom, - expanded=True) + smooth_frames=False) if physical: # Perform dac to get physical signal From b2f39955f0ba42e9f422ddf409dbc0bb772bf226 Mon Sep 17 00:00:00 2001 From: Benjamin Moody Date: Tue, 15 Mar 2022 12:46:06 -0400 Subject: [PATCH 09/15] _rd_segment, _rd_dat_signals: remove support for smooth_frames=True. Now, _rd_segment is only ever called with smooth_frames=False (the function previously fulfilled by smooth_frames=True is handled by invoking Record.smooth_frames in Record._arrange_fields.) Accordingly, remove the logic for smoothing frames within _rd_dat_signals and for handling "smoothed" data in _rd_segment. --- wfdb/io/_signal.py | 155 +++++++++++++++------------------------------ 1 file changed, 50 insertions(+), 105 deletions(-) diff --git a/wfdb/io/_signal.py b/wfdb/io/_signal.py index 902ce867..7268c2f1 100644 --- a/wfdb/io/_signal.py +++ b/wfdb/io/_signal.py @@ -900,7 +900,7 @@ def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset, sampto : int The final sample number to be read from the signals. smooth_frames : bool - Whether to return the result as a two-dimensional array. + Deprecated. Must be set to False. ignore_skew : bool Used when reading records with at least one skewed signal. Specifies whether to apply the skew to align the signals in the @@ -920,16 +920,14 @@ def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset, Returns ------- - signals : ndarray, list - The signals read from the dat file(s). A 2d numpy array is - returned if `smooth_frames` is True. Otherwise a list of 1d - numpy arrays is returned. + signals : list + The signals read from the dat file(s). Each signal is returned as a + one-dimensional numpy array. Notes ----- - 'channels', 'sampfrom', 'sampto', 'smooth_frames', and 'ignore_skew' - are user desired input fields. All other parameters are - specifications of the segment. + 'channels', 'sampfrom', 'sampto', and 'ignore_skew' are user desired + input fields. All other parameters are specifications of the segment. """ # Check for valid inputs @@ -993,61 +991,35 @@ def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset, r_w_channel[fn] = [c - min(datchannel[fn]) for c in w_channel[fn]] out_dat_channel[fn] = [channels.index(c) for c in w_channel[fn]] - # Signals with multiple samples/frame are smoothed, or all signals have 1 sample/frame. - # Return uniform numpy array if smooth_frames: - # Figure out the largest required dtype for the segment to minimize memory usage - max_dtype = _np_dtype(_fmt_res(fmt, max_res=True), discrete=True) - # Allocate signal array. Minimize dtype - signals = np.zeros([sampto-sampfrom, len(channels)], dtype=max_dtype) - - # Read each wanted dat file and store signals - for fn in w_file_name: - datsignals = _rd_dat_signals( - file_name=fn, - dir_name=dir_name, - pn_dir=pn_dir, - fmt=w_fmt[fn], - n_sig=len(datchannel[fn]), - sig_len=sig_len, - byte_offset=w_byte_offset[fn], - samps_per_frame=w_samps_per_frame[fn], - skew=w_skew[fn], - init_value=w_init_value[fn], - sampfrom=sampfrom, - sampto=sampto, - smooth_frames=smooth_frames, - no_file=no_file, - sig_data=sig_data) - signals[:, out_dat_channel[fn]] = datsignals[:, r_w_channel[fn]] + raise ValueError('smooth_frames=True is not supported') # Return each sample in signals with multiple samples/frame, without smoothing. # Return a list of numpy arrays for each signal. - else: - signals = [None] * len(channels) - - for fn in w_file_name: - # Get the list of all signals contained in the dat file - datsignals = _rd_dat_signals( - file_name=fn, - dir_name=dir_name, - pn_dir=pn_dir, - fmt=w_fmt[fn], - n_sig=len(datchannel[fn]), - sig_len=sig_len, - byte_offset=w_byte_offset[fn], - samps_per_frame=w_samps_per_frame[fn], - skew=w_skew[fn], - init_value=w_init_value[fn], - sampfrom=sampfrom, - sampto=sampto, - smooth_frames=smooth_frames, - no_file=no_file, - sig_data=sig_data) - - # Copy over the wanted signals - for cn in range(len(out_dat_channel[fn])): - signals[out_dat_channel[fn][cn]] = datsignals[r_w_channel[fn][cn]] + signals = [None] * len(channels) + + for fn in w_file_name: + # Get the list of all signals contained in the dat file + datsignals = _rd_dat_signals( + file_name=fn, + dir_name=dir_name, + pn_dir=pn_dir, + fmt=w_fmt[fn], + n_sig=len(datchannel[fn]), + sig_len=sig_len, + byte_offset=w_byte_offset[fn], + samps_per_frame=w_samps_per_frame[fn], + skew=w_skew[fn], + init_value=w_init_value[fn], + sampfrom=sampfrom, + sampto=sampto, + smooth_frames=smooth_frames, + no_file=no_file, + sig_data=sig_data) + + # Copy over the wanted signals + for cn in range(len(out_dat_channel[fn])): + signals[out_dat_channel[fn][cn]] = datsignals[r_w_channel[fn][cn]] return signals @@ -1088,7 +1060,7 @@ def _rd_dat_signals(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, sampto : int The final sample number to be read from the signals. smooth_frames : bool - Whether to return the result as a two-dimensional array. + Deprecated. Must be set to False. no_file : bool, optional Used when using this function with just an array of signal data and no associated file to read the data from. @@ -1099,15 +1071,13 @@ def _rd_dat_signals(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, Returns ------- signal : ndarray, list - The signals read from the dat file(s). A 2d numpy array is - returned if `smooth_frames` is True. Otherwise a list of 1d - numpy arrays is returned. + The signals read from the dat file(s). Each signal is returned as a + one-dimensional numpy array. Notes ----- - 'channels', 'sampfrom', 'sampto', 'smooth_frames', and 'ignore_skew' - are user desired input fields. All other parameters are - specifications of the segment. + 'channels', 'sampfrom', 'sampto', and 'ignore_skew' are user desired + input fields. All other parameters are specifications of the segment. """ # Check for valid inputs @@ -1209,46 +1179,21 @@ def _rd_dat_signals(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, # At this point, dtype of sig_data is the minimum integer format # required for storing the final digital samples. - # No extra samples/frame. Obtain original uniform numpy array - if smooth_frames and tsamps_per_frame == n_sig: - # Reshape into multiple channels - signal = sig_data.reshape(-1, n_sig) - # Skew the signal - signal = _skew_sig(signal, skew, n_sig, read_len, fmt, nan_replace) - # Extra frames present to be smoothed. Obtain averaged uniform numpy array - elif smooth_frames: - # Allocate memory for smoothed signal. - signal = np.zeros((int(len(sig_data) / tsamps_per_frame) , n_sig), - dtype=sig_data.dtype) - - # Transfer and average samples - for ch in range(n_sig): - if samps_per_frame[ch] == 1: - signal[:, ch] = sig_data[sum(([0] + samps_per_frame)[:ch + 1])::tsamps_per_frame] - else: - if ch == 0: - startind = 0 - else: - startind = np.sum(samps_per_frame[:ch]) - signal[:,ch] = [np.average(sig_data[ind:ind+samps_per_frame[ch]]) for ind in range(startind,len(sig_data),tsamps_per_frame)] - # Skew the signal - signal = _skew_sig(signal, skew, n_sig, read_len, fmt, nan_replace) + if smooth_frames: + raise ValueError('smooth_frames=True is not supported') - # Extra frames present without wanting smoothing. Return all - # expanded samples. - else: - # List of 1d numpy arrays - signal = [] - # Transfer over samples - sig_frames = sig_data.reshape(-1, tsamps_per_frame) - ch_start = 0 - for ch in range(n_sig): - ch_end = ch_start + samps_per_frame[ch] - ch_signal = sig_frames[:, ch_start:ch_end].reshape(-1) - signal.append(ch_signal) - ch_start = ch_end - # Skew the signal - signal = _skew_sig(signal, skew, n_sig, read_len, fmt, nan_replace, samps_per_frame) + # List of 1d numpy arrays + signal = [] + # Transfer over samples + sig_frames = sig_data.reshape(-1, tsamps_per_frame) + ch_start = 0 + for ch in range(n_sig): + ch_end = ch_start + samps_per_frame[ch] + ch_signal = sig_frames[:, ch_start:ch_end].reshape(-1) + signal.append(ch_signal) + ch_start = ch_end + # Skew the signal + signal = _skew_sig(signal, skew, n_sig, read_len, fmt, nan_replace, samps_per_frame) # Integrity check of signal shape after reading _check_sig_dims(signal, read_len, n_sig, samps_per_frame) From d02c478be6b2ae753eaddb53175cc7d147a328ae Mon Sep 17 00:00:00 2001 From: Benjamin Moody Date: Tue, 15 Mar 2022 12:54:41 -0400 Subject: [PATCH 10/15] _rd_segment, _rd_dat_signals: remove smooth_frames parameter. The smooth_frames parameter has been deprecated, and these functions always return a list of arrays; remove the parameter. Note that _rd_segment is only ever called by rdrecord, and _rd_dat_signals is only ever called by _rd_segment. --- wfdb/io/_signal.py | 16 ++-------------- wfdb/io/record.py | 1 - 2 files changed, 2 insertions(+), 15 deletions(-) diff --git a/wfdb/io/_signal.py b/wfdb/io/_signal.py index 7268c2f1..14648c5f 100644 --- a/wfdb/io/_signal.py +++ b/wfdb/io/_signal.py @@ -866,7 +866,7 @@ def smooth_frames(self, sigtype='physical'): def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset, samps_per_frame, skew, init_value, sampfrom, sampto, channels, - smooth_frames, ignore_skew, no_file=False, sig_data=None, return_res=64): + ignore_skew, no_file=False, sig_data=None, return_res=64): """ Read the digital samples from a single segment record's associated dat file(s). @@ -899,8 +899,6 @@ def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset, The starting sample number to be read from the signals. sampto : int The final sample number to be read from the signals. - smooth_frames : bool - Deprecated. Must be set to False. ignore_skew : bool Used when reading records with at least one skewed signal. Specifies whether to apply the skew to align the signals in the @@ -991,9 +989,6 @@ def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset, r_w_channel[fn] = [c - min(datchannel[fn]) for c in w_channel[fn]] out_dat_channel[fn] = [channels.index(c) for c in w_channel[fn]] - if smooth_frames: - raise ValueError('smooth_frames=True is not supported') - # Return each sample in signals with multiple samples/frame, without smoothing. # Return a list of numpy arrays for each signal. signals = [None] * len(channels) @@ -1013,7 +1008,6 @@ def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset, init_value=w_init_value[fn], sampfrom=sampfrom, sampto=sampto, - smooth_frames=smooth_frames, no_file=no_file, sig_data=sig_data) @@ -1026,8 +1020,7 @@ def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset, def _rd_dat_signals(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset, samps_per_frame, skew, init_value, - sampfrom, sampto, smooth_frames, - no_file=False, sig_data=None): + sampfrom, sampto, no_file=False, sig_data=None): """ Read all signals from a WFDB dat file. @@ -1059,8 +1052,6 @@ def _rd_dat_signals(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, The starting sample number to be read from the signals. sampto : int The final sample number to be read from the signals. - smooth_frames : bool - Deprecated. Must be set to False. no_file : bool, optional Used when using this function with just an array of signal data and no associated file to read the data from. @@ -1179,9 +1170,6 @@ def _rd_dat_signals(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, # At this point, dtype of sig_data is the minimum integer format # required for storing the final digital samples. - if smooth_frames: - raise ValueError('smooth_frames=True is not supported') - # List of 1d numpy arrays signal = [] # Transfer over samples diff --git a/wfdb/io/record.py b/wfdb/io/record.py index 38b8ae39..9d0ff047 100644 --- a/wfdb/io/record.py +++ b/wfdb/io/record.py @@ -3535,7 +3535,6 @@ def rdrecord(record_name, sampfrom=0, sampto=None, channels=None, sampfrom=sampfrom, sampto=sampto, channels=channels, - smooth_frames=False, ignore_skew=ignore_skew, no_file=no_file, sig_data=sig_data, From 13a894866ec1398200d22c99e1b5329f9e93660a Mon Sep 17 00:00:00 2001 From: Benjamin Moody Date: Tue, 1 Mar 2022 10:20:27 -0500 Subject: [PATCH 11/15] SignalMixin.smooth_frames: consolidate duplicated logic. The logic for physical and digital cases is exactly the same except for the choice of input data and the resulting data type. --- wfdb/io/_signal.py | 39 ++++++++++++++++----------------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/wfdb/io/_signal.py b/wfdb/io/_signal.py index 14648c5f..2509b6e7 100644 --- a/wfdb/io/_signal.py +++ b/wfdb/io/_signal.py @@ -831,33 +831,26 @@ def smooth_frames(self, sigtype='physical'): tspf = sum(spf) if sigtype == 'physical': - n_sig = len(self.e_p_signal) - sig_len = int(len(self.e_p_signal[0])/spf[0]) - signal = np.zeros((sig_len, n_sig), dtype='float64') - - for ch in range(n_sig): - if spf[ch] == 1: - signal[:, ch] = self.e_p_signal[ch] - else: - for frame in range(spf[ch]): - signal[:, ch] += self.e_p_signal[ch][frame::spf[ch]] - signal[:, ch] = signal[:, ch] / spf[ch] - + expanded_signal = self.e_p_signal + output_dtype = np.dtype('float64') elif sigtype == 'digital': - n_sig = len(self.e_d_signal) - sig_len = int(len(self.e_d_signal[0])/spf[0]) - signal = np.zeros((sig_len, n_sig), dtype='int64') - - for ch in range(n_sig): - if spf[ch] == 1: - signal[:, ch] = self.e_d_signal[ch] - else: - for frame in range(spf[ch]): - signal[:, ch] += self.e_d_signal[ch][frame::spf[ch]] - signal[:, ch] = signal[:, ch] / spf[ch] + expanded_signal = self.e_d_signal + output_dtype = np.dtype('int64') else: raise ValueError("sigtype must be 'physical' or 'digital'") + n_sig = len(expanded_signal) + sig_len = int(len(expanded_signal[0])/spf[0]) + signal = np.zeros((sig_len, n_sig), dtype=output_dtype) + + for ch in range(n_sig): + if spf[ch] == 1: + signal[:, ch] = expanded_signal[ch] + else: + for frame in range(spf[ch]): + signal[:, ch] += expanded_signal[ch][frame::spf[ch]] + signal[:, ch] = signal[:, ch] / spf[ch] + return signal From b09bab41de09253a7d99e99e585a7def7094c03d Mon Sep 17 00:00:00 2001 From: Benjamin Moody Date: Tue, 1 Mar 2022 10:42:07 -0500 Subject: [PATCH 12/15] SignalMixin.smooth_frames: check lengths of input arrays. Ensure that all of the input signals have the expected lengths before trying to do anything else. (If any of the signal lengths were wrong, this would typically have raised a ValueError later on - though not always, since numpy will try to be clever if one of the arguments is an array of length 1.) --- wfdb/io/_signal.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/wfdb/io/_signal.py b/wfdb/io/_signal.py index 2509b6e7..94076fee 100644 --- a/wfdb/io/_signal.py +++ b/wfdb/io/_signal.py @@ -841,6 +841,12 @@ def smooth_frames(self, sigtype='physical'): n_sig = len(expanded_signal) sig_len = int(len(expanded_signal[0])/spf[0]) + for ch in range(n_sig): + if len(expanded_signal[ch]) != sig_len * spf[ch]: + raise ValueError("length mismatch: signal %d has %d samples," + " expected %dx%d" + % (ch, len(expanded_signal), + sig_len, spf[ch])) signal = np.zeros((sig_len, n_sig), dtype=output_dtype) for ch in range(n_sig): From 6db9ebfd56e9dee75979bc2fe9f3febeb47fb562 Mon Sep 17 00:00:00 2001 From: Benjamin Moody Date: Tue, 1 Mar 2022 11:05:13 -0500 Subject: [PATCH 13/15] SignalMixin.smooth_frames: use minimal data type for result. Instead of always returning the result as an int64 or float64 array, select the output type based on the types of the input arrays. The output type should be the smallest type that has the correct "kind" and is able to represent all input values. For example, in digital mode, if the input includes some int8 arrays and some int16 arrays, the result should be an int16 array. In physical mode, if the inputs are all float32, then the result will be float32; otherwise the result will be float64. However, although the output type should generally match the input type, intermediate results may need to be stored as a different type. For example, if the input and output are both int16, and one or more signals have spf > 1 and use the entire 16-bit range, then the sum of N samples will overflow an int16. Previously, it was fine simply to store the intermediate results in the output array itself, because the output array was 64-bit, and no WFDB format has more than 32-bit precision, and spf is (in practice) limited to at most 2**31-1. For simplicity, continue using int64 or float64 as the intermediate type, regardless of the actual input types and spf. At the same time, we can also optimize the calculation slightly by reshaping the input array and using np.sum, avoiding another Python loop. --- wfdb/io/_signal.py | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/wfdb/io/_signal.py b/wfdb/io/_signal.py index 94076fee..a47fa140 100644 --- a/wfdb/io/_signal.py +++ b/wfdb/io/_signal.py @@ -830,32 +830,53 @@ def smooth_frames(self, sigtype='physical'): # Total samples per frame tspf = sum(spf) + # The output data type should be the smallest type that can + # represent any input sample value. The intermediate data type + # must be able to represent the sum of spf[ch] sample values. + if sigtype == 'physical': expanded_signal = self.e_p_signal - output_dtype = np.dtype('float64') + intermediate_dtype = np.dtype('float64') + allowed_dtypes = [ + np.dtype('float32'), + np.dtype('float64'), + ] elif sigtype == 'digital': expanded_signal = self.e_d_signal - output_dtype = np.dtype('int64') + intermediate_dtype = np.dtype('int64') + allowed_dtypes = [ + np.dtype('int8'), + np.dtype('int16'), + np.dtype('int32'), + np.dtype('int64'), + ] else: raise ValueError("sigtype must be 'physical' or 'digital'") n_sig = len(expanded_signal) sig_len = int(len(expanded_signal[0])/spf[0]) + input_dtypes = set() for ch in range(n_sig): if len(expanded_signal[ch]) != sig_len * spf[ch]: raise ValueError("length mismatch: signal %d has %d samples," " expected %dx%d" % (ch, len(expanded_signal), sig_len, spf[ch])) + input_dtypes.add(expanded_signal[ch].dtype) + + for output_dtype in allowed_dtypes: + if all(dt <= output_dtype for dt in input_dtypes): + break + signal = np.zeros((sig_len, n_sig), dtype=output_dtype) for ch in range(n_sig): if spf[ch] == 1: signal[:, ch] = expanded_signal[ch] else: - for frame in range(spf[ch]): - signal[:, ch] += expanded_signal[ch][frame::spf[ch]] - signal[:, ch] = signal[:, ch] / spf[ch] + frames = expanded_signal[ch].reshape(-1, spf[ch]) + signal_sum = np.sum(frames, axis=1, dtype=intermediate_dtype) + signal[:, ch] = signal_sum / spf[ch] return signal From 186ae72c1ec39a84745f3151f7b2b8f4bd03087a Mon Sep 17 00:00:00 2001 From: Benjamin Moody Date: Tue, 1 Mar 2022 09:58:42 -0500 Subject: [PATCH 14/15] SignalMixin.smooth_frames: assorted optimizations. The variable tspf is not used and can be removed. sig_len can be calculated using // rather than / and truncating to an integer (the latter could be incorrect for records longer than 2**53.) The output array can be allocated using np.empty instead of np.zeros since it will be fully initialized by the subsequent loop. Since each frame is independent of the others, the loop can be broken up into blocks (here, arbitrarily chosen as 2**16 frames) to reduce temporary memory usage while still spending most of our CPU time in numpy operations. --- wfdb/io/_signal.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/wfdb/io/_signal.py b/wfdb/io/_signal.py index a47fa140..ededa1b9 100644 --- a/wfdb/io/_signal.py +++ b/wfdb/io/_signal.py @@ -827,9 +827,6 @@ def smooth_frames(self, sigtype='physical'): if spf[ch] is None: spf[ch] = 1 - # Total samples per frame - tspf = sum(spf) - # The output data type should be the smallest type that can # represent any input sample value. The intermediate data type # must be able to represent the sum of spf[ch] sample values. @@ -854,7 +851,7 @@ def smooth_frames(self, sigtype='physical'): raise ValueError("sigtype must be 'physical' or 'digital'") n_sig = len(expanded_signal) - sig_len = int(len(expanded_signal[0])/spf[0]) + sig_len = len(expanded_signal[0]) // spf[0] input_dtypes = set() for ch in range(n_sig): if len(expanded_signal[ch]) != sig_len * spf[ch]: @@ -868,15 +865,22 @@ def smooth_frames(self, sigtype='physical'): if all(dt <= output_dtype for dt in input_dtypes): break - signal = np.zeros((sig_len, n_sig), dtype=output_dtype) + signal = np.empty((sig_len, n_sig), dtype=output_dtype) + + # Large input arrays will be processed in chunks to avoid the need + # to allocate a single huge temporary array. + CHUNK_SIZE = 65536 for ch in range(n_sig): if spf[ch] == 1: signal[:, ch] = expanded_signal[ch] else: frames = expanded_signal[ch].reshape(-1, spf[ch]) - signal_sum = np.sum(frames, axis=1, dtype=intermediate_dtype) - signal[:, ch] = signal_sum / spf[ch] + for chunk_start in range(0, sig_len, CHUNK_SIZE): + chunk_end = chunk_start + CHUNK_SIZE + signal_sum = np.sum(frames[chunk_start:chunk_end, :], + axis=1, dtype=intermediate_dtype) + signal[chunk_start:chunk_end, ch] = signal_sum / spf[ch] return signal From 4cd1d7b35a841a777b410b3ff102299361303cf5 Mon Sep 17 00:00:00 2001 From: Benjamin Moody Date: Mon, 21 Mar 2022 13:18:33 -0400 Subject: [PATCH 15/15] TestRecord: test rdrecord with the return_res parameter. --- tests/test_record.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tests/test_record.py b/tests/test_record.py index dc879a91..b592ef7c 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -26,19 +26,22 @@ def test_1a(self): Target file created with: rdsamp -r sample-data/test01_00s | cut -f 2- > record-1a """ - record = wfdb.rdrecord('sample-data/test01_00s', physical=False) + record = wfdb.rdrecord('sample-data/test01_00s', + physical=False, return_res=16) sig = record.d_signal sig_target = np.genfromtxt('tests/target-output/record-1a') # Compare data streaming from Physionet - record_pn = wfdb.rdrecord('test01_00s', physical=False, - pn_dir='macecgdb') + record_pn = wfdb.rdrecord('test01_00s', pn_dir='macecgdb', + physical=False, return_res=16) # Test file writing - record_2 = wfdb.rdrecord('sample-data/test01_00s', physical=False) + record_2 = wfdb.rdrecord('sample-data/test01_00s', + physical=False, return_res=16) record_2.sig_name = ['ECG_1', 'ECG_2', 'ECG_3', 'ECG_4'] record_2.wrsamp() - record_write = wfdb.rdrecord('test01_00s', physical=False) + record_write = wfdb.rdrecord('test01_00s', + physical=False, return_res=16) assert np.array_equal(sig, sig_target) assert record.__eq__(record_pn)