MIT-LCP · bemoody · Mar 28, 2022 · Jul 8, 2021 · Jul 8, 2021 · Jul 8, 2021
diff --git a/tests/test_record.py b/tests/test_record.py
@@ -26,19 +26,22 @@ def test_1a(self):
         Target file created with:
             rdsamp -r sample-data/test01_00s | cut -f 2- > record-1a
         """
-        record = wfdb.rdrecord('sample-data/test01_00s', physical=False)
+        record = wfdb.rdrecord('sample-data/test01_00s',
+                               physical=False, return_res=16)
         sig = record.d_signal
         sig_target = np.genfromtxt('tests/target-output/record-1a')
 
         # Compare data streaming from Physionet
-        record_pn = wfdb.rdrecord('test01_00s', physical=False,
-                                  pn_dir='macecgdb')
+        record_pn = wfdb.rdrecord('test01_00s', pn_dir='macecgdb',
+                                  physical=False, return_res=16)
 
         # Test file writing
-        record_2 = wfdb.rdrecord('sample-data/test01_00s', physical=False)
+        record_2 = wfdb.rdrecord('sample-data/test01_00s',
+                                 physical=False, return_res=16)
         record_2.sig_name = ['ECG_1', 'ECG_2', 'ECG_3', 'ECG_4']
         record_2.wrsamp()
-        record_write = wfdb.rdrecord('test01_00s', physical=False)
+        record_write = wfdb.rdrecord('test01_00s',
+                                     physical=False, return_res=16)
 
         assert np.array_equal(sig, sig_target)
         assert record.__eq__(record_pn)
@@ -75,24 +78,30 @@ def test_1b(self):
     def test_1c(self):
         """
         Format 16, byte offset, selected duration, selected channels,
-        digital.
+        digital, expanded format.
 
         Target file created with:
             rdsamp -r sample-data/a103l -f 80 -s 0 1 | cut -f 2- > record-1c
         """
         record = wfdb.rdrecord('sample-data/a103l',
-                               sampfrom=20000, channels=[0, 1], physical=False)
-        sig = record.d_signal
+                               sampfrom=20000, channels=[0, 1], physical=False,
+                               smooth_frames=False)
+        # convert expanded to uniform array
+        sig = np.zeros((record.sig_len, record.n_sig))
+        for i in range(record.n_sig):
+            sig[:,i] = record.e_d_signal[i]
+
         sig_target = np.genfromtxt('tests/target-output/record-1c')
 
         # Compare data streaming from Physionet
         record_pn = wfdb.rdrecord('a103l', pn_dir='challenge-2015/training',
                                   sampfrom=20000, channels=[0, 1],
-                                  physical=False)
+                                  physical=False, smooth_frames=False)
 
         # Test file writing
-        record.wrsamp()
-        record_write = wfdb.rdrecord('a103l', physical=False)
+        record.wrsamp(expanded=True)
+        record_write = wfdb.rdrecord('a103l', physical=False,
+                                     smooth_frames=False)
 
         assert np.array_equal(sig, sig_target)
         assert record.__eq__(record_pn)

diff --git a/wfdb/io/_signal.py b/wfdb/io/_signal.py
@@ -750,7 +750,7 @@ def calc_checksum(self, expanded=False):
 
         """
         if expanded:
-            cs = [int(np.sum(self.e_d_signal[ch]) % 65536) for ch in range(self.n_sig)]
+            cs = [int(np.sum(s) % 65536) for s in self.e_d_signal]
         else:
             cs = np.sum(self.d_signal, 0) % 65536
             cs = [int(c) for c in cs]
@@ -827,37 +827,61 @@ def smooth_frames(self, sigtype='physical'):
             if spf[ch] is None:
                 spf[ch] = 1
 
-        # Total samples per frame
-        tspf = sum(spf)
+        # The output data type should be the smallest type that can
+        # represent any input sample value.  The intermediate data type
+        # must be able to represent the sum of spf[ch] sample values.
 
         if sigtype == 'physical':
-            n_sig = len(self.e_p_signal)
-            sig_len = int(len(self.e_p_signal[0])/spf[0])
-            signal = np.zeros((sig_len, n_sig), dtype='float64')
-
-            for ch in range(n_sig):
-                if spf[ch] == 1:
-                    signal[:, ch] = self.e_p_signal[ch]
-                else:
-                    for frame in range(spf[ch]):
-                        signal[:, ch] += self.e_p_signal[ch][frame::spf[ch]]
-                    signal[:, ch] = signal[:, ch] / spf[ch]
-
+            expanded_signal = self.e_p_signal
+            intermediate_dtype = np.dtype('float64')
+            allowed_dtypes = [
+                np.dtype('float32'),
+                np.dtype('float64'),
+            ]
         elif sigtype == 'digital':
-            n_sig = len(self.e_d_signal)
-            sig_len = int(len(self.e_d_signal[0])/spf[0])
-            signal = np.zeros((sig_len, n_sig), dtype='int64')
-
-            for ch in range(n_sig):
-                if spf[ch] == 1:
-                    signal[:, ch] = self.e_d_signal[ch]
-                else:
-                    for frame in range(spf[ch]):
-                        signal[:, ch] += self.e_d_signal[ch][frame::spf[ch]]
-                    signal[:, ch] = signal[:, ch] / spf[ch]
+            expanded_signal = self.e_d_signal
+            intermediate_dtype = np.dtype('int64')
+            allowed_dtypes = [
+                np.dtype('int8'),
+                np.dtype('int16'),
+                np.dtype('int32'),
+                np.dtype('int64'),
+            ]
         else:
             raise ValueError("sigtype must be 'physical' or 'digital'")
 
+        n_sig = len(expanded_signal)
+        sig_len = len(expanded_signal[0]) // spf[0]
+        input_dtypes = set()
+        for ch in range(n_sig):
+            if len(expanded_signal[ch]) != sig_len * spf[ch]:
+                raise ValueError("length mismatch: signal %d has %d samples,"
+                                 " expected %dx%d"
+                                 % (ch, len(expanded_signal),
+                                    sig_len, spf[ch]))
+            input_dtypes.add(expanded_signal[ch].dtype)
+
+        for output_dtype in allowed_dtypes:
+            if all(dt <= output_dtype for dt in input_dtypes):
+                break
+
+        signal = np.empty((sig_len, n_sig), dtype=output_dtype)
+
+        # Large input arrays will be processed in chunks to avoid the need
+        # to allocate a single huge temporary array.
+        CHUNK_SIZE = 65536
+
+        for ch in range(n_sig):
+            if spf[ch] == 1:
+                signal[:, ch] = expanded_signal[ch]
+            else:
+                frames = expanded_signal[ch].reshape(-1, spf[ch])
+                for chunk_start in range(0, sig_len, CHUNK_SIZE):
+                    chunk_end = chunk_start + CHUNK_SIZE
+                    signal_sum = np.sum(frames[chunk_start:chunk_end, :],
+                                        axis=1, dtype=intermediate_dtype)
+                    signal[chunk_start:chunk_end, ch] = signal_sum / spf[ch]
+
         return signal
 
 
@@ -866,7 +890,7 @@ def smooth_frames(self, sigtype='physical'):
 
 def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset,
                 samps_per_frame, skew, init_value, sampfrom, sampto, channels,
-                smooth_frames, ignore_skew, no_file=False, sig_data=None, return_res=64):
+                ignore_skew, no_file=False, sig_data=None, return_res=64):
     """
     Read the digital samples from a single segment record's associated
     dat file(s).
@@ -899,8 +923,6 @@ def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset,
         The starting sample number to be read from the signals.
     sampto : int
         The final sample number to be read from the signals.
-    smooth_frames : bool
-        Whether to smooth channels with multiple samples/frame.
     ignore_skew : bool
         Used when reading records with at least one skewed signal.
         Specifies whether to apply the skew to align the signals in the
@@ -920,17 +942,14 @@ def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset,
 
     Returns
     -------
-    signals : ndarray, list
-        The signals read from the dat file(s). A 2d numpy array is
-        returned if the signals have uniform samples/frame or if
-        `smooth_frames` is True. Otherwise a list of 1d numpy arrays
-        is returned.
+    signals : list
+        The signals read from the dat file(s). Each signal is returned as a
+        one-dimensional numpy array.
 
     Notes
     -----
-    'channels', 'sampfrom', 'sampto', 'smooth_frames', and 'ignore_skew'
-    are user desired input fields. All other parameters are
-    specifications of the segment.
+    'channels', 'sampfrom', 'sampto', and 'ignore_skew' are user desired
+    input fields. All other parameters are specifications of the segment.
 
     """
     # Check for valid inputs
@@ -994,69 +1013,38 @@ def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset,
         r_w_channel[fn] = [c - min(datchannel[fn]) for c in w_channel[fn]]
         out_dat_channel[fn] = [channels.index(c) for c in w_channel[fn]]
 
-    # Signals with multiple samples/frame are smoothed, or all signals have 1 sample/frame.
-    # Return uniform numpy array
-    if smooth_frames or sum(samps_per_frame) == n_sig:
-        # Figure out the largest required dtype for the segment to minimize memory usage
-        max_dtype = _np_dtype(_fmt_res(fmt, max_res=True), discrete=True)
-        # Allocate signal array. Minimize dtype
-        signals = np.zeros([sampto-sampfrom, len(channels)], dtype=max_dtype)
-
-        # Read each wanted dat file and store signals
-        for fn in w_file_name:
-            datsignals = _rd_dat_signals(
-                file_name=fn,
-                dir_name=dir_name,
-                pn_dir=pn_dir,
-                fmt=w_fmt[fn],
-                n_sig=len(datchannel[fn]),
-                sig_len=sig_len,
-                byte_offset=w_byte_offset[fn],
-                samps_per_frame=w_samps_per_frame[fn],
-                skew=w_skew[fn],
-                init_value=w_init_value[fn],
-                sampfrom=sampfrom,
-                sampto=sampto,
-                smooth_frames=smooth_frames,
-                no_file=no_file,
-                sig_data=sig_data)
-            signals[:, out_dat_channel[fn]] = datsignals[:, r_w_channel[fn]]
-
     # Return each sample in signals with multiple samples/frame, without smoothing.
     # Return a list of numpy arrays for each signal.
-    else:
-        signals = [None] * len(channels)
-
-        for fn in w_file_name:
-            # Get the list of all signals contained in the dat file
-            datsignals = _rd_dat_signals(
-                file_name=fn,
-                dir_name=dir_name,
-                pn_dir=pn_dir,
-                fmt=w_fmt[fn],
-                n_sig=len(datchannel[fn]),
-                sig_len=sig_len,
-                byte_offset=w_byte_offset[fn],
-                samps_per_frame=w_samps_per_frame[fn],
-                skew=w_skew[fn],
-                init_value=w_init_value[fn],
-                sampfrom=sampfrom,
-                sampto=sampto,
-                smooth_frames=smooth_frames,
-                no_file=no_file,
-                sig_data=sig_data)
-
-            # Copy over the wanted signals
-            for cn in range(len(out_dat_channel[fn])):
-                signals[out_dat_channel[fn][cn]] = datsignals[r_w_channel[fn][cn]]
+    signals = [None] * len(channels)
+
+    for fn in w_file_name:
+        # Get the list of all signals contained in the dat file
+        datsignals = _rd_dat_signals(
+            file_name=fn,
+            dir_name=dir_name,
+            pn_dir=pn_dir,
+            fmt=w_fmt[fn],
+            n_sig=len(datchannel[fn]),
+            sig_len=sig_len,
+            byte_offset=w_byte_offset[fn],
+            samps_per_frame=w_samps_per_frame[fn],
+            skew=w_skew[fn],
+            init_value=w_init_value[fn],
+            sampfrom=sampfrom,
+            sampto=sampto,
+            no_file=no_file,
+            sig_data=sig_data)
+
+        # Copy over the wanted signals
+        for cn in range(len(out_dat_channel[fn])):
+            signals[out_dat_channel[fn][cn]] = datsignals[r_w_channel[fn][cn]]
 
     return signals
 
 
 def _rd_dat_signals(file_name, dir_name, pn_dir, fmt, n_sig, sig_len,
                     byte_offset, samps_per_frame, skew, init_value,
-                    sampfrom, sampto, smooth_frames,
-                    no_file=False, sig_data=None):
+                    sampfrom, sampto, no_file=False, sig_data=None):
     """
     Read all signals from a WFDB dat file.
 
@@ -1088,8 +1076,6 @@ def _rd_dat_signals(file_name, dir_name, pn_dir, fmt, n_sig, sig_len,
         The starting sample number to be read from the signals.
     sampto : int
         The final sample number to be read from the signals.
-    smooth_frames : bool
-        Whether to smooth channels with multiple samples/frame.
     no_file : bool, optional
         Used when using this function with just an array of signal data
         and no associated file to read the data from.
@@ -1100,16 +1086,13 @@ def _rd_dat_signals(file_name, dir_name, pn_dir, fmt, n_sig, sig_len,
     Returns
     -------
     signal : ndarray, list
-        The signals read from the dat file(s). A 2d numpy array is
-        returned if the signals have uniform samples/frame or if
-        `smooth_frames` is True. Otherwise a list of 1d numpy arrays
-        is returned.
+        The signals read from the dat file(s). Each signal is returned as a
+        one-dimensional numpy array.
 
     Notes
     -----
-    'channels', 'sampfrom', 'sampto', 'smooth_frames', and 'ignore_skew'
-    are user desired input fields. All other parameters are
-    specifications of the segment.
+    'channels', 'sampfrom', 'sampto', and 'ignore_skew' are user desired
+    input fields. All other parameters are specifications of the segment.
 
     """
     # Check for valid inputs
@@ -1211,46 +1194,18 @@ def _rd_dat_signals(file_name, dir_name, pn_dir, fmt, n_sig, sig_len,
     # At this point, dtype of sig_data is the minimum integer format
     # required for storing the final digital samples.
 
-    # No extra samples/frame. Obtain original uniform numpy array
-    if tsamps_per_frame == n_sig:
-        # Reshape into multiple channels
-        signal = sig_data.reshape(-1, n_sig)
-        # Skew the signal
-        signal = _skew_sig(signal, skew, n_sig, read_len, fmt, nan_replace)
-    # Extra frames present to be smoothed. Obtain averaged uniform numpy array
-    elif smooth_frames:
-        # Allocate memory for smoothed signal.
-        signal = np.zeros((int(len(sig_data) / tsamps_per_frame) , n_sig),
-                       dtype=sig_data.dtype)
-
-        # Transfer and average samples
-        for ch in range(n_sig):
-            if samps_per_frame[ch] == 1:
-                signal[:, ch] = sig_data[sum(([0] + samps_per_frame)[:ch + 1])::tsamps_per_frame]
-            else:
-                if ch == 0:
-                    startind = 0
-                else:
-                    startind = np.sum(samps_per_frame[:ch])
-                signal[:,ch] = [np.average(sig_data[ind:ind+samps_per_frame[ch]]) for ind in range(startind,len(sig_data),tsamps_per_frame)]
-        # Skew the signal
-        signal = _skew_sig(signal, skew, n_sig, read_len, fmt, nan_replace)
-
-    # Extra frames present without wanting smoothing. Return all
-    # expanded samples.
-    else:
-        # List of 1d numpy arrays
-        signal = []
-        # Transfer over samples
-        sig_frames = sig_data.reshape(-1, tsamps_per_frame)
-        ch_start = 0
-        for ch in range(n_sig):
-            ch_end = ch_start + samps_per_frame[ch]
-            ch_signal = sig_frames[:, ch_start:ch_end].reshape(-1)
-            signal.append(ch_signal)
-            ch_start = ch_end
-        # Skew the signal
-        signal = _skew_sig(signal, skew, n_sig, read_len, fmt, nan_replace, samps_per_frame)
+    # List of 1d numpy arrays
+    signal = []
+    # Transfer over samples
+    sig_frames = sig_data.reshape(-1, tsamps_per_frame)
+    ch_start = 0
+    for ch in range(n_sig):
+        ch_end = ch_start + samps_per_frame[ch]
+        ch_signal = sig_frames[:, ch_start:ch_end].reshape(-1)
+        signal.append(ch_signal)
+        ch_start = ch_end
+    # Skew the signal
+    signal = _skew_sig(signal, skew, n_sig, read_len, fmt, nan_replace, samps_per_frame)
 
     # Integrity check of signal shape after reading
     _check_sig_dims(signal, read_len, n_sig, samps_per_frame)