From 04ae55fc8f3e3c687224fd000d33feda94885769 Mon Sep 17 00:00:00 2001 From: Benjamin Moody Date: Thu, 30 Sep 2021 11:59:36 -0400 Subject: [PATCH 1/8] Fix documentation of the internal variable 'filebytes'. This variable contains the complete contents of the input annotation file, as a numpy array of pairs of bytes (shape=(N,2), dtype='uint8'). It is neither a str nor a bytes object. --- wfdb/io/annotation.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/wfdb/io/annotation.py b/wfdb/io/annotation.py index 7ae619bb..2ef078f6 100644 --- a/wfdb/io/annotation.py +++ b/wfdb/io/annotation.py @@ -1748,8 +1748,8 @@ def load_byte_pairs(record_name, extension, pn_dir): Returns ------- - filebytes : str - The input filestream converted to bytes. + filebytes : ndarray + The input filestream converted to an Nx2 array of unsigned bytes. """ # local file @@ -1769,8 +1769,8 @@ def proc_ann_bytes(filebytes, sampto): Parameters ---------- - filebytes : str - The input filestream converted to bytes. + filebytes : ndarray + The input filestream converted to an Nx2 array of unsigned bytes. sampto : int The maximum sample number for annotations to be returned. @@ -1852,8 +1852,8 @@ def proc_core_fields(filebytes, bpi): Parameters ---------- - filebytes : str - The input filestream converted to bytes. + filebytes : ndarray + The input filestream converted to an Nx2 array of unsigned bytes. bpi : int The index to start the conversion. From 2dd48457f5736ed0841cc37e9886cc08634096a5 Mon Sep 17 00:00:00 2001 From: Benjamin Moody Date: Wed, 15 Sep 2021 17:08:47 -0400 Subject: [PATCH 2/8] rdann: handle multiple consecutive SKIPs. In WFDB-format annotation files, annotation timestamps are represented as an offset from the previous annotation. When this offset is less than 0 or greater than 1023, a SKIP pseudo-annotation is used; when the offset is greater than 2**31 - 1 or less than -2**31, multiple SKIPs must be used. Thus, proc_core_fields must be able to handle an arbitrary number of SKIPs in a row, preceding the actual annotation, and add all of the offsets together to obtain the final timestamp. --- wfdb/io/annotation.py | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/wfdb/io/annotation.py b/wfdb/io/annotation.py index 2ef078f6..db6b5b9f 100644 --- a/wfdb/io/annotation.py +++ b/wfdb/io/annotation.py @@ -1869,31 +1869,26 @@ def proc_core_fields(filebytes, bpi): The index to start the conversion. """ - label_store = filebytes[bpi, 1] >> 2 + sample_diff = 0 # The current byte pair will contain either the actual d_sample + annotation store value, # or 0 + SKIP. - - # Not a skip - it is the actual sample number + annotation type store value - if label_store != 59: - sample_diff = filebytes[bpi, 0] + 256 * (filebytes[bpi, 1] & 3) - bpi = bpi + 1 - # Skip. Note: Could there be another skip after the first? - else: + while filebytes[bpi, 1] >> 2 == 59: # 4 bytes storing dt - sample_diff = 65536 * filebytes[bpi + 1,0] + 16777216 * filebytes[bpi + 1,1] \ + skip_diff = 65536 * filebytes[bpi + 1,0] + 16777216 * filebytes[bpi + 1,1] \ + filebytes[bpi + 2,0] + 256 * filebytes[bpi + 2,1] # Data type is long integer (stored in two's complement). Range -2**31 to 2**31 - 1 - if sample_diff > 2147483647: - sample_diff = sample_diff - 4294967296 + if skip_diff > 2147483647: + skip_diff = skip_diff - 4294967296 - # After the 4 bytes, the next pair's samp is also added - sample_diff = sample_diff + filebytes[bpi + 3, 0] + 256 * (filebytes[bpi + 3, 1] & 3) + sample_diff += skip_diff + bpi = bpi + 3 - # The label is stored after the 4 bytes. Samples here should be 0. - label_store = filebytes[bpi + 3, 1] >> 2 - bpi = bpi + 4 + # Not a skip - it is the actual sample number + annotation type store value + label_store = filebytes[bpi, 1] >> 2 + sample_diff += filebytes[bpi, 0] + 256 * (filebytes[bpi, 1] & 3) + bpi = bpi + 1 return sample_diff, label_store, bpi From 94449b85fb145a5a6a1dff6b50d10c5401b17027 Mon Sep 17 00:00:00 2001 From: Benjamin Moody Date: Thu, 30 Sep 2021 12:08:47 -0400 Subject: [PATCH 3/8] rdann: compute timestamps as int, not a numpy integer. When reading an annotation file in WFDB format, the timestamp (sample number) must be computed by adding up the relative timestamp difference for each annotation. For long records, sample numbers can easily exceed 2**32. The input to proc_core_fields is a numpy array, so if we operate on the byte values with ordinary arithmetic operations, the result will be a numpy integer object with numpy's default precision (i.e., int32 on 32-bit architectures, int64 on 64-bit architectures.) Instead, calculate the result as a Python integer, to avoid architecture-dependent behavior and (possible) silent wrapping. (Furthermore, use left-shift operations instead of multiplying by constants that are hard to remember.) --- wfdb/io/annotation.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/wfdb/io/annotation.py b/wfdb/io/annotation.py index db6b5b9f..bc41e8da 100644 --- a/wfdb/io/annotation.py +++ b/wfdb/io/annotation.py @@ -1875,8 +1875,10 @@ def proc_core_fields(filebytes, bpi): # or 0 + SKIP. while filebytes[bpi, 1] >> 2 == 59: # 4 bytes storing dt - skip_diff = 65536 * filebytes[bpi + 1,0] + 16777216 * filebytes[bpi + 1,1] \ - + filebytes[bpi + 2,0] + 256 * filebytes[bpi + 2,1] + skip_diff = ((int(filebytes[bpi + 1, 0]) << 16) + + (int(filebytes[bpi + 1, 1]) << 24) + + (int(filebytes[bpi + 2, 0]) << 0) + + (int(filebytes[bpi + 2, 1]) << 8)) # Data type is long integer (stored in two's complement). Range -2**31 to 2**31 - 1 if skip_diff > 2147483647: @@ -1887,7 +1889,7 @@ def proc_core_fields(filebytes, bpi): # Not a skip - it is the actual sample number + annotation type store value label_store = filebytes[bpi, 1] >> 2 - sample_diff += filebytes[bpi, 0] + 256 * (filebytes[bpi, 1] & 3) + sample_diff += int(filebytes[bpi, 0] + 256 * (filebytes[bpi, 1] & 3)) bpi = bpi + 1 return sample_diff, label_store, bpi From c098e38b2945da6dce7fd6ea9e759bf4d7d94bfc Mon Sep 17 00:00:00 2001 From: Benjamin Moody Date: Thu, 30 Sep 2021 12:26:23 -0400 Subject: [PATCH 4/8] rdann: store sample as an array of int64. For long records, annotation timestamps (sample numbers) can easily exceed the range of a numpy 'int' on 32-bit architectures. Therefore, store the 'sample' array as 'int64' instead. --- wfdb/io/annotation.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/wfdb/io/annotation.py b/wfdb/io/annotation.py index bc41e8da..cf9c9da7 100644 --- a/wfdb/io/annotation.py +++ b/wfdb/io/annotation.py @@ -1653,8 +1653,11 @@ def rdann(record_name, extension, sampfrom=0, sampto=None, shift_samps=False, subtype, chan, num, aux_note) # Convert lists to numpy arrays dtype='int' - (sample, label_store, subtype, - chan, num) = lists_to_int_arrays(sample, label_store, subtype, chan, num) + (label_store, subtype, + chan, num) = lists_to_int_arrays(label_store, subtype, chan, num) + + # Convert sample numbers to a numpy array of 'int64' + sample = np.array(sample, dtype='int64') # Try to get fs from the header file if it is not contained in the # annotation file From 481978e7c7f51f9796d9d95dc0ca8a5d25708eda Mon Sep 17 00:00:00 2001 From: Benjamin Moody Date: Wed, 15 Sep 2021 17:09:16 -0400 Subject: [PATCH 5/8] wrann: allow intervals larger than 2**31 - 1. If the gap between two consecutive annotation timestamps is greater than 2**31 - 1 ticks, it must be represented as two or more SKIP pseudo-annotations. Handle this correctly in field2bytes() (to actually generate the correct byte sequences) and in Annotation.check_field() (to permit the application to specify such a gap.) (Previously, if there was a gap of exactly 2**31 ticks, this would not be caught by check_field, and field2bytes would incorrectly generate a SKIP of -2**31 instead.) --- wfdb/io/annotation.py | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/wfdb/io/annotation.py b/wfdb/io/annotation.py index cf9c9da7..041083cd 100644 --- a/wfdb/io/annotation.py +++ b/wfdb/io/annotation.py @@ -466,8 +466,6 @@ def check_field(self, field): raise ValueError("The 'sample' field must only contain non-negative integers") if min(sampdiffs) < 0 : raise ValueError("The 'sample' field must contain monotonically increasing sample numbers") - if max(sampdiffs) > 2147483648: - raise ValueError('WFDB annotation files cannot store sample differences greater than 2**31') elif field == 'label_store': if min(item) < 1 or max(item) > 49: @@ -1370,19 +1368,19 @@ def field2bytes(field, value): # sample difference sd = value[0] - # Add SKIP element if value is too large for single byte - if sd>1023: - # 8 bytes in total: - # - [0, 59>>2] indicates SKIP - # - Next 4 gives sample difference - # - Final 2 give 0 and sym - data_bytes = [0, 236, (sd&16711680)>>16, (sd&4278190080)>>24, sd&255, (sd&65280)>>8, 0, 4*typecode] - # Just need samp and sym - else: - # - First byte stores low 8 bits of samp - # - Second byte stores high 2 bits of samp - # and sym - data_bytes = [sd & 255, ((sd & 768) >> 8) + 4*typecode] + data_bytes = [] + # Add SKIP elements if value is too large + while sd > 0x7fffffff: + data_bytes += [0, 59 << 2, 0xff, 0x7f, 0xff, 0xff] + sd -= 0x7fffffff + if sd > 1023: + data_bytes += [0, 59 << 2, + (sd >> 16) & 255, + (sd >> 24) & 255, + (sd >> 0) & 255, + (sd >> 8) & 255] + sd = 0 + data_bytes += [sd & 255, ((sd & 768) >> 8) + 4 * typecode] elif field == 'num': # First byte stores num From 0dab235621183c89174486517d84489e79018b93 Mon Sep 17 00:00:00 2001 From: Benjamin Moody Date: Wed, 29 Sep 2021 11:54:31 -0400 Subject: [PATCH 6/8] test_annotation: convert to a standard unittest.TestCase. Make the test_annotation class a subclass of unittest.TestCase, allowing it to use standard unit testing utility methods, as well as setup and teardown functions. (nosetests will run "test" class methods automatically even if they are not subclasses of TestCase, but unittest won't.) Rename the class to TestAnnotation for consistency. Make the module executable (invoke unittest.main()) so it can be invoked simply using 'python3 -m tests.test_annotation'. Ensure that temporary files created by the annotation tests will be correctly cleaned up by TestAnnotation.tearDownClass() rather than by the unrelated TestRecord.tearDownClass(). (Presumably this only happened to work previously because "test_record" comes alphabetically after "test_annotation".) --- tests/test_annotation.py | 20 +++++++++++++++++++- tests/test_record.py | 6 +++--- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/tests/test_annotation.py b/tests/test_annotation.py index 32f39082..4fc33cc3 100644 --- a/tests/test_annotation.py +++ b/tests/test_annotation.py @@ -1,10 +1,13 @@ +import os import re +import unittest import numpy as np import wfdb -class test_annotation(): + +class TestAnnotation(unittest.TestCase): """ Testing read and write of WFDB annotations, including Physionet streaming. @@ -183,3 +186,18 @@ def test_3(self): assert (comp == [True] * 6) assert annotation.__eq__(pn_annotation) assert annotation.__eq__(write_annotation) + + @classmethod + def tearDownClass(cls): + writefiles = [ + '100.atr', + '1003.atr', + '12726.anI', + ] + for file in writefiles: + if os.path.isfile(file): + os.remove(file) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_record.py b/tests/test_record.py index 89fbc941..b71ffba1 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -521,9 +521,9 @@ def test_header_with_non_utf8(self): @classmethod def tearDownClass(cls): "Clean up written files" - writefiles = ['03700181.dat','03700181.hea','100.atr','100.dat', - '100.hea','1003.atr','100_3chan.dat','100_3chan.hea', - '12726.anI','a103l.hea','a103l.mat','s0010_re.dat', + writefiles = ['03700181.dat','03700181.hea','100.dat', + '100.hea','100_3chan.dat','100_3chan.hea', + 'a103l.hea','a103l.mat','s0010_re.dat', 's0010_re.hea','s0010_re.xyz','test01_00s.dat', 'test01_00s.hea','test01_00s_skewframe.hea', 'n8_evoked_raw_95_F1_R9.dat', 'n8_evoked_raw_95_F1_R9.hea'] From aad7b14d09998d8581ab0f5ee3aec0f3d2e3816d Mon Sep 17 00:00:00 2001 From: Benjamin Moody Date: Wed, 29 Sep 2021 12:35:26 -0400 Subject: [PATCH 7/8] Add test cases for reading/writing huge skips. Check that we can both read and write an annotation file containing a relative offset of more than 2**31 - 1 ticks, which necessitates the use of multiple SKIP pseudo-annotations. --- sample-data/huge.qrs | Bin 0 -> 34 bytes tests/test_annotation.py | 16 ++++++++++++++++ 2 files changed, 16 insertions(+) create mode 100644 sample-data/huge.qrs diff --git a/sample-data/huge.qrs b/sample-data/huge.qrs new file mode 100644 index 0000000000000000000000000000000000000000..f48e8e03e317aeaf3b4ed21a7bb96f5d3c9ab2a3 GIT binary patch literal 34 YcmZR0^S}Q8e+DchcL>W91{MYe0FvPo*Z=?k literal 0 HcmV?d00001 diff --git a/tests/test_annotation.py b/tests/test_annotation.py index 4fc33cc3..c7d0f4d3 100644 --- a/tests/test_annotation.py +++ b/tests/test_annotation.py @@ -187,12 +187,28 @@ def test_3(self): assert annotation.__eq__(pn_annotation) assert annotation.__eq__(write_annotation) + def test_4(self): + """ + Read and write annotations with large time skips + + Annotation file created by: + echo "xxxxxxxxx 10000000000 N 0 0 0" | wrann -r huge -a qrs + """ + annotation = wfdb.rdann('sample-data/huge', 'qrs') + self.assertEqual(annotation.sample[0], 10000000000) + annotation.wrann() + + annotation1 = wfdb.rdann('sample-data/huge', 'qrs') + annotation2 = wfdb.rdann('huge', 'qrs') + self.assertEqual(annotation1, annotation2) + @classmethod def tearDownClass(cls): writefiles = [ '100.atr', '1003.atr', '12726.anI', + 'huge.qrs', ] for file in writefiles: if os.path.isfile(file): From bfa0a37fd13464586cb102354e6a6d74946a6886 Mon Sep 17 00:00:00 2001 From: Benjamin Moody Date: Mon, 1 Nov 2021 16:32:12 -0400 Subject: [PATCH 8/8] field2bytes: rearrange and add comments for clarity. --- wfdb/io/annotation.py | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/wfdb/io/annotation.py b/wfdb/io/annotation.py index 041083cd..b3d30e1d 100644 --- a/wfdb/io/annotation.py +++ b/wfdb/io/annotation.py @@ -1369,17 +1369,28 @@ def field2bytes(field, value): sd = value[0] data_bytes = [] - # Add SKIP elements if value is too large - while sd > 0x7fffffff: - data_bytes += [0, 59 << 2, 0xff, 0x7f, 0xff, 0xff] - sd -= 0x7fffffff - if sd > 1023: + + # Add SKIP element(s) if the sample difference is too large to + # be stored in the annotation type word. + # + # Each SKIP element consists of three words (6 bytes): + # - Bytes 0-1 contain the SKIP indicator (59 << 10) + # - Bytes 2-3 contain the high 16 bits of the sample difference + # - Bytes 4-5 contain the low 16 bits of the sample difference + # If the total difference exceeds 2**31 - 1, multiple skips must + # be used. + while sd > 1023: + n = min(sd, 0x7fffffff) data_bytes += [0, 59 << 2, - (sd >> 16) & 255, - (sd >> 24) & 255, - (sd >> 0) & 255, - (sd >> 8) & 255] - sd = 0 + (n >> 16) & 255, + (n >> 24) & 255, + (n >> 0) & 255, + (n >> 8) & 255] + sd -= n + + # Annotation type itself is stored as a single word: + # - bits 0 to 9 store the sample difference (0 to 1023) + # - bits 10 to 15 store the type code data_bytes += [sd & 255, ((sd & 768) >> 8) + 4 * typecode] elif field == 'num':