From 04ae55fc8f3e3c687224fd000d33feda94885769 Mon Sep 17 00:00:00 2001
From: Benjamin Moody <benjaminmoody@gmail.com>
Date: Thu, 30 Sep 2021 11:59:36 -0400
Subject: [PATCH 1/8] Fix documentation of the internal variable 'filebytes'.

This variable contains the complete contents of the input annotation
file, as a numpy array of pairs of bytes (shape=(N,2), dtype='uint8').
It is neither a str nor a bytes object.
---
 wfdb/io/annotation.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/wfdb/io/annotation.py b/wfdb/io/annotation.py
index 7ae619bb..2ef078f6 100644
--- a/wfdb/io/annotation.py
+++ b/wfdb/io/annotation.py
@@ -1748,8 +1748,8 @@ def load_byte_pairs(record_name, extension, pn_dir):
 
     Returns
     -------
-    filebytes : str
-        The input filestream converted to bytes.
+    filebytes : ndarray
+        The input filestream converted to an Nx2 array of unsigned bytes.
 
     """
     # local file
@@ -1769,8 +1769,8 @@ def proc_ann_bytes(filebytes, sampto):
 
     Parameters
     ----------
-    filebytes : str
-        The input filestream converted to bytes.
+    filebytes : ndarray
+        The input filestream converted to an Nx2 array of unsigned bytes.
     sampto : int
         The maximum sample number for annotations to be returned.
     
@@ -1852,8 +1852,8 @@ def proc_core_fields(filebytes, bpi):
 
     Parameters
     ----------
-    filebytes : str
-        The input filestream converted to bytes.
+    filebytes : ndarray
+        The input filestream converted to an Nx2 array of unsigned bytes.
     bpi : int
         The index to start the conversion.
 

From 2dd48457f5736ed0841cc37e9886cc08634096a5 Mon Sep 17 00:00:00 2001
From: Benjamin Moody <benjaminmoody@gmail.com>
Date: Wed, 15 Sep 2021 17:08:47 -0400
Subject: [PATCH 2/8] rdann: handle multiple consecutive SKIPs.

In WFDB-format annotation files, annotation timestamps are represented
as an offset from the previous annotation.  When this offset is less
than 0 or greater than 1023, a SKIP pseudo-annotation is used; when
the offset is greater than 2**31 - 1 or less than -2**31, multiple
SKIPs must be used.  Thus, proc_core_fields must be able to handle an
arbitrary number of SKIPs in a row, preceding the actual annotation,
and add all of the offsets together to obtain the final timestamp.
---
 wfdb/io/annotation.py | 27 +++++++++++----------------
 1 file changed, 11 insertions(+), 16 deletions(-)

diff --git a/wfdb/io/annotation.py b/wfdb/io/annotation.py
index 2ef078f6..db6b5b9f 100644
--- a/wfdb/io/annotation.py
+++ b/wfdb/io/annotation.py
@@ -1869,31 +1869,26 @@ def proc_core_fields(filebytes, bpi):
         The index to start the conversion.
 
     """
-    label_store = filebytes[bpi, 1] >> 2
+    sample_diff = 0
 
     # The current byte pair will contain either the actual d_sample + annotation store value,
     # or 0 + SKIP.
-
-    # Not a skip - it is the actual sample number + annotation type store value
-    if label_store != 59:
-        sample_diff = filebytes[bpi, 0] + 256 * (filebytes[bpi, 1] & 3)
-        bpi = bpi + 1
-    # Skip. Note: Could there be another skip after the first?
-    else:
+    while filebytes[bpi, 1] >> 2 == 59:
         # 4 bytes storing dt
-        sample_diff = 65536 * filebytes[bpi + 1,0] + 16777216 * filebytes[bpi + 1,1] \
+        skip_diff = 65536 * filebytes[bpi + 1,0] + 16777216 * filebytes[bpi + 1,1] \
              + filebytes[bpi + 2,0] + 256 * filebytes[bpi + 2,1]
 
         # Data type is long integer (stored in two's complement). Range -2**31 to 2**31 - 1
-        if sample_diff > 2147483647:
-            sample_diff = sample_diff - 4294967296
+        if skip_diff > 2147483647:
+            skip_diff = skip_diff - 4294967296
 
-        # After the 4 bytes, the next pair's samp is also added
-        sample_diff = sample_diff + filebytes[bpi + 3, 0] + 256 * (filebytes[bpi + 3, 1] & 3)
+        sample_diff += skip_diff
+        bpi = bpi + 3
 
-        # The label is stored after the 4 bytes. Samples here should be 0.
-        label_store = filebytes[bpi + 3, 1] >> 2
-        bpi = bpi + 4
+    # Not a skip - it is the actual sample number + annotation type store value
+    label_store = filebytes[bpi, 1] >> 2
+    sample_diff += filebytes[bpi, 0] + 256 * (filebytes[bpi, 1] & 3)
+    bpi = bpi + 1
 
     return sample_diff, label_store, bpi
 

From 94449b85fb145a5a6a1dff6b50d10c5401b17027 Mon Sep 17 00:00:00 2001
From: Benjamin Moody <benjaminmoody@gmail.com>
Date: Thu, 30 Sep 2021 12:08:47 -0400
Subject: [PATCH 3/8] rdann: compute timestamps as int, not a numpy integer.

When reading an annotation file in WFDB format, the timestamp (sample
number) must be computed by adding up the relative timestamp
difference for each annotation.  For long records, sample numbers can
easily exceed 2**32.

The input to proc_core_fields is a numpy array, so if we operate on
the byte values with ordinary arithmetic operations, the result will
be a numpy integer object with numpy's default precision (typically
int32 on 32-bit platforms and int64 on 64-bit platforms).

Instead, calculate the result as a Python integer, to avoid
architecture-dependent behavior and (possible) silent wrapping.

(Furthermore, use left-shift operations instead of multiplying by
constants that are hard to remember.)
---
 wfdb/io/annotation.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/wfdb/io/annotation.py b/wfdb/io/annotation.py
index db6b5b9f..bc41e8da 100644
--- a/wfdb/io/annotation.py
+++ b/wfdb/io/annotation.py
@@ -1875,8 +1875,10 @@ def proc_core_fields(filebytes, bpi):
     # or 0 + SKIP.
     while filebytes[bpi, 1] >> 2 == 59:
         # 4 bytes storing dt
-        skip_diff = 65536 * filebytes[bpi + 1,0] + 16777216 * filebytes[bpi + 1,1] \
-             + filebytes[bpi + 2,0] + 256 * filebytes[bpi + 2,1]
+        skip_diff = ((int(filebytes[bpi + 1, 0]) << 16)
+                     + (int(filebytes[bpi + 1, 1]) << 24)
+                     + (int(filebytes[bpi + 2, 0]) << 0)
+                     + (int(filebytes[bpi + 2, 1]) << 8))
 
         # Data type is long integer (stored in two's complement). Range -2**31 to 2**31 - 1
         if skip_diff > 2147483647:
@@ -1887,7 +1889,7 @@ def proc_core_fields(filebytes, bpi):
 
     # Not a skip - it is the actual sample number + annotation type store value
     label_store = filebytes[bpi, 1] >> 2
-    sample_diff += filebytes[bpi, 0] + 256 * (filebytes[bpi, 1] & 3)
+    sample_diff += int(filebytes[bpi, 0] + 256 * (filebytes[bpi, 1] & 3))
     bpi = bpi + 1
 
     return sample_diff, label_store, bpi

From c098e38b2945da6dce7fd6ea9e759bf4d7d94bfc Mon Sep 17 00:00:00 2001
From: Benjamin Moody <benjaminmoody@gmail.com>
Date: Thu, 30 Sep 2021 12:26:23 -0400
Subject: [PATCH 4/8] rdann: store sample as an array of int64.

For long records, annotation timestamps (sample numbers) can easily
exceed the range of a numpy 'int' on 32-bit architectures.  Therefore,
store the 'sample' array as 'int64' instead.
---
 wfdb/io/annotation.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/wfdb/io/annotation.py b/wfdb/io/annotation.py
index bc41e8da..cf9c9da7 100644
--- a/wfdb/io/annotation.py
+++ b/wfdb/io/annotation.py
@@ -1653,8 +1653,11 @@ def rdann(record_name, extension, sampfrom=0, sampto=None, shift_samps=False,
                                              subtype, chan, num, aux_note)
 
     # Convert lists to numpy arrays dtype='int'
-    (sample, label_store, subtype,
-     chan, num) = lists_to_int_arrays(sample, label_store, subtype, chan, num)
+    (label_store, subtype,
+     chan, num) = lists_to_int_arrays(label_store, subtype, chan, num)
+
+    # Convert sample numbers to a numpy array of 'int64'
+    sample = np.array(sample, dtype='int64')
 
     # Try to get fs from the header file if it is not contained in the
     # annotation file

From 481978e7c7f51f9796d9d95dc0ca8a5d25708eda Mon Sep 17 00:00:00 2001
From: Benjamin Moody <benjaminmoody@gmail.com>
Date: Wed, 15 Sep 2021 17:09:16 -0400
Subject: [PATCH 5/8] wrann: allow intervals larger than 2**31 - 1.

If the gap between two consecutive annotation timestamps is greater
than 2**31 - 1 ticks, it must be represented as two or more SKIP
pseudo-annotations.  Handle this correctly in field2bytes() (to
actually generate the correct byte sequences) and in
Annotation.check_field() (to permit the application to specify such a
gap).

(Previously, if there was a gap of exactly 2**31 ticks, this would not
be caught by check_field, and field2bytes would incorrectly generate a
SKIP of -2**31 instead.)
---
 wfdb/io/annotation.py | 28 +++++++++++++---------------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/wfdb/io/annotation.py b/wfdb/io/annotation.py
index cf9c9da7..041083cd 100644
--- a/wfdb/io/annotation.py
+++ b/wfdb/io/annotation.py
@@ -466,8 +466,6 @@ def check_field(self, field):
                 raise ValueError("The 'sample' field must only contain non-negative integers")
             if min(sampdiffs) < 0 :
                 raise ValueError("The 'sample' field must contain monotonically increasing sample numbers")
-            if max(sampdiffs) > 2147483648:
-                raise ValueError('WFDB annotation files cannot store sample differences greater than 2**31')
 
         elif field == 'label_store':
             if min(item) < 1 or max(item) > 49:
@@ -1370,19 +1368,19 @@ def field2bytes(field, value):
         # sample difference
         sd = value[0]
 
-        # Add SKIP element if value is too large for single byte
-        if sd>1023:
-            # 8 bytes in total:
-            # - [0, 59>>2] indicates SKIP
-            # - Next 4 gives sample difference
-            # - Final 2 give 0 and sym
-            data_bytes = [0, 236, (sd&16711680)>>16, (sd&4278190080)>>24, sd&255, (sd&65280)>>8, 0, 4*typecode]
-        # Just need samp and sym
-        else:
-            # - First byte stores low 8 bits of samp
-            # - Second byte stores high 2 bits of samp
-            #   and sym
-            data_bytes = [sd & 255, ((sd & 768) >> 8) + 4*typecode]
+        data_bytes = []
+        # Add SKIP elements if value is too large
+        while sd > 0x7fffffff:
+            data_bytes += [0, 59 << 2, 0xff, 0x7f, 0xff, 0xff]
+            sd -= 0x7fffffff
+        if sd > 1023:
+            data_bytes += [0, 59 << 2,
+                           (sd >> 16) & 255,
+                           (sd >> 24) & 255,
+                           (sd >> 0) & 255,
+                           (sd >> 8) & 255]
+            sd = 0
+        data_bytes += [sd & 255, ((sd & 768) >> 8) + 4 * typecode]
 
     elif field == 'num':
         # First byte stores num

From 0dab235621183c89174486517d84489e79018b93 Mon Sep 17 00:00:00 2001
From: Benjamin Moody <benjaminmoody@gmail.com>
Date: Wed, 29 Sep 2021 11:54:31 -0400
Subject: [PATCH 6/8] test_annotation: convert to a standard unittest.TestCase.

Make the test_annotation class a subclass of unittest.TestCase,
allowing it to use standard unit testing utility methods, as well as
setup and teardown functions.  (nosetests will run "test" methods
automatically even if their class is not a subclass of TestCase, but
unittest won't.)  Rename the class to TestAnnotation for consistency.

Make the module executable (invoke unittest.main()) so it can be
invoked simply using 'python3 -m tests.test_annotation'.

Ensure that temporary files created by the annotation tests will be
correctly cleaned up by TestAnnotation.tearDownClass() rather than by
the unrelated TestRecord.tearDownClass().  (Presumably this only
happened to work previously because "test_record" comes alphabetically
after "test_annotation".)
---
 tests/test_annotation.py | 20 +++++++++++++++++++-
 tests/test_record.py     |  6 +++---
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/tests/test_annotation.py b/tests/test_annotation.py
index 32f39082..4fc33cc3 100644
--- a/tests/test_annotation.py
+++ b/tests/test_annotation.py
@@ -1,10 +1,13 @@
+import os
 import re
+import unittest
 
 import numpy as np
 
 import wfdb
 
-class test_annotation():
+
+class TestAnnotation(unittest.TestCase):
     """
     Testing read and write of WFDB annotations, including Physionet
     streaming.
@@ -183,3 +186,18 @@ def test_3(self):
         assert (comp == [True] * 6)
         assert annotation.__eq__(pn_annotation)
         assert annotation.__eq__(write_annotation)
+
+    @classmethod
+    def tearDownClass(cls):
+        writefiles = [
+            '100.atr',
+            '1003.atr',
+            '12726.anI',
+        ]
+        for file in writefiles:
+            if os.path.isfile(file):
+                os.remove(file)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/test_record.py b/tests/test_record.py
index 89fbc941..b71ffba1 100644
--- a/tests/test_record.py
+++ b/tests/test_record.py
@@ -521,9 +521,9 @@ def test_header_with_non_utf8(self):
     @classmethod
     def tearDownClass(cls):
         "Clean up written files"
-        writefiles = ['03700181.dat','03700181.hea','100.atr','100.dat',
-                      '100.hea','1003.atr','100_3chan.dat','100_3chan.hea',
-                      '12726.anI','a103l.hea','a103l.mat','s0010_re.dat',
+        writefiles = ['03700181.dat','03700181.hea','100.dat',
+                      '100.hea','100_3chan.dat','100_3chan.hea',
+                      'a103l.hea','a103l.mat','s0010_re.dat',
                       's0010_re.hea','s0010_re.xyz','test01_00s.dat',
                       'test01_00s.hea','test01_00s_skewframe.hea',
                       'n8_evoked_raw_95_F1_R9.dat', 'n8_evoked_raw_95_F1_R9.hea']

From aad7b14d09998d8581ab0f5ee3aec0f3d2e3816d Mon Sep 17 00:00:00 2001
From: Benjamin Moody <benjaminmoody@gmail.com>
Date: Wed, 29 Sep 2021 12:35:26 -0400
Subject: [PATCH 7/8] Add test cases for reading/writing huge skips.

Check that we can both read and write an annotation file containing a
relative offset of more than 2**31 - 1 ticks, which necessitates the
use of multiple SKIP pseudo-annotations.
---
 sample-data/huge.qrs     | Bin 0 -> 34 bytes
 tests/test_annotation.py |  16 ++++++++++++++++
 2 files changed, 16 insertions(+)
 create mode 100644 sample-data/huge.qrs

diff --git a/sample-data/huge.qrs b/sample-data/huge.qrs
new file mode 100644
index 0000000000000000000000000000000000000000..f48e8e03e317aeaf3b4ed21a7bb96f5d3c9ab2a3
GIT binary patch
literal 34
YcmZR0^S}Q8e+DchcL>W91{MYe0FvPo*Z=?k

literal 0
HcmV?d00001

diff --git a/tests/test_annotation.py b/tests/test_annotation.py
index 4fc33cc3..c7d0f4d3 100644
--- a/tests/test_annotation.py
+++ b/tests/test_annotation.py
@@ -187,12 +187,28 @@ def test_3(self):
         assert annotation.__eq__(pn_annotation)
         assert annotation.__eq__(write_annotation)
 
+    def test_4(self):
+        """
+        Read and write annotations with large time skips
+
+        Annotation file created by:
+            echo "xxxxxxxxx 10000000000 N 0 0 0" | wrann -r huge -a qrs
+        """
+        annotation = wfdb.rdann('sample-data/huge', 'qrs')
+        self.assertEqual(annotation.sample[0], 10000000000)
+        annotation.wrann()
+
+        annotation1 = wfdb.rdann('sample-data/huge', 'qrs')
+        annotation2 = wfdb.rdann('huge', 'qrs')
+        self.assertEqual(annotation1, annotation2)
+
     @classmethod
     def tearDownClass(cls):
         writefiles = [
             '100.atr',
             '1003.atr',
             '12726.anI',
+            'huge.qrs',
         ]
         for file in writefiles:
             if os.path.isfile(file):

From bfa0a37fd13464586cb102354e6a6d74946a6886 Mon Sep 17 00:00:00 2001
From: Benjamin Moody <benjaminmoody@gmail.com>
Date: Mon, 1 Nov 2021 16:32:12 -0400
Subject: [PATCH 8/8] field2bytes: rearrange and add comments for clarity.

---
 wfdb/io/annotation.py | 31 +++++++++++++++++++++----------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/wfdb/io/annotation.py b/wfdb/io/annotation.py
index 041083cd..b3d30e1d 100644
--- a/wfdb/io/annotation.py
+++ b/wfdb/io/annotation.py
@@ -1369,17 +1369,28 @@ def field2bytes(field, value):
         sd = value[0]
 
         data_bytes = []
-        # Add SKIP elements if value is too large
-        while sd > 0x7fffffff:
-            data_bytes += [0, 59 << 2, 0xff, 0x7f, 0xff, 0xff]
-            sd -= 0x7fffffff
-        if sd > 1023:
+
+        # Add SKIP element(s) if the sample difference is too large to
+        # be stored in the annotation type word.
+        #
+        # Each SKIP element consists of three words (6 bytes):
+        #  - Bytes 0-1 contain the SKIP indicator (59 << 10)
+        #  - Bytes 2-3 contain the high 16 bits of the sample difference
+        #  - Bytes 4-5 contain the low 16 bits of the sample difference
+        # If the total difference exceeds 2**31 - 1, multiple skips must
+        # be used.
+        while sd > 1023:
+            n = min(sd, 0x7fffffff)
             data_bytes += [0, 59 << 2,
-                           (sd >> 16) & 255,
-                           (sd >> 24) & 255,
-                           (sd >> 0) & 255,
-                           (sd >> 8) & 255]
-            sd = 0
+                           (n >> 16) & 255,
+                           (n >> 24) & 255,
+                           (n >> 0) & 255,
+                           (n >> 8) & 255]
+            sd -= n
+
+        # Annotation type itself is stored as a single word:
+        #  - bits 0 to 9 store the sample difference (0 to 1023)
+        #  - bits 10 to 15 store the type code
         data_bytes += [sd & 255, ((sd & 768) >> 8) + 4 * typecode]
 
     elif field == 'num':