fix datetime conversion

cx1111 · cx1111 · commit 63c987f33098 · 2018-04-27T12:14:55.000-04:00
diff --git a/demo.ipynb b/demo.ipynb
diff --git a/tests/test_io.py b/tests/test_io.py
@@ -272,10 +272,13 @@ def test_4b(self):
         assert record.__eq__(record_pb)
         assert record.__eq__(record_write)
 
-    # Format 12 multi-samples per frame and skew/Selected Duration/Selected Channels/Physical
-    # Target file created with: rdsamp -r sample-data/03700181 -f 8 -t 128 -s 0
-    # 2 -P | cut -f 2- > io-4c
+
     def test_4c(self):
+        """
+        Format 12 multi-samples per frame and skew/Selected Duration/Selected Channels/Physical
+        Target file created with: rdsamp -r sample-data/03700181 -f 8 -t 128 -s 0
+        2 -P | cut -f 2- > io-4c
+        """
         sig, fields = wfdb.rdsamp('sample-data/03700181',
                              channels=[0, 2], sampfrom=1000, sampto=16000)
         sig_round = np.round(sig, decimals=8)
diff --git a/wfdb/io/_header.py b/wfdb/io/_header.py
@@ -10,7 +10,7 @@
 
 import pdb
 int_types = (int, np.int64, np.int32, np.int16, np.int8)
-float_types = int_types + (float, np.float64, np.float32)
+float_types = (float, np.float64, np.float32) + int_types
 
 """
 WFDB field specifications for each field. The indexes are the field
@@ -123,7 +123,7 @@
             "(?P<n_sig>\d+)[ \t]*",
             "(?P<fs>\d*\.?\d*)/*(?P<counterfs>\d*\.?\d*)\(?(?P<base_counter>\d*\.?\d*)\)?[ \t]*",
             "(?P<sig_len>\d*)[ \t]*",
-            "(?P<base_time>\d*:?\d{,2}:?\d{,2}\.?\d*)[ \t]*",
+            "(?P<base_time>\d{,2}:?\d{,2}:?\d{,2}\.?\d{,6})[ \t]*",
             "(?P<base_date>\d{,2}/?\d{,2}/?\d{,4})"]))
 
 # Signal Line Fields
@@ -422,7 +422,8 @@ def check_field_cohesion(self, rec_write_fields, sig_write_fields):
 
     def wr_header_file(self, rec_write_fields, sig_write_fields, write_dir):
         """
-        Write a header file using the specified fields
+        Write a header file using the specified fields. Converts Record
+        attributes into appropriate wfdb format strings.
 
         Parameters
         ----------
@@ -442,12 +443,21 @@ def wr_header_file(self, rec_write_fields, sig_write_fields, write_dir):
         for field in RECORD_SPECS.index:
             # If the field is being used, add it with its delimiter
             if field in rec_write_fields:
-                stringfield = str(getattr(self, field))
-                # If fs is float, check whether it as an integer
+                string_field = str(getattr(self, field))
+
+                # Certain fields need extra processing
+
                 if field == 'fs' and isinstance(self.fs, float):
                     if round(self.fs, 8) == float(int(self.fs)):
-                        stringfield = str(int(self.fs))
-                record_line += RECORD_SPECS.loc[field, 'delimiter'] + stringfield
+                        string_field = str(int(self.fs))
+                elif field == 'base_time' and '.' in string_field:
+                    string_field = string_field.rstrip('0')
+                elif field == 'base_date':
+                    string_field = '/'.join((string_field[8:],
+                                             string_field[5:7],
+                                             string_field[:4]))
+
+                record_line += RECORD_SPECS.loc[field, 'delimiter'] + string_field
 
         header_lines = [record_line]
 
@@ -478,17 +488,24 @@ def wr_header_file(self, rec_write_fields, sig_write_fields, write_dir):
 
 class MultiHeaderMixin(BaseHeaderMixin):
     """
-    Mixin class with multi-segment header methods. Inherited by MultiRecord class.
+    Mixin class with multi-segment header methods. Inherited by
+    MultiRecord class.
     """
 
-    # Set defaults for fields needed to write the header if they have defaults.
-    # This is NOT called by rdheader. It is only called by the gateway wrsamp for convenience.
-    # It is also not called by wrhea (this may be changed in the future) since
-    # it is supposed to be an explicit function.
-
-    # Not responsible for initializing the
-    # attribute. That is done by the constructor.
     def set_defaults(self):
+        """
+        Set defaults for fields needed to write the header if they have
+        defaults.
+
+        This is NOT called by rdheader. It is only called by the gateway
+        wrsamp for convenience.
+
+        It is also not called by wrhea since it is supposed to be an
+        explicit function.
+
+        Not responsible for initializing the
+        attributes. That is done by the constructor.
+        """
         for field in self.get_write_fields():
             self.set_default(field)
 
@@ -630,9 +647,33 @@ def get_sig_name(self):
 
         return sig_name
 
+def wfdb_strptime(time_string):
+    """
+    Parse a time string in an acceptable wfdb format into a datetime.time.
+
+    Valid formats: SS, MM:SS, HH:MM:SS, each with or without a fractional
+    seconds part. Strings with more than two colons select no format.
+    """
+    n_colons = time_string.count(':')
+
+    if n_colons == 0:
+        time_fmt = '%S'
+    elif n_colons == 1:
+        time_fmt = '%M:%S'
+    elif n_colons == 2:
+        time_fmt = '%H:%M:%S'
+
+    if '.' in time_string:
+        time_fmt += '.%f'
+
+    return datetime.datetime.strptime(time_string, time_fmt).time()
+
 
-# Read header file to get comment and non-comment lines
 def get_header_lines(record_name, pb_dir):
+    """
+    Read a header file to get comment and non-comment lines
+
+    """
     # Read local file
     if pb_dir is None:
         with open(record_name + ".hea", 'r') as fp:
@@ -682,16 +723,24 @@ def _read_record_line(record_line):
         # mostly None)
         if record_fields[field] == '':
             record_fields[field] = RECORD_SPECS.loc[field, 'read_default']
-        # Typecast non-empty strings for numerical and date/time fields
+        # Typecast non-empty strings for non-string (numerical/datetime)
+        # fields
         else:
-            if RECORD_SPECS.loc[field, 'allowed_types'] is int_types:
+            if RECORD_SPECS.loc[field, 'allowed_types'] == int_types:
                 record_fields[field] = int(record_fields[field])
-            # fs may be read as float or int
-            elif field == 'fs':
-                fs = float(record_fields['fs'])
-                if round(fs, 8) == float(int(fs)):
-                    fs = int(fs)
-                record_fields['fs'] = fs
+            elif RECORD_SPECS.loc[field, 'allowed_types'] == float_types:
+                record_fields[field] = float(record_fields[field])
+                # Cast fs to int if it matches its integer part to 8 decimal places
+                if field == 'fs':
+                    fs = float(record_fields['fs'])
+                    if round(fs, 8) == float(int(fs)):
+                        fs = int(fs)
+                    record_fields['fs'] = fs
+            elif field == 'base_time':
+                record_fields['base_time'] = wfdb_strptime(record_fields['base_time'])
+            elif field == 'base_date':
+                record_fields['base_date'] = datetime.datetime.strptime(
+                    record_fields['base_date'], '%d/%m/%Y').date()
 
     return record_fields
 
diff --git a/wfdb/io/annotation.py b/wfdb/io/annotation.py
@@ -204,24 +204,29 @@ def get_label_fields(self):
     # Check the set fields of the annotation object
     def check_fields(self):
         # Check all set fields
-        for field in ann_field_types:
+        for field in ALLOWED_TYPES:
             if getattr(self, field) is not None:
                 # Check the type of the field's elements
                 self.check_field(field)
         return
 
 
-    # Check a particular annotation field
+
     def check_field(self, field):
+        """
+        Check a particular annotation field
+        """
 
         item = getattr(self, field)
 
-        if not isinstance(item, ann_field_types[field]):
-            raise TypeError('The '+field+' field must be one of the following types:', ann_field_types[field])
+        if not isinstance(item, ALLOWED_TYPES[field]):
+            raise TypeError('The '+field+' field must be one of the following types:', ALLOWED_TYPES[field])
 
-        if field in int_ann_fields:
-            if not hasattr(field, '__index__'):
-                raise TypeError('The '+field+' field must have an integer-based dtype.')
+        # Integer array fields: sample, label_store, subtype, chan, num.
+        # NOTE(review): entries are (np.ndarray,) tuples; == (np.ndarray) never matches
+        if ALLOWED_TYPES[field] == (np.ndarray):
+            record.check_np_array(item=item, field_name=field, ndim=1,
+                                  parent_class=np.integer, channel_num=None)
 
         # Field specific checks
         if field == 'record_name':
@@ -286,13 +291,13 @@ def check_field(self, field):
                     if not hasattr(label_store[i], '__index__'):
                         raise TypeError('The label_store values of the '+field+' field must be integer-like')
 
-                if not isinstance(symbol[i], strtypes) or len(symbol[i]) not in [1,2,3]:
+                if not isinstance(symbol[i], str_types) or len(symbol[i]) not in [1,2,3]:
                     raise ValueError('The symbol values of the '+field+' field must be strings of length 1 to 3')
 
                 if bool(re.search('[ \t\n\r\f\v]', symbol[i])):
                     raise ValueError('The symbol values of the '+field+' field must not contain whitespace characters')
 
-                if not isinstance(description[i], strtypes):
+                if not isinstance(description[i], str_types):
                     raise TypeError('The description values of the '+field+' field must be strings')
 
                 # Would be good to enfore this but existing garbage annotations have tabs and newlines...
@@ -304,7 +309,7 @@ def check_field(self, field):
             uniq_elements = set(item)
 
             for e in uniq_elements:
-                if not isinstance(e, strtypes):
+                if not isinstance(e, str_types):
                     raise TypeError('Subelements of the '+field+' field must be strings')
 
             if field == 'symbol':
@@ -1580,22 +1585,69 @@ def rm_last(*args):
         return [a[:-1] for a in args]
     return
 
-## ------------- /Reading Annotations ------------- ##
+## ------------- Annotation Field Specifications ------------- ##
+
+"""
+WFDB field specifications for each field. The indexes are the field
+names.
+
+Parameters
+----------
+allowed_types:
+    Data types the field (or its elements) can be.
+delimiter:
+    The text delimiter that precedes the field in the header file.
+write_required:
+    Whether the field is required for writing a header (more stringent
+    than the original WFDB library).
+read_default:
+    The default value for the field when read if any. Most fields do not
+    have a default. The reason for the variation is that we do not want
+    to imply that some fields are present when they are not, unless the
+    field is essential. See the notes.
+write_default:
+    The default value for the field to fill in before writing, if any.
+
+Notes
+-----
+In the original WFDB package, certain fields have default values, but
+not all of them. Some attributes need to be present for core
+functionality, e.g. baseline, whereas others are not essential, yet have
+defaults, e.g. base_time.
+
+This inconsistency has likely resulted in the generation of incorrect
+files, and general confusion. This library aims to make explicit,
+whether certain fields are present in the file, by setting their values
+to None if they are not written in, unless the fields are essential, in
+which case an actual default value will be set.
+
+The read vs write default values are different for 2 reasons:
+1. We want to force the user to be explicit with certain important
+   fields when writing WFDB record fields, without affecting
+   existing WFDB headers when reading.
+2. Certain unimportant fields may be dependencies of other
+   important fields. When writing, we want to fill in defaults
+   so that the user doesn't need to. But when reading, it should
+   be clear that the fields are missing.
+
+"""
+
 
 # Allowed types of each Annotation object attribute.
-ann_field_types = {'record_name': (str), 'extension': (str), 'sample': (np.ndarray),
-                 'symbol': (list, np.ndarray),  'subtype': (np.ndarray), 'chan': (np.ndarray),
-                 'num': (np.ndarray), 'aux_note': (list, np.ndarray), 'fs': _header.float_types,
-                 'label_store': (np.ndarray), 'description':(list, np.ndarray), 'custom_labels': (pd.DataFrame, list, tuple),
+ALLOWED_TYPES = {'record_name': (str), 'extension': (str),
+                 'sample': (np.ndarray,), 'symbol': (list, np.ndarray),
+                 'subtype': (np.ndarray,), 'chan': (np.ndarray,),
+                 'num': (np.ndarray,), 'aux_note': (list, np.ndarray),
+                 'fs': _header.float_types, 'label_store': (np.ndarray,),
+                 'description':(list, np.ndarray),
+                 'custom_labels': (pd.DataFrame, list, tuple),
                  'contained_labels':(pd.DataFrame, list, tuple)}
 
-strtypes = (str, np.str_)
+str_types = (str, np.str_)
 
 # Elements of the annotation label
 ann_label_fields = ('label_store', 'symbol', 'description')
 
-# Numerical integer annotation fields: sample, label_store, sub, chan, num
-int_ann_fields = [field for field in ann_field_types if ann_field_types[field] == (np.ndarray)]
 
 class AnnotationClass(object):
     def __init__(self, extension, description, human_reviewed):
diff --git a/wfdb/io/record.py b/wfdb/io/record.py
@@ -1,8 +1,5 @@
-# For wrsamp(), the field to use will be d_signal (which is allowed to be empty for 0 channel records).
 # set_p_features and set_d_features use characteristics of the p_signal or d_signal field to fill in other header fields.
 # These are separate from another method 'set_defaults' which the user may call to set default header fields
-# The check_field_cohesion() function will be called in wrheader which checks all the header fields.
-# The check_sig_cohesion() function will be called in wrsamp in wrdat to check the d_signal against the header fields.
 
 import datetime
 import multiprocessing
@@ -110,13 +107,6 @@ def check_field(self, field, required_channels='all'):
         elif field == 'sig_len':
             if self.sig_len < 0:
                 raise ValueError('sig_len must be a non-negative integer')
-        elif field == 'base_time':
-            try:
-                _ = datetime.datetime.strptime(self.base_time, '%H:%M:%S.%f')
-            except ValueError:
-                _ = datetime.datetime.strptime(self.base_time, '%H:%M/%S')
-        elif field == 'base_date':
-            _ = datetime.datetime.strptime(self.base_date, '%d/%m/%Y')
 
         # Signal specification fields
         elif field in _header.SIGNAL_SPECS.index:
@@ -353,7 +343,9 @@ def wrsamp(self, expanded=False, write_dir=''):
 
     def arrange_fields(self, channels, expanded=False):
         """
-        Arrange/edit object fields to reflect user channel and/or signal range input
+        Arrange/edit object fields to reflect user channel and/or signal
+        range input.
+
         Account for case when signals are expanded
         """
 
@@ -793,9 +785,9 @@ def _check_item_type(item, field_name, allowed_types, expect_list=False,
         for ch in range(len(item)):
             # Check whether the field may be None
             if ch in required_channels:
-                allowed_types_ch = allowed_types + (type(None),)
-            else:
                 allowed_types_ch = allowed_types
+            else:
+                allowed_types_ch = allowed_types + (type(None),)
 
             if not isinstance(item[ch], allowed_types_ch):
                 raise TypeError('Channel %d of field `%s` must be one of the following types:' % (ch, field_name),
diff --git a/wfdb/processing/qrs.py b/wfdb/processing/qrs.py
@@ -853,7 +853,6 @@ def sm(self, at_t):
             # from 1 to dt. 0 is never calculated.
             else:
                 v = int(self.at(smt))
-                print(smdt)
                 for j in range(1, smdt):
                     smtpj = self.at(smt + j)
                     smtlj = self.at(smt - j)