From 3c8abbbc7402a34482e75b18c7497ba9ed8a12a0 Mon Sep 17 00:00:00 2001 From: Ben Walsh Date: Tue, 12 Jul 2011 16:47:12 +0100 Subject: [PATCH 1/2] Various fixes for datetime arrays. --- numpy/core/_internal.py | 7 +++++-- numpy/lib/_iotools.py | 33 ++++++++++++++++++++++++--------- numpy/lib/tests/test_io.py | 9 +++++++++ numpy/ma/core.py | 4 ++++ 4 files changed, 42 insertions(+), 11 deletions(-) diff --git a/numpy/core/_internal.py b/numpy/core/_internal.py index 99e64d47558d..e46e1441a3f3 100644 --- a/numpy/core/_internal.py +++ b/numpy/core/_internal.py @@ -90,8 +90,11 @@ def _array_descr(descriptor): else: new = descriptor.metadata.copy() # Eliminate any key related to internal implementation - _ = new.pop(METADATA_DTSTR, None) - return (descriptor.str, new) + new.pop(METADATA_DTSTR, None) + if new: + return (descriptor.str, new) + else: + return descriptor.str else: return (_array_descr(subdtype[0]), subdtype[1]) diff --git a/numpy/lib/_iotools.py b/numpy/lib/_iotools.py index 27c1e76db6b3..7921b4116610 100644 --- a/numpy/lib/_iotools.py +++ b/numpy/lib/_iotools.py @@ -503,10 +503,25 @@ class StringConverter(object): (_defaulttype, _defaultfunc, _defaultfill) = zip(*_mapper) # @classmethod + def _getdtype(cls, val): + """Returns the dtype of the input variable.""" + return np.array(val).dtype + # + @classmethod def _getsubdtype(cls, val): """Returns the type of the dtype of the input variable.""" return np.array(val).dtype.type # + # This is a bit annoying. We want to return the "general" type in most cases + # (ie. "string" rather than "S10"), but we want to return the specific type + # for datetime64 (ie. "datetime64[us]" rather than "datetime64"). + @classmethod + def _dtypeortype(cls, dtype): + """Returns dtype for datetime64 and type of dtype otherwise.""" + if dtype.type == np.datetime64: + return dtype + return dtype.type + # @classmethod def upgrade_mapper(cls, func, default=None): """ @@ -561,12 +576,12 @@ def __init__(self, dtype_or_func=None, default=None, missing_values=None, self.func = str2bool self._status = 0 self.default = default or False - ttype = np.bool + dtype = np.dtype('bool') else: # Is the input a np.dtype ? try: self.func = None - ttype = np.dtype(dtype_or_func).type + dtype = np.dtype(dtype_or_func) except TypeError: # dtype_or_func must be a function, then if not hasattr(dtype_or_func, '__call__'): @@ -581,11 +596,11 @@ def __init__(self, dtype_or_func=None, default=None, missing_values=None, default = self.func(asbytes('0')) except ValueError: default = None - ttype = self._getsubdtype(default) + dtype = self._getdtype(default) # Set the status according to the dtype _status = -1 for (i, (deftype, func, default_def)) in enumerate(self._mapper): - if np.issubdtype(ttype, deftype): + if np.issubdtype(dtype.type, deftype): _status = i if default is None: self.default = default_def @@ -603,9 +618,9 @@ def __init__(self, dtype_or_func=None, default=None, missing_values=None, # If the status is 1 (int), change the function to # something more robust. if self.func == self._mapper[1][1]: - if issubclass(ttype, np.uint64): + if issubclass(dtype.type, np.uint64): self.func = np.uint64 - elif issubclass(ttype, np.int64): + elif issubclass(dtype.type, np.int64): self.func = np.int64 else: self.func = lambda x : int(float(x)) @@ -618,7 +633,7 @@ def __init__(self, dtype_or_func=None, default=None, missing_values=None, self.missing_values = set(list(missing_values) + [asbytes('')]) # self._callingfunction = self._strict_call - self.type = ttype + self.type = self._dtypeortype(dtype) self._checked = False self._initial_default = default # @@ -747,13 +762,13 @@ def update(self, func, default=None, testing_value=None, # Don't reset the default to None if we can avoid it if default is not None: self.default = default - self.type = self._getsubdtype(default) + self.type = self._dtypeortype(self._getdtype(default)) else: try: tester = func(testing_value or asbytes('1')) except (TypeError, ValueError): tester = None - self.type = self._getsubdtype(tester) + self.type = self._dtypeortype(self._getdtype(tester)) # Add the missing values to the existing set if missing_values is not None: if _is_bytes_like(missing_values): diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py index e83c82ecd134..f9da258dc583 100644 --- a/numpy/lib/tests/test_io.py +++ b/numpy/lib/tests/test_io.py @@ -776,6 +776,15 @@ def test_converters_cornercases(self): dtype=[('date', np.object_), ('stid', float)]) assert_equal(test, control) + def test_converters_cornercases2(self): + "Test the conversion to datetime64." + converter = {'date': lambda s: np.datetime64(strptime(s, '%Y-%m-%d %H:%M:%SZ'))} + data = StringIO('2009-02-03 12:00:00Z, 72214.0') + test = np.ndfromtxt(data, delimiter=',', dtype=None, + names=['date', 'stid'], converters=converter) + control = np.array((datetime(2009, 02, 03), 72214.), + dtype=[('date', 'datetime64[us]'), ('stid', float)]) + assert_equal(test, control) def test_unused_converter(self): "Test whether unused converters are forgotten" diff --git a/numpy/ma/core.py b/numpy/ma/core.py index 4c40a7c30437..72aa0b2c4d43 100644 --- a/numpy/ma/core.py +++ b/numpy/ma/core.py @@ -143,6 +143,8 @@ class MaskError(MAError): 'u' : 999999, 'V' : '???', 'U' : 'N/A', + 'M8[D]' : np.datetime64('1970-01-01'), + 'M8[us]' : np.datetime64('1970-01-01 00:00:00.000000Z') } max_filler = ntypes._minvals max_filler.update([(k, -np.inf) for k in [np.float32, np.float64]]) @@ -198,6 +200,8 @@ def default_fill_value(obj): elif isinstance(obj, np.dtype): if obj.subdtype: defval = default_filler.get(obj.subdtype[0].kind, '?') + elif obj.kind == 'M': + defval = default_filler.get(obj.str[1:], '?') else: defval = default_filler.get(obj.kind, '?') elif isinstance(obj, float): From a3b9928a9e0ce778a00d625fad1601a6a4e138b8 Mon Sep 17 00:00:00 2001 From: Ben Walsh Date: Tue, 12 Jul 2011 17:50:48 +0100 Subject: [PATCH 2/2] Masked arrays now use NaT as default filler value. --- numpy/ma/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/numpy/ma/core.py b/numpy/ma/core.py index 72aa0b2c4d43..e2e954a97c27 100644 --- a/numpy/ma/core.py +++ b/numpy/ma/core.py @@ -143,8 +143,8 @@ class MaskError(MAError): 'u' : 999999, 'V' : '???', 'U' : 'N/A', - 'M8[D]' : np.datetime64('1970-01-01'), - 'M8[us]' : np.datetime64('1970-01-01 00:00:00.000000Z') + 'M8[D]' : np.datetime64('NaT', 'D'), + 'M8[us]' : np.datetime64('NaT', 'us') } max_filler = ntypes._minvals max_filler.update([(k, -np.inf) for k in [np.float32, np.float64]])