From 086d42d8b315cacf04ccaf4a805dc2fc7c137fee Mon Sep 17 00:00:00 2001 From: Allan Haldane Date: Sun, 25 Oct 2015 23:41:11 -0400 Subject: [PATCH 1/2] TST: Remove tests of view safety checks (see next commit) Remove unit tests for the view safety chekcs, which are to be reverted in the next commit. --- numpy/core/tests/test_multiarray.py | 83 ---------------------------- numpy/lib/tests/test_recfunctions.py | 10 ---- 2 files changed, 93 deletions(-) diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index 85b0e5519ee8..116348667088 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -5829,89 +5829,6 @@ def test_collections_hashable(self): x = np.array([]) self.assertFalse(isinstance(x, collections.Hashable)) -from numpy.core._internal import _view_is_safe - -class TestObjViewSafetyFuncs(TestCase): - def test_view_safety(self): - psize = np.dtype('p').itemsize - - # creates dtype but with extra character code - for missing 'p' fields - def mtype(s): - n, offset, fields = 0, 0, [] - for c in s.split(','): # subarrays won't work - if c != '-': - fields.append(('f{0}'.format(n), c, offset)) - n += 1 - offset += np.dtype(c).itemsize if c != '-' else psize - - names, formats, offsets = zip(*fields) - return np.dtype({'names': names, 'formats': formats, - 'offsets': offsets, 'itemsize': offset}) - - # test nonequal itemsizes with objects: - # these should succeed: - _view_is_safe(np.dtype('O,p,O,p'), np.dtype('O,p,O,p,O,p')) - _view_is_safe(np.dtype('O,O'), np.dtype('O,O,O')) - # these should fail: - assert_raises(TypeError, _view_is_safe, np.dtype('O,O,p'), np.dtype('O,O')) - assert_raises(TypeError, _view_is_safe, np.dtype('O,O,p'), np.dtype('O,p')) - assert_raises(TypeError, _view_is_safe, np.dtype('O,O,p'), np.dtype('p,O')) - - # test nonequal itemsizes with missing fields: - # these should succeed: - _view_is_safe(mtype('-,p,-,p'), mtype('-,p,-,p,-,p')) - _view_is_safe(np.dtype('p,p'), np.dtype('p,p,p')) - # these should fail: - assert_raises(TypeError, _view_is_safe, mtype('p,p,-'), mtype('p,p')) - assert_raises(TypeError, _view_is_safe, mtype('p,p,-'), mtype('p,-')) - assert_raises(TypeError, _view_is_safe, mtype('p,p,-'), mtype('-,p')) - - # scans through positions at which we can view a type - def scanView(d1, otype): - goodpos = [] - for shift in range(d1.itemsize - np.dtype(otype).itemsize+1): - d2 = np.dtype({'names': ['f0'], 'formats': [otype], - 'offsets': [shift], 'itemsize': d1.itemsize}) - try: - _view_is_safe(d1, d2) - except TypeError: - pass - else: - goodpos.append(shift) - return goodpos - - # test partial overlap with object field - assert_equal(scanView(np.dtype('p,O,p,p,O,O'), 'p'), - [0] + list(range(2*psize, 3*psize+1))) - assert_equal(scanView(np.dtype('p,O,p,p,O,O'), 'O'), - [psize, 4*psize, 5*psize]) - - # test partial overlap with missing field - assert_equal(scanView(mtype('p,-,p,p,-,-'), 'p'), - [0] + list(range(2*psize, 3*psize+1))) - - # test nested structures with objects: - nestedO = np.dtype([('f0', 'p'), ('f1', 'p,O,p')]) - assert_equal(scanView(nestedO, 'p'), list(range(psize+1)) + [3*psize]) - assert_equal(scanView(nestedO, 'O'), [2*psize]) - - # test nested structures with missing fields: - nestedM = np.dtype([('f0', 'p'), ('f1', mtype('p,-,p'))]) - assert_equal(scanView(nestedM, 'p'), list(range(psize+1)) + [3*psize]) - - # test subarrays with objects - subarrayO = np.dtype('p,(2,3)O,p') - assert_equal(scanView(subarrayO, 'p'), [0, 7*psize]) - assert_equal(scanView(subarrayO, 'O'), - list(range(psize, 6*psize+1, psize))) - - #test dtype with overlapping fields - overlapped = np.dtype({'names': ['f0', 'f1', 'f2', 'f3'], - 'formats': ['p', 'p', 'p', 'p'], - 'offsets': [0, 1, 3*psize-1, 3*psize], - 'itemsize': 4*psize}) - assert_equal(scanView(overlapped, 'p'), [0, 1, 3*psize-1, 3*psize]) - class TestArrayPriority(TestCase): # This will go away when __array_priority__ is settled, meanwhile diff --git a/numpy/lib/tests/test_recfunctions.py b/numpy/lib/tests/test_recfunctions.py index 09cc29dc114c..699a04716d69 100644 --- a/numpy/lib/tests/test_recfunctions.py +++ b/numpy/lib/tests/test_recfunctions.py @@ -720,15 +720,5 @@ def test_append_to_objects(self): dtype=[('A', object), ('B', float), ('C', int)]) assert_equal(test, control) - def test_append_with_objects(self): - "Test append_fields when the appended data contains objects" - obj = self.data['obj'] - x = np.array([(10, 1.), (20, 2.)], dtype=[('A', int), ('B', float)]) - y = np.array([obj, obj], dtype=object) - test = append_fields(x, 'C', data=y, dtypes=object, usemask=False) - control = np.array([(10, 1.0, obj), (20, 2.0, obj)], - dtype=[('A', int), ('B', float), ('C', object)]) - assert_equal(test, control) - if __name__ == '__main__': run_module_suite() From 796a5f87c11fdc3a345fca5448b27a02856c2e4d Mon Sep 17 00:00:00 2001 From: Allan Haldane Date: Sun, 25 Oct 2015 23:29:06 -0400 Subject: [PATCH 2/2] BUG: revert view safety checks Because of slowdowns caused by the view safety checks introduced in #5548 they are removed here for 1.10. The plan is to reintroduce a better version of the checks in 1.11. --- numpy/core/_internal.py | 137 ++++------------------------------------ 1 file changed, 13 insertions(+), 124 deletions(-) diff --git a/numpy/core/_internal.py b/numpy/core/_internal.py index 3ddc2c64d890..47c933411123 100644 --- a/numpy/core/_internal.py +++ b/numpy/core/_internal.py @@ -306,94 +306,6 @@ def _copy_fields(ary): 'formats': [dt.fields[name][0] for name in dt.names]} return array(ary, dtype=copy_dtype, copy=True) -def _get_all_field_offsets(dtype, base_offset=0): - """ Returns the types and offsets of all fields in a (possibly structured) - data type, including nested fields and subarrays. - - Parameters - ---------- - dtype : data-type - Data type to extract fields from. - base_offset : int, optional - Additional offset to add to all field offsets. - - Returns - ------- - fields : list of (data-type, int) pairs - A flat list of (dtype, byte offset) pairs. - - """ - fields = [] - if dtype.fields is not None: - for name in dtype.names: - sub_dtype = dtype.fields[name][0] - sub_offset = dtype.fields[name][1] + base_offset - fields.extend(_get_all_field_offsets(sub_dtype, sub_offset)) - else: - if dtype.shape: - sub_offsets = _get_all_field_offsets(dtype.base, base_offset) - count = 1 - for dim in dtype.shape: - count *= dim - fields.extend((typ, off + dtype.base.itemsize*j) - for j in range(count) for (typ, off) in sub_offsets) - else: - fields.append((dtype, base_offset)) - return fields - -def _check_field_overlap(new_fields, old_fields): - """ Perform object memory overlap tests for two data-types (see - _view_is_safe). - - This function checks that new fields only access memory contained in old - fields, and that non-object fields are not interpreted as objects and vice - versa. - - Parameters - ---------- - new_fields : list of (data-type, int) pairs - Flat list of (dtype, byte offset) pairs for the new data type, as - returned by _get_all_field_offsets. - old_fields: list of (data-type, int) pairs - Flat list of (dtype, byte offset) pairs for the old data type, as - returned by _get_all_field_offsets. - - Raises - ------ - TypeError - If the new fields are incompatible with the old fields - - """ - - #first go byte by byte and check we do not access bytes not in old_fields - new_bytes = set() - for tp, off in new_fields: - new_bytes.update(set(range(off, off+tp.itemsize))) - old_bytes = set() - for tp, off in old_fields: - old_bytes.update(set(range(off, off+tp.itemsize))) - if new_bytes.difference(old_bytes): - raise TypeError("view would access data parent array doesn't own") - - #next check that we do not interpret non-Objects as Objects, and vv - obj_offsets = [off for (tp, off) in old_fields if tp.type is object_] - obj_size = dtype(object_).itemsize - - for fld_dtype, fld_offset in new_fields: - if fld_dtype.type is object_: - # check we do not create object views where - # there are no objects. - if fld_offset not in obj_offsets: - raise TypeError("cannot view non-Object data as Object type") - else: - # next check we do not create non-object views - # where there are already objects. - # see validate_object_field_overlap for a similar computation. - for obj_offset in obj_offsets: - if (fld_offset < obj_offset + obj_size and - obj_offset < fld_offset + fld_dtype.itemsize): - raise TypeError("cannot view Object as non-Object type") - def _getfield_is_safe(oldtype, newtype, offset): """ Checks safety of getfield for object arrays. @@ -415,10 +327,16 @@ def _getfield_is_safe(oldtype, newtype, offset): If the field access is invalid """ - new_fields = _get_all_field_offsets(newtype, offset) - old_fields = _get_all_field_offsets(oldtype) - # raises if there is a problem - _check_field_overlap(new_fields, old_fields) + if newtype.hasobject or oldtype.hasobject: + if offset == 0 and newtype == oldtype: + return + if oldtype.names: + for name in oldtype.names: + if (oldtype.fields[name][1] == offset and + oldtype.fields[name][0] == newtype): + return + raise TypeError("Cannot get/set field of an object array") + return def _view_is_safe(oldtype, newtype): """ Checks safety of a view involving object arrays, for example when @@ -426,13 +344,6 @@ def _view_is_safe(oldtype, newtype): np.zeros(10, dtype=oldtype).view(newtype) - We need to check that - 1) No memory that is not an object will be interpreted as a object, - 2) No memory containing an object will be interpreted as an arbitrary type. - Both cases can cause segfaults, eg in the case the view is written to. - Strategy here is to also disallow views where newtype has any field in a - place oldtype doesn't. - Parameters ---------- oldtype : data-type @@ -452,31 +363,9 @@ def _view_is_safe(oldtype, newtype): if oldtype == newtype: return - new_fields = _get_all_field_offsets(newtype) - new_size = newtype.itemsize - - old_fields = _get_all_field_offsets(oldtype) - old_size = oldtype.itemsize - - # if the itemsizes are not equal, we need to check that all the - # 'tiled positions' of the object match up. Here, we allow - # for arbirary itemsizes (even those possibly disallowed - # due to stride/data length issues). - if old_size == new_size: - new_num = old_num = 1 - else: - gcd_new_old = _gcd(new_size, old_size) - new_num = old_size // gcd_new_old - old_num = new_size // gcd_new_old - - # get position of fields within the tiling - new_fieldtile = [(tp, off + new_size*j) - for j in range(new_num) for (tp, off) in new_fields] - old_fieldtile = [(tp, off + old_size*j) - for j in range(old_num) for (tp, off) in old_fields] - - # raises if there is a problem - _check_field_overlap(new_fieldtile, old_fieldtile) + if newtype.hasobject or oldtype.hasobject: + raise TypeError("Cannot change data-type for object array.") + return # Given a string containing a PEP 3118 format specifier, # construct a Numpy dtype