diff --git a/numpy/lib/format.py b/numpy/lib/format.py index 1e508f3e5dda..042d3235c1a3 100644 --- a/numpy/lib/format.py +++ b/numpy/lib/format.py @@ -468,7 +468,7 @@ def read_array(fp): def open_memmap(filename, mode='r+', dtype=None, shape=None, - fortran_order=False, version=(1,0)): + fortran_order=False, version=(1,0), offset=0): """ Open a .npy file as a memory-mapped array. @@ -489,8 +489,9 @@ def open_memmap(filename, mode='r+', dtype=None, shape=None, which results in a data-type of `float64`. shape : tuple of int The shape of the array if we are creating a new file in "write" - mode, in which case this parameter is required. Otherwise, this - parameter is ignored and is thus optional. + mode, in which case this parameter is required. If opening an + existing file, `shape` may specify a (contiguous) slice to open. + Otherwise, this parameter is ignored. fortran_order : bool, optional Whether the array should be Fortran-contiguous (True) or C-contiguous (False, the default) if we are creating a new file @@ -498,6 +499,8 @@ def open_memmap(filename, mode='r+', dtype=None, shape=None, version : tuple of int (major, minor) If the mode is a "write" mode, then this is the version of the file format used to create the file. Default: (1,0) + offset : int, optional + Number of elements to skip along the first dimension. Returns ------- @@ -521,6 +524,7 @@ def open_memmap(filename, mode='r+', dtype=None, shape=None, " existing file handles.") if 'w' in mode: + assert offset == 0, "Cannot specify offset when creating memmap" # We are creating the file, not reading it. # Check if we ought to create the file. if version != (1, 0): @@ -553,11 +557,17 @@ def open_memmap(filename, mode='r+', dtype=None, shape=None, if version != (1, 0): msg = "only support version (1,0) of file format, not %r" raise ValueError(msg % (version,)) - shape, fortran_order, dtype = read_array_header_1_0(fp) + fullshape, fortran_order, dtype = read_array_header_1_0(fp) + if shape is None: + shape = fullshape + if offset: + shape = list(fullshape) + shape[0] = shape[0] - offset + shape = tuple(shape) if dtype.hasobject: msg = "Array can't be memory-mapped: Python objects in dtype." raise ValueError(msg) - offset = fp.tell() + offset = fp.tell() + offset * dtype.itemsize finally: fp.close() @@ -575,3 +585,4 @@ def open_memmap(filename, mode='r+', dtype=None, shape=None, mode=mode, offset=offset) return marray + diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 3f4db4593816..1218ad3e9cc8 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -262,7 +262,7 @@ def __contains__(self, key): return self.files.__contains__(key) -def load(file, mmap_mode=None): +def load(file, mmap_mode=None, offset=0, shape=None): """ Load a pickled, ``.npy``, or ``.npz`` binary file. @@ -324,6 +324,9 @@ def load(file, mmap_mode=None): """ import gzip + if (not mmap_mode) and (offset or shape): + raise ValueError("Offset and shape should be used only with mmap_mode") + own_fid = False if isinstance(file, basestring): fid = open(file, "rb") @@ -345,7 +348,7 @@ def load(file, mmap_mode=None): return NpzFile(fid, own_fid=True) elif magic == format.MAGIC_PREFIX: # .npy file if mmap_mode: - return format.open_memmap(file, mode=mmap_mode) + return open_memmap(file, mode=mmap_mode, shape=shape, offset=offset) else: return format.read_array(fid) else: # Try a pickle @@ -1740,3 +1743,4 @@ def recfromcsv(fname, **kwargs): else: output = output.view(np.recarray) return output +