Skip to content

Offset memmap #43

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 16 additions & 5 deletions numpy/lib/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,7 @@ def read_array(fp):


def open_memmap(filename, mode='r+', dtype=None, shape=None,
fortran_order=False, version=(1,0)):
fortran_order=False, version=(1,0), offset=0):
"""
Open a .npy file as a memory-mapped array.

Expand All @@ -489,15 +489,18 @@ def open_memmap(filename, mode='r+', dtype=None, shape=None,
which results in a data-type of `float64`.
shape : tuple of int
The shape of the array if we are creating a new file in "write"
mode, in which case this parameter is required. Otherwise, this
parameter is ignored and is thus optional.
mode, in which case this parameter is required. If opening an
existing file, `shape` may specify a (contiguous) slice to open.
Otherwise, this parameter is ignored.
fortran_order : bool, optional
Whether the array should be Fortran-contiguous (True) or
C-contiguous (False, the default) if we are creating a new file
in "write" mode.
version : tuple of int (major, minor)
If the mode is a "write" mode, then this is the version of the file
format used to create the file. Default: (1,0)
offset : int, optional
Number of elements to skip along the first dimension.

Returns
-------
Expand All @@ -521,6 +524,7 @@ def open_memmap(filename, mode='r+', dtype=None, shape=None,
" existing file handles.")

if 'w' in mode:
assert offset == 0, "Cannot specify offset when creating memmap"
# We are creating the file, not reading it.
# Check if we ought to create the file.
if version != (1, 0):
Expand Down Expand Up @@ -553,11 +557,17 @@ def open_memmap(filename, mode='r+', dtype=None, shape=None,
if version != (1, 0):
msg = "only support version (1,0) of file format, not %r"
raise ValueError(msg % (version,))
shape, fortran_order, dtype = read_array_header_1_0(fp)
fullshape, fortran_order, dtype = read_array_header_1_0(fp)
if shape is None:
shape = fullshape
if offset:
shape = list(fullshape)
shape[0] = shape[0] - offset
shape = tuple(shape)
if dtype.hasobject:
msg = "Array can't be memory-mapped: Python objects in dtype."
raise ValueError(msg)
offset = fp.tell()
offset = fp.tell() + offset * dtype.itemsize
finally:
fp.close()

Expand All @@ -575,3 +585,4 @@ def open_memmap(filename, mode='r+', dtype=None, shape=None,
mode=mode, offset=offset)

return marray

8 changes: 6 additions & 2 deletions numpy/lib/npyio.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ def __contains__(self, key):
return self.files.__contains__(key)


def load(file, mmap_mode=None):
def load(file, mmap_mode=None, offset=0, shape=None):
"""
Load a pickled, ``.npy``, or ``.npz`` binary file.

Expand Down Expand Up @@ -324,6 +324,9 @@ def load(file, mmap_mode=None):
"""
import gzip

if (not mmap_mode) and (offset or shape):
raise ValueError("Offset and shape should be used only with mmap_mode")

own_fid = False
if isinstance(file, basestring):
fid = open(file, "rb")
Expand All @@ -345,7 +348,7 @@ def load(file, mmap_mode=None):
return NpzFile(fid, own_fid=True)
elif magic == format.MAGIC_PREFIX: # .npy file
if mmap_mode:
return format.open_memmap(file, mode=mmap_mode)
return open_memmap(file, mode=mmap_mode, shape=shape, offset=offset)
else:
return format.read_array(fid)
else: # Try a pickle
Expand Down Expand Up @@ -1740,3 +1743,4 @@ def recfromcsv(fname, **kwargs):
else:
output = output.view(np.recarray)
return output