Skip to content

WIP: [2.7] bpo-24658: Fix read/write greater than 2 GiB on macOS (GH-1705) #9938

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions Include/pyport.h
Original file line number Diff line number Diff line change
Expand Up @@ -947,4 +947,17 @@ typedef struct fd_set {
#define Py_ULL(x) Py_LL(x##U)
#endif

#if defined (MS_WINDOWS) || defined(__APPLE__)
/* On Windows, the count parameter of read() is an int (bpo-9015, bpo-9611).
On macOS 10.13, read() and write() with more than INT_MAX bytes
fail with EINVAL (bpo-24658). */
# define _PY_READ_MAX INT_MAX
# define _PY_WRITE_MAX INT_MAX
#else
/* write() should truncate the input to PY_SSIZE_T_MAX bytes,
but it's safer to do it ourself to have a portable behaviour */
# define _PY_READ_MAX PY_SSIZE_T_MAX
# define _PY_WRITE_MAX PY_SSIZE_T_MAX
#endif

#endif /* Py_PYPORT_H */
12 changes: 11 additions & 1 deletion Lib/test/test_largefile.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import sys
import unittest
from test.test_support import run_unittest, TESTFN, verbose, requires, \
unlink
unlink, bigmemtest
import io # C implementation of io
import _pyio as pyio # Python implementation of io

Expand Down Expand Up @@ -48,6 +48,15 @@ def test_seek(self):
print('check file size with os.fstat')
self.assertEqual(os.fstat(f.fileno())[stat.ST_SIZE], size+1)

# _pyio.FileIO.readall() uses a temporary bytearry then casted to bytes,
# so memuse=2 is needed
@bigmemtest(minsize=size, memuse=2)
def test_large_read(self, _size):
# bpo-24658: Test that a read greater than 2GB does not fail.
with self.open(TESTFN, "rb") as f:
self.assertEqual(len(f.read()), size + 1)
self.assertEqual(f.tell(), size + 1)

def test_osstat(self):
if verbose:
print('check file size with os.stat')
Expand Down Expand Up @@ -186,6 +195,7 @@ class TestCase(LargeFileTest):
suite.addTest(TestCase('test_osstat'))
suite.addTest(TestCase('test_seek_read'))
suite.addTest(TestCase('test_lseek'))
suite.addTest(TestCase('test_large_read'))
with _open(TESTFN, 'wb') as f:
if hasattr(f, 'truncate'):
suite.addTest(TestCase('test_truncate'))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
On macOS, fix reading from and writing into a file with a size larger than 2
GiB.
23 changes: 13 additions & 10 deletions Modules/_io/fileio.c
Original file line number Diff line number Diff line change
Expand Up @@ -515,9 +515,10 @@ fileio_readinto(fileio *self, PyObject *args)
len = pbuf.len;
Py_BEGIN_ALLOW_THREADS
errno = 0;
if (len > _PY_READ_MAX) {
len = _PY_READ_MAX;
}
#if defined(MS_WIN64) || defined(MS_WINDOWS)
if (len > INT_MAX)
len = INT_MAX;
n = read(self->fd, pbuf.buf, (int)len);
#else
n = read(self->fd, pbuf.buf, len);
Expand Down Expand Up @@ -604,9 +605,10 @@ fileio_readall(fileio *self)
Py_BEGIN_ALLOW_THREADS
errno = 0;
n = newsize - total;
if (n > _PY_READ_MAX) {
n = _PY_READ_MAX;
}
#if defined(MS_WIN64) || defined(MS_WINDOWS)
if (n > INT_MAX)
n = INT_MAX;
n = read(self->fd,
PyBytes_AS_STRING(result) + total,
(int)n);
Expand Down Expand Up @@ -668,10 +670,10 @@ fileio_read(fileio *self, PyObject *args)
return fileio_readall(self);
}

#if defined(MS_WIN64) || defined(MS_WINDOWS)
if (size > INT_MAX)
size = INT_MAX;
#endif
if (size > _PY_READ_MAX) {
size = _PY_READ_MAX;
}

bytes = PyBytes_FromStringAndSize(NULL, size);
if (bytes == NULL)
return NULL;
Expand Down Expand Up @@ -723,9 +725,10 @@ fileio_write(fileio *self, PyObject *args)
Py_BEGIN_ALLOW_THREADS
errno = 0;
len = pbuf.len;
if (len > _PY_WRITE_MAX) {
len = _PY_WRITE_MAX;
}
#if defined(MS_WIN64) || defined(MS_WINDOWS)
if (len > INT_MAX)
len = INT_MAX;
n = write(self->fd, pbuf.buf, (int)len);
#else
n = write(self->fd, pbuf.buf, len);
Expand Down
8 changes: 6 additions & 2 deletions Modules/posixmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -6805,6 +6805,9 @@ posix_read(PyObject *self, PyObject *args)
return posix_error();
}
Py_BEGIN_ALLOW_THREADS
if (size > _PY_READ_MAX) {
size = _PY_READ_MAX;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hum. It seems inefficient to allocate size bytes and only later pass _PY_READ_MAX. This truncation should be done before PyString_FromStringAndSize().

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ping @matrixise

}
n = read(fd, PyString_AsString(buffer), size);
Py_END_ALLOW_THREADS
if (n < 0) {
Expand Down Expand Up @@ -6836,9 +6839,10 @@ posix_write(PyObject *self, PyObject *args)
}
len = pbuf.len;
Py_BEGIN_ALLOW_THREADS
if (len > _PY_WRITE_MAX) {
len = _PY_WRITE_MAX;
}
#if defined(MS_WIN64) || defined(MS_WINDOWS)
if (len > INT_MAX)
len = INT_MAX;
size = write(fd, pbuf.buf, (int)len);
#else
size = write(fd, pbuf.buf, len);
Expand Down