From 68f90aae3ec7edde8f3c3b832c51c01c57fe684a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Wirtel?= Date: Thu, 18 Oct 2018 01:05:04 +0200 Subject: [PATCH] [3.6] bpo-24658: Fix read/write greater than 2 GiB on macOS (GH-1705) On macOS, fix reading from and writing into a file with a size larger than 2 GiB. (cherry picked from commit 74a8b6ea7e0a8508b13a1c75ec9b91febd8b5557) --- Include/fileutils.h | 13 ++++++++++ Lib/test/test_largefile.py | 15 +++++++++--- .../2018-10-17-14-36-08.bpo-24658.Naddgx.rst | 1 + Modules/_io/fileio.c | 8 +++---- Python/fileutils.c | 24 ++++--------------- 5 files changed, 34 insertions(+), 27 deletions(-) create mode 100644 Misc/NEWS.d/next/macOS/2018-10-17-14-36-08.bpo-24658.Naddgx.rst diff --git a/Include/fileutils.h b/Include/fileutils.h index 9fce7d23cf0efa..4efcf13219c398 100644 --- a/Include/fileutils.h +++ b/Include/fileutils.h @@ -29,6 +29,19 @@ PyAPI_FUNC(char*) _Py_EncodeLocaleEx( PyAPI_FUNC(PyObject *) _Py_device_encoding(int); +#if defined(MS_WINDOWS) || defined(__APPLE__) + /* On Windows, the count parameter of read() is an int (bpo-9015, bpo-9611). + On macOS 10.13, read() and write() with more than INT_MAX bytes + fail with EINVAL (bpo-24658). 
*/ +# define _PY_READ_MAX INT_MAX +# define _PY_WRITE_MAX INT_MAX +#else + /* write() should truncate the input to PY_SSIZE_T_MAX bytes, + but it's safer to do it ourselves to have portable behaviour */ +# define _PY_READ_MAX PY_SSIZE_T_MAX +# define _PY_WRITE_MAX PY_SSIZE_T_MAX +#endif + #ifdef MS_WINDOWS struct _Py_stat_struct { unsigned long st_dev; diff --git a/Lib/test/test_largefile.py b/Lib/test/test_largefile.py index d07bb8eee5a792..fd2cc03259e832 100644 --- a/Lib/test/test_largefile.py +++ b/Lib/test/test_largefile.py @@ -5,12 +5,12 @@ import stat import sys import unittest -from test.support import TESTFN, requires, unlink +from test.support import TESTFN, requires, unlink, bigmemtest import io # C implementation of io import _pyio as pyio # Python implementation of io -# size of file to create (>2GB; 2GB == 2147483648 bytes) -size = 2500000000 +# size of file to create (>2 GiB; 2 GiB == 2,147,483,648 bytes) +size = 2_500_000_000 class LargeFileTest: """Test that each file function works as expected for large @@ -45,6 +45,15 @@ def tearDownClass(cls): raise cls.failureException('File was not truncated by opening ' 'with mode "wb"') + # _pyio.FileIO.readall() uses a temporary bytearray that is then cast to bytes, + # so memuse=2 is needed + @bigmemtest(size=size, memuse=2, dry_run=False) + def test_large_read(self, _size): + # bpo-24658: Test that a read greater than 2 GiB does not fail. + with self.open(TESTFN, "rb") as f: + self.assertEqual(len(f.read()), size + 1) + self.assertEqual(f.tell(), size + 1) + def test_osstat(self): self.assertEqual(os.stat(TESTFN)[stat.ST_SIZE], size+1) diff --git a/Misc/NEWS.d/next/macOS/2018-10-17-14-36-08.bpo-24658.Naddgx.rst b/Misc/NEWS.d/next/macOS/2018-10-17-14-36-08.bpo-24658.Naddgx.rst new file mode 100644 index 00000000000000..ff660a125c64f3 --- /dev/null +++ b/Misc/NEWS.d/next/macOS/2018-10-17-14-36-08.bpo-24658.Naddgx.rst @@ -0,0 +1 @@ +On macOS, fix reading from and writing into a file with a size larger than 2 GiB. 
\ No newline at end of file diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index 52cbb94b24b6b5..e014552887fa4e 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -799,11 +799,9 @@ _io_FileIO_read_impl(fileio *self, Py_ssize_t size) if (size < 0) return _io_FileIO_readall_impl(self); -#ifdef MS_WINDOWS - /* On Windows, the count parameter of read() is an int */ - if (size > INT_MAX) - size = INT_MAX; -#endif + if (size > _PY_READ_MAX) { + size = _PY_READ_MAX; + } bytes = PyBytes_FromStringAndSize(NULL, size); if (bytes == NULL) diff --git a/Python/fileutils.c b/Python/fileutils.c index b8e489125e3c69..306838ecce44d7 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -1263,18 +1263,9 @@ _Py_read(int fd, void *buf, size_t count) * handler raised an exception. */ assert(!PyErr_Occurred()); -#ifdef MS_WINDOWS - if (count > INT_MAX) { - /* On Windows, the count parameter of read() is an int */ - count = INT_MAX; - } -#else - if (count > PY_SSIZE_T_MAX) { - /* if count is greater than PY_SSIZE_T_MAX, - * read() result is undefined */ - count = PY_SSIZE_T_MAX; + if (count > _PY_READ_MAX) { + count = _PY_READ_MAX; } -#endif _Py_BEGIN_SUPPRESS_IPH do { @@ -1325,15 +1316,10 @@ _Py_write_impl(int fd, const void *buf, size_t count, int gil_held) depending on heap usage). */ count = 32767; } - else if (count > INT_MAX) - count = INT_MAX; -#else - if (count > PY_SSIZE_T_MAX) { - /* write() should truncate count to PY_SSIZE_T_MAX, but it's safer - * to do it ourself to have a portable behaviour. */ - count = PY_SSIZE_T_MAX; - } #endif + if (count > _PY_WRITE_MAX) { + count = _PY_WRITE_MAX; + } if (gil_held) { do {