From 78c4de057cff5b57c5e091c7974a202859a31114 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Mon, 17 Jun 2024 23:16:11 -0700 Subject: [PATCH 01/15] Reduce system calls in full-file readall case This reduces the system call count of a simple program[0] that reads all the `.rst` files in Doc by over 10% (5706 -> 4734 system calls on my linux system, 5813 -> 4875 on my macOS) This reduces the number of `fstat()` calls always and seek calls most the time. Stat was always called twice, once at open (to error early on directories), and a second time to get the size of the file to be able to read the whole file in one read. Now the size is cached with the first call. The code keeps an optimization that if the user had previously read a lot of data, the current position is subtracted from the number of bytes to read. That is somewhat expensive so only do it on larger files, otherwise just try and read the extra bytes and resize the PyBytes as needeed. I built a little test program to validate the behavior + assumptions around relative costs and then ran it under `strace` to get a log of the system calls. Full samples below[1]. After the changes, this is everything in one `filename.read_text()`: ```python3 openat(AT_FDCWD, "cpython/Doc/howto/clinic.rst", O_RDONLY|O_CLOEXEC) = 3` fstat(3, {st_mode=S_IFREG|0644, st_size=343, ...}) = 0` ioctl(3, TCGETS, 0x7ffdfac04b40) = -1 ENOTTY (Inappropriate ioctl for device) lseek(3, 0, SEEK_CUR) = 0 read(3, ":orphan:\n\n.. This page is retain"..., 344) = 343 read(3, "", 1) = 0 close(3) = 0 ``` This does make some tradeoffs 1. If the file size changes between open() and readall(), this will still get all the data but might have more read calls. 2. I experimented with avoiding the stat + cached result for small files in general, but on my dev workstation at least that tended to reduce performance compared to using the fstat(). [0] ```python3 from pathlib import Path nlines = [] for filename in Path("cpython/Doc").glob("**/*.rst"): nlines.append(len(filename.read_text())) ``` [1] before small file: ``` openat(AT_FDCWD, "cpython/Doc/howto/clinic.rst", O_RDONLY|O_CLOEXEC) = 3 fstat(3, {st_mode=S_IFREG|0644, st_size=343, ...}) = 0 ioctl(3, TCGETS, 0x7ffe52525930) = -1 ENOTTY (Inappropriate ioctl for device) lseek(3, 0, SEEK_CUR) = 0 lseek(3, 0, SEEK_CUR) = 0 fstat(3, {st_mode=S_IFREG|0644, st_size=343, ...}) = 0 read(3, ":orphan:\n\n.. This page is retain"..., 344) = 343 read(3, "", 1) = 0 close(3) = 0 ``` after small file: ``` openat(AT_FDCWD, "cpython/Doc/howto/clinic.rst", O_RDONLY|O_CLOEXEC) = 3 fstat(3, {st_mode=S_IFREG|0644, st_size=343, ...}) = 0 ioctl(3, TCGETS, 0x7ffdfac04b40) = -1 ENOTTY (Inappropriate ioctl for device) lseek(3, 0, SEEK_CUR) = 0 read(3, ":orphan:\n\n.. This page is retain"..., 344) = 343 read(3, "", 1) = 0 close(3) = 0 ``` before large file: ``` openat(AT_FDCWD, "cpython/Doc/c-api/typeobj.rst", O_RDONLY|O_CLOEXEC) = 3 fstat(3, {st_mode=S_IFREG|0644, st_size=133104, ...}) = 0 ioctl(3, TCGETS, 0x7ffe52525930) = -1 ENOTTY (Inappropriate ioctl for device) lseek(3, 0, SEEK_CUR) = 0 lseek(3, 0, SEEK_CUR) = 0 fstat(3, {st_mode=S_IFREG|0644, st_size=133104, ...}) = 0 read(3, ".. highlight:: c\n\n.. _type-struc"..., 133105) = 133104 read(3, "", 1) = 0 close(3) = 0 ``` after large file: ``` openat(AT_FDCWD, "cpython/Doc/c-api/typeobj.rst", O_RDONLY|O_CLOEXEC) = 3 fstat(3, {st_mode=S_IFREG|0644, st_size=133104, ...}) = 0 ioctl(3, TCGETS, 0x7ffdfac04b40) = -1 ENOTTY (Inappropriate ioctl for device) lseek(3, 0, SEEK_CUR) = 0 lseek(3, 0, SEEK_CUR) = 0 read(3, ".. highlight:: c\n\n.. _type-struc"..., 133105) = 133104 read(3, "", 1) = 0 close(3) = 0 ``` --- Modules/_io/fileio.c | 59 ++++++++++++++++++++++++++------------------ 1 file changed, 35 insertions(+), 24 deletions(-) diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index b5129ffcbffdcf..6c0c883578df36 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -72,6 +72,7 @@ typedef struct { unsigned int closefd : 1; char finalizing; unsigned int blksize; + Py_off_t size_estimated; PyObject *weakreflist; PyObject *dict; } fileio; @@ -196,6 +197,7 @@ fileio_new(PyTypeObject *type, PyObject *args, PyObject *kwds) self->appending = 0; self->seekable = -1; self->blksize = 0; + self->size_estimated = -1; self->closefd = 1; self->weakreflist = NULL; } @@ -482,6 +484,9 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode, if (fdfstat.st_blksize > 1) self->blksize = fdfstat.st_blksize; #endif /* HAVE_STRUCT_STAT_ST_BLKSIZE */ + if (fdfstat.st_size < PY_SSIZE_T_MAX) { + self->size_estimated = (Py_off_t)fdfstat.st_size; + } } #if defined(MS_WINDOWS) || defined(__CYGWIN__) @@ -707,41 +712,48 @@ static PyObject * _io_FileIO_readall_impl(fileio *self) /*[clinic end generated code: output=faa0292b213b4022 input=dbdc137f55602834]*/ { - struct _Py_stat_struct status; Py_off_t pos, end; PyObject *result; Py_ssize_t bytes_read = 0; Py_ssize_t n; size_t bufsize; - int fstat_result; - if (self->fd < 0) + if (self->fd < 0) { return err_closed(); + } - Py_BEGIN_ALLOW_THREADS - _Py_BEGIN_SUPPRESS_IPH + end = self->size_estimated; + if (end <= 0) { + /* Use a default size and resize as needed. */ + bufsize = SMALLCHUNK; + } + else { + /* This is probably a real file, so we try to allocate a + buffer one byte larger than the rest of the file. If the + calculation is right then we should get EOF without having + to enlarge the buffer. */ + bufsize = (size_t)(end) + 1; + + /* While a lot of code does open().read() to get the whole contents + of a file it is possible a caller seeks/reads a ways into the file + then calls readall() to get the rest, which would result in allocating + more than required. Guard against that for larger files where we expect + the I/O time to dominate anyways while keeping small files fast. */ + if (bufsize > 65536) { + Py_BEGIN_ALLOW_THREADS + _Py_BEGIN_SUPPRESS_IPH #ifdef MS_WINDOWS - pos = _lseeki64(self->fd, 0L, SEEK_CUR); + pos = _lseeki64(self->fd, 0L, SEEK_CUR); #else - pos = lseek(self->fd, 0L, SEEK_CUR); + pos = lseek(self->fd, 0L, SEEK_CUR); #endif - _Py_END_SUPPRESS_IPH - fstat_result = _Py_fstat_noraise(self->fd, &status); - Py_END_ALLOW_THREADS - - if (fstat_result == 0) - end = status.st_size; - else - end = (Py_off_t)-1; + _Py_END_SUPPRESS_IPH + Py_END_ALLOW_THREADS - if (end > 0 && end >= pos && pos >= 0 && end - pos < PY_SSIZE_T_MAX) { - /* This is probably a real file, so we try to allocate a - buffer one byte larger than the rest of the file. If the - calculation is right then we should get EOF without having - to enlarge the buffer. */ - bufsize = (size_t)(end - pos + 1); - } else { - bufsize = SMALLCHUNK; + if (end >= pos && pos >= 0 && end - pos < PY_SSIZE_T_MAX) { + bufsize = bufsize - pos; + } + } } result = PyBytes_FromStringAndSize(NULL, bufsize); @@ -783,7 +795,6 @@ _io_FileIO_readall_impl(fileio *self) return NULL; } bytes_read += n; - pos += n; } if (PyBytes_GET_SIZE(result) > bytes_read) { From 30d335e5a849d026b515ffeb75e6a1c977730a1b Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Wed, 19 Jun 2024 19:54:36 +0000 Subject: [PATCH 02/15] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20b?= =?UTF-8?q?lurb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2024-06-19-19-54-35.gh-issue-120754.uF29sj.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-06-19-19-54-35.gh-issue-120754.uF29sj.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-06-19-19-54-35.gh-issue-120754.uF29sj.rst b/Misc/NEWS.d/next/Core and Builtins/2024-06-19-19-54-35.gh-issue-120754.uF29sj.rst new file mode 100644 index 00000000000000..7c110c9b1d96d8 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-06-19-19-54-35.gh-issue-120754.uF29sj.rst @@ -0,0 +1 @@ +Reduce the number of system calls invoked when reading a whole file (ex. `open('a.txt').read()`). For a sample program that reads the contents of the 400+ `.rst` files in the cpython `Doc` code folder, there is a over 10% reduction in system call count. From 9d7f925af5a39e0e0fd33c476f5fa015e9bd1362 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Wed, 19 Jun 2024 12:57:15 -0700 Subject: [PATCH 03/15] Update news to pass lint --- .../2024-06-19-19-54-35.gh-issue-120754.uF29sj.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-06-19-19-54-35.gh-issue-120754.uF29sj.rst b/Misc/NEWS.d/next/Core and Builtins/2024-06-19-19-54-35.gh-issue-120754.uF29sj.rst index 7c110c9b1d96d8..d6324c4dc06468 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2024-06-19-19-54-35.gh-issue-120754.uF29sj.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2024-06-19-19-54-35.gh-issue-120754.uF29sj.rst @@ -1 +1 @@ -Reduce the number of system calls invoked when reading a whole file (ex. `open('a.txt').read()`). For a sample program that reads the contents of the 400+ `.rst` files in the cpython `Doc` code folder, there is a over 10% reduction in system call count. +Reduce the number of system calls invoked when reading a whole file (ex. ``open('a.txt').read()``). For a sample program that reads the contents of the 400+ ``.rst`` files in the cpython repository ``Doc`` folder, there is a over 10% reduction in system call count. From dd0b2943db06d526977e2aa7b3d023cce550d6a4 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Wed, 19 Jun 2024 13:35:55 -0700 Subject: [PATCH 04/15] Fix warning around type coercion on 64 bit windows; range is explicitly checked --- Modules/_io/fileio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index 6c0c883578df36..4392277114f00e 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -751,7 +751,7 @@ _io_FileIO_readall_impl(fileio *self) Py_END_ALLOW_THREADS if (end >= pos && pos >= 0 && end - pos < PY_SSIZE_T_MAX) { - bufsize = bufsize - pos; + bufsize = bufsize - (size_t)(pos); } } } From 7ad6fa81faf35f194c7daf0373c2aef571c56fdc Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Thu, 27 Jun 2024 16:12:28 -0700 Subject: [PATCH 05/15] Change constant to named constant per review --- Modules/_io/fileio.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index 4392277114f00e..d6dcd3cfcc411c 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -54,6 +54,9 @@ # define SMALLCHUNK BUFSIZ #endif +/* Size at which a buffer is considered "large" and behavior should change to + avoid excessive memory allocation */ +#define LARGE_BUFFER_CUTOFF_SIZE 65536 /*[clinic input] module _io @@ -689,7 +692,7 @@ new_buffersize(fileio *self, size_t currentsize) giving us amortized linear-time behavior. For bigger sizes, use a less-than-double growth factor to avoid excessive allocation. */ assert(currentsize <= PY_SSIZE_T_MAX); - if (currentsize > 65536) + if (currentsize > LARGE_BUFFER_CUTOFF_SIZE) addend = currentsize >> 3; else addend = 256 + currentsize; @@ -739,7 +742,7 @@ _io_FileIO_readall_impl(fileio *self) then calls readall() to get the rest, which would result in allocating more than required. Guard against that for larger files where we expect the I/O time to dominate anyways while keeping small files fast. */ - if (bufsize > 65536) { + if (bufsize > LARGE_BUFFER_CUTOFF_SIZE) { Py_BEGIN_ALLOW_THREADS _Py_BEGIN_SUPPRESS_IPH #ifdef MS_WINDOWS From fa9ac6ae0852e5886535721703f846e3a7651494 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Thu, 27 Jun 2024 16:19:01 -0700 Subject: [PATCH 06/15] Move downcast to Py_SAFE_DOWNCAST --- Modules/_io/fileio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index d6dcd3cfcc411c..594647421e0014 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -754,7 +754,7 @@ _io_FileIO_readall_impl(fileio *self) Py_END_ALLOW_THREADS if (end >= pos && pos >= 0 && end - pos < PY_SSIZE_T_MAX) { - bufsize = bufsize - (size_t)(pos); + bufsize = bufsize - Py_SAFE_DOWNCAST(pos, Py_off_t, size_t); } } } From 93aee472cc93d0d8d9e20daaa897e57ce18d532c Mon Sep 17 00:00:00 2001 From: Shantanu <12621235+hauntsaninja@users.noreply.github.com> Date: Thu, 27 Jun 2024 18:15:43 -0700 Subject: [PATCH 07/15] Update Misc/NEWS.d/next/Core and Builtins/2024-06-19-19-54-35.gh-issue-120754.uF29sj.rst --- .../2024-06-19-19-54-35.gh-issue-120754.uF29sj.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-06-19-19-54-35.gh-issue-120754.uF29sj.rst b/Misc/NEWS.d/next/Core and Builtins/2024-06-19-19-54-35.gh-issue-120754.uF29sj.rst index d6324c4dc06468..46481d8f31aaba 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2024-06-19-19-54-35.gh-issue-120754.uF29sj.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2024-06-19-19-54-35.gh-issue-120754.uF29sj.rst @@ -1 +1 @@ -Reduce the number of system calls invoked when reading a whole file (ex. ``open('a.txt').read()``). For a sample program that reads the contents of the 400+ ``.rst`` files in the cpython repository ``Doc`` folder, there is a over 10% reduction in system call count. +Reduce the number of system calls invoked when reading a whole file (ex. ``open('a.txt').read()``). For a sample program that reads the contents of the 400+ ``.rst`` files in the cpython repository ``Doc`` folder, there is an over 10% reduction in system call count. From 39e48ee6c8d582e00d5d8959d748b1b6d6b04820 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Fri, 28 Jun 2024 22:27:58 -0700 Subject: [PATCH 08/15] Update pyio to try and fstat and seek less --- Lib/_pyio.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/Lib/_pyio.py b/Lib/_pyio.py index 7d298e1674b49a..d627166e470384 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -1575,6 +1575,7 @@ def __init__(self, file, mode='r', closefd=True, opener=None): # don't exist. pass self._blksize = getattr(fdfstat, 'st_blksize', 0) + self._estimated_size = getattr(fdfstat, 'st_size', -1) if self._blksize <= 1: self._blksize = DEFAULT_BUFFER_SIZE @@ -1654,14 +1655,18 @@ def readall(self): """ self._checkClosed() self._checkReadable() - bufsize = DEFAULT_BUFFER_SIZE - try: - pos = os.lseek(self._fd, 0, SEEK_CUR) - end = os.fstat(self._fd).st_size - if end >= pos: - bufsize = end - pos + 1 - except OSError: - pass + if self._estimated_size <= 0: + bufsize = DEFAULT_BUFFER_SIZE + else: + bufsize = self._estimated_size + 1 + + if self._estimated_size > 65536: + try: + pos = os.lseek(self._fd, 0, SEEK_CUR) + if self._estimated_size >= pos: + bufsize = self._estimated_size - pos + 1 + except OSError: + pass result = bytearray() while True: From b7d3880170a083c59b64e83aff762bc1465f1342 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Fri, 28 Jun 2024 23:55:52 -0700 Subject: [PATCH 09/15] Cap read size at _PY_READ_MAX to fix windows x86 size_t is too small (and read would cap it anyways) to read the whole file --- Modules/_io/fileio.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index 594647421e0014..55afc677b18ff7 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -735,7 +735,12 @@ _io_FileIO_readall_impl(fileio *self) buffer one byte larger than the rest of the file. If the calculation is right then we should get EOF without having to enlarge the buffer. */ - bufsize = (size_t)(end) + 1; + if (end >= _PY_READ_MAX) { + bufsize = _PY_READ_MAX; + } + else { + bufsize = Py_SAFE_DOWNCAST(end, Py_off_t, size_t) + 1; + } /* While a lot of code does open().read() to get the whole contents of a file it is possible a caller seeks/reads a ways into the file @@ -753,12 +758,13 @@ _io_FileIO_readall_impl(fileio *self) _Py_END_SUPPRESS_IPH Py_END_ALLOW_THREADS - if (end >= pos && pos >= 0 && end - pos < PY_SSIZE_T_MAX) { - bufsize = bufsize - Py_SAFE_DOWNCAST(pos, Py_off_t, size_t); + if (end >= pos && pos >= 0 && end - pos < _PY_READ_MAX) { + bufsize = Py_SAFE_DOWNCAST(end - pos, Py_off_t, size_t) + 1; } } } + result = PyBytes_FromStringAndSize(NULL, bufsize); if (result == NULL) return NULL; From a4c2cb681d46a587f3cc5ebc49f78b4db1cadb05 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Sat, 29 Jun 2024 00:33:54 -0700 Subject: [PATCH 10/15] Cap initial bufsize on pyio --- Lib/_pyio.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Lib/_pyio.py b/Lib/_pyio.py index d627166e470384..4d8ef43cd06afe 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -1668,6 +1668,11 @@ def readall(self): except OSError: pass + # Cap read size so we don't try reading larger than a long on x86 + # can hold. + if bufsize > sys.maxsize: + bufsize = sys.maxsize + result = bytearray() while True: if len(result) >= bufsize: From 84bd2d879ff73935c0f9e16bf14a9557bb5d95ee Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Sat, 29 Jun 2024 01:13:11 -0700 Subject: [PATCH 11/15] Pyio was relying on getting the size after truncate for test_largefile to pass --- Lib/_pyio.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/Lib/_pyio.py b/Lib/_pyio.py index 4d8ef43cd06afe..75b5ad1b1a47d2 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -1575,9 +1575,9 @@ def __init__(self, file, mode='r', closefd=True, opener=None): # don't exist. pass self._blksize = getattr(fdfstat, 'st_blksize', 0) - self._estimated_size = getattr(fdfstat, 'st_size', -1) if self._blksize <= 1: self._blksize = DEFAULT_BUFFER_SIZE + self._estimated_size = fdfstat.st_size if _setmode: # don't translate newlines (\r\n <=> \n) @@ -1668,11 +1668,6 @@ def readall(self): except OSError: pass - # Cap read size so we don't try reading larger than a long on x86 - # can hold. - if bufsize > sys.maxsize: - bufsize = sys.maxsize - result = bytearray() while True: if len(result) >= bufsize: @@ -1747,6 +1742,7 @@ def truncate(self, size=None): if size is None: size = self.tell() os.ftruncate(self._fd, size) + self._estimated_size = size return size def close(self): From b505334be9b6ffa1f05b205a8b72f4894898a600 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Tue, 2 Jul 2024 09:51:20 +0200 Subject: [PATCH 12/15] Comment formatting --- Modules/_io/fileio.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index 55afc677b18ff7..30e8181771d5ae 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -732,9 +732,9 @@ _io_FileIO_readall_impl(fileio *self) } else { /* This is probably a real file, so we try to allocate a - buffer one byte larger than the rest of the file. If the - calculation is right then we should get EOF without having - to enlarge the buffer. */ + buffer one byte larger than the rest of the file. If the + calculation is right then we should get EOF without having + to enlarge the buffer. */ if (end >= _PY_READ_MAX) { bufsize = _PY_READ_MAX; } @@ -743,10 +743,10 @@ _io_FileIO_readall_impl(fileio *self) } /* While a lot of code does open().read() to get the whole contents - of a file it is possible a caller seeks/reads a ways into the file - then calls readall() to get the rest, which would result in allocating - more than required. Guard against that for larger files where we expect - the I/O time to dominate anyways while keeping small files fast. */ + of a file it is possible a caller seeks/reads a ways into the file + then calls readall() to get the rest, which would result in allocating + more than required. Guard against that for larger files where we expect + the I/O time to dominate anyways while keeping small files fast. */ if (bufsize > LARGE_BUFFER_CUTOFF_SIZE) { Py_BEGIN_ALLOW_THREADS _Py_BEGIN_SUPPRESS_IPH From 7e276ec2082edd84d3266cd3a7c4e9d8176f0092 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Tue, 2 Jul 2024 23:07:55 -0700 Subject: [PATCH 13/15] size_estimated -> estimated_size --- Modules/_io/fileio.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index 30e8181771d5ae..93c444aeef301e 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -75,7 +75,7 @@ typedef struct { unsigned int closefd : 1; char finalizing; unsigned int blksize; - Py_off_t size_estimated; + Py_off_t estimated_size; PyObject *weakreflist; PyObject *dict; } fileio; @@ -200,7 +200,7 @@ fileio_new(PyTypeObject *type, PyObject *args, PyObject *kwds) self->appending = 0; self->seekable = -1; self->blksize = 0; - self->size_estimated = -1; + self->estimated_size = -1; self->closefd = 1; self->weakreflist = NULL; } @@ -488,7 +488,7 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode, self->blksize = fdfstat.st_blksize; #endif /* HAVE_STRUCT_STAT_ST_BLKSIZE */ if (fdfstat.st_size < PY_SSIZE_T_MAX) { - self->size_estimated = (Py_off_t)fdfstat.st_size; + self->estimated_size = (Py_off_t)fdfstat.st_size; } } @@ -725,7 +725,7 @@ _io_FileIO_readall_impl(fileio *self) return err_closed(); } - end = self->size_estimated; + end = self->estimated_size; if (end <= 0) { /* Use a default size and resize as needed. */ bufsize = SMALLCHUNK; From 9be6d1dc4dc2ed02f4a0fef36a1103f904ad8d81 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Tue, 2 Jul 2024 23:10:33 -0700 Subject: [PATCH 14/15] Py_SAFE_DOWNCAST -> C casts --- Modules/_io/fileio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index 93c444aeef301e..37f500cf2fab0e 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -739,7 +739,7 @@ _io_FileIO_readall_impl(fileio *self) bufsize = _PY_READ_MAX; } else { - bufsize = Py_SAFE_DOWNCAST(end, Py_off_t, size_t) + 1; + bufsize = (size_t)end + 1; } /* While a lot of code does open().read() to get the whole contents @@ -759,7 +759,7 @@ _io_FileIO_readall_impl(fileio *self) Py_END_ALLOW_THREADS if (end >= pos && pos >= 0 && end - pos < _PY_READ_MAX) { - bufsize = Py_SAFE_DOWNCAST(end - pos, Py_off_t, size_t) + 1; + bufsize = (size_t)(end - pos) + 1; } } } From dc8e910c0d83a8411dfe33e0b2e4f80e54cbd440 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Wed, 3 Jul 2024 10:45:07 -0700 Subject: [PATCH 15/15] Apply suggestions from code review Co-authored-by: Victor Stinner --- Modules/_io/fileio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index 37f500cf2fab0e..d5bf328eee9c10 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -735,7 +735,7 @@ _io_FileIO_readall_impl(fileio *self) buffer one byte larger than the rest of the file. If the calculation is right then we should get EOF without having to enlarge the buffer. */ - if (end >= _PY_READ_MAX) { + if (end > _PY_READ_MAX - 1) { bufsize = _PY_READ_MAX; } else { @@ -758,7 +758,7 @@ _io_FileIO_readall_impl(fileio *self) _Py_END_SUPPRESS_IPH Py_END_ALLOW_THREADS - if (end >= pos && pos >= 0 && end - pos < _PY_READ_MAX) { + if (end >= pos && pos >= 0 && (end - pos) < (_PY_READ_MAX - 1)) { bufsize = (size_t)(end - pos) + 1; } }