54
54
# define SMALLCHUNK BUFSIZ
55
55
#endif
56
56
57
+ /* Size at which a buffer is considered "large" and behavior should change to
58
+ avoid excessive memory allocation */
59
+ #define LARGE_BUFFER_CUTOFF_SIZE 65536
57
60
58
61
/*[clinic input]
59
62
module _io
@@ -72,6 +75,7 @@ typedef struct {
72
75
unsigned int closefd : 1 ;
73
76
char finalizing ;
74
77
unsigned int blksize ;
78
+ Py_off_t size_estimated ;
75
79
PyObject * weakreflist ;
76
80
PyObject * dict ;
77
81
} fileio ;
@@ -196,6 +200,7 @@ fileio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
196
200
self -> appending = 0 ;
197
201
self -> seekable = -1 ;
198
202
self -> blksize = 0 ;
203
+ self -> size_estimated = -1 ;
199
204
self -> closefd = 1 ;
200
205
self -> weakreflist = NULL ;
201
206
}
@@ -482,6 +487,9 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
482
487
if (fdfstat .st_blksize > 1 )
483
488
self -> blksize = fdfstat .st_blksize ;
484
489
#endif /* HAVE_STRUCT_STAT_ST_BLKSIZE */
490
+ if (fdfstat .st_size < PY_SSIZE_T_MAX ) {
491
+ self -> size_estimated = (Py_off_t )fdfstat .st_size ;
492
+ }
485
493
}
486
494
487
495
#if defined(MS_WINDOWS ) || defined(__CYGWIN__ )
@@ -684,7 +692,7 @@ new_buffersize(fileio *self, size_t currentsize)
684
692
giving us amortized linear-time behavior. For bigger sizes, use a
685
693
less-than-double growth factor to avoid excessive allocation. */
686
694
assert (currentsize <= PY_SSIZE_T_MAX );
687
- if (currentsize > 65536 )
695
+ if (currentsize > LARGE_BUFFER_CUTOFF_SIZE )
688
696
addend = currentsize >> 3 ;
689
697
else
690
698
addend = 256 + currentsize ;
@@ -707,41 +715,48 @@ static PyObject *
707
715
_io_FileIO_readall_impl (fileio * self )
708
716
/*[clinic end generated code: output=faa0292b213b4022 input=dbdc137f55602834]*/
709
717
{
710
- struct _Py_stat_struct status ;
711
718
Py_off_t pos , end ;
712
719
PyObject * result ;
713
720
Py_ssize_t bytes_read = 0 ;
714
721
Py_ssize_t n ;
715
722
size_t bufsize ;
716
- int fstat_result ;
717
723
718
- if (self -> fd < 0 )
724
+ if (self -> fd < 0 ) {
719
725
return err_closed ();
726
+ }
720
727
721
- Py_BEGIN_ALLOW_THREADS
722
- _Py_BEGIN_SUPPRESS_IPH
728
+ end = self -> size_estimated ;
729
+ if (end <= 0 ) {
730
+ /* Use a default size and resize as needed. */
731
+ bufsize = SMALLCHUNK ;
732
+ }
733
+ else {
734
+ /* This is probably a real file, so we try to allocate a
735
+ buffer one byte larger than the rest of the file. If the
736
+ calculation is right then we should get EOF without having
737
+ to enlarge the buffer. */
738
+ bufsize = (size_t )(end ) + 1 ;
739
+
740
+ /* While a lot of code does open().read() to get the whole contents
741
+ of a file, it is possible a caller seeks/reads partway into the file
742
+ then calls readall() to get the rest, which would result in allocating
743
+ more than required. Guard against that for larger files where we expect
744
+ the I/O time to dominate anyway, while keeping small files fast. */
745
+ if (bufsize > LARGE_BUFFER_CUTOFF_SIZE ) {
746
+ Py_BEGIN_ALLOW_THREADS
747
+ _Py_BEGIN_SUPPRESS_IPH
723
748
#ifdef MS_WINDOWS
724
- pos = _lseeki64 (self -> fd , 0L , SEEK_CUR );
749
+ pos = _lseeki64 (self -> fd , 0L , SEEK_CUR );
725
750
#else
726
- pos = lseek (self -> fd , 0L , SEEK_CUR );
751
+ pos = lseek (self -> fd , 0L , SEEK_CUR );
727
752
#endif
728
- _Py_END_SUPPRESS_IPH
729
- fstat_result = _Py_fstat_noraise (self -> fd , & status );
730
- Py_END_ALLOW_THREADS
731
-
732
- if (fstat_result == 0 )
733
- end = status .st_size ;
734
- else
735
- end = (Py_off_t )- 1 ;
753
+ _Py_END_SUPPRESS_IPH
754
+ Py_END_ALLOW_THREADS
736
755
737
- if (end > 0 && end >= pos && pos >= 0 && end - pos < PY_SSIZE_T_MAX ) {
738
- /* This is probably a real file, so we try to allocate a
739
- buffer one byte larger than the rest of the file. If the
740
- calculation is right then we should get EOF without having
741
- to enlarge the buffer. */
742
- bufsize = (size_t )(end - pos + 1 );
743
- } else {
744
- bufsize = SMALLCHUNK ;
756
+ if (end >= pos && pos >= 0 && end - pos < PY_SSIZE_T_MAX ) {
757
+ bufsize = bufsize - Py_SAFE_DOWNCAST (pos , Py_off_t , size_t );
758
+ }
759
+ }
745
760
}
746
761
747
762
result = PyBytes_FromStringAndSize (NULL , bufsize );
@@ -783,7 +798,6 @@ _io_FileIO_readall_impl(fileio *self)
783
798
return NULL ;
784
799
}
785
800
bytes_read += n ;
786
- pos += n ;
787
801
}
788
802
789
803
if (PyBytes_GET_SIZE (result ) > bytes_read ) {
0 commit comments