From 5c9ad6201a8ca1786e06d78665265770a0696369 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Thu, 1 Jun 2017 10:50:50 -0700 Subject: [PATCH 1/5] Maintain a list of BufferedWriter objects. Flush them on exit. In Python 3, the buffer and the underlying file object are separate and so the order in which objects are finalized matters. This is unlike Python 2 where the file and buffer were a single object and finalization was done for both at the same time. In Python 3, if the file is finalized and closed before the buffer then the data in the buffer is lost. This change adds a doubly linked list of open file buffers. An atexit hook ensures they are flushed before proceeding with interpreter shutdown. This addition does not remove the need to properly close files as there are other reasons why buffered data could get lost during finalization. Initial patch by Armin Rigo. --- Lib/_pyio.py | 21 ++++++++++++++++++ Modules/_io/_iomodule.c | 2 ++ Modules/_io/_iomodule.h | 2 ++ Modules/_io/bufferedio.c | 46 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 71 insertions(+) diff --git a/Lib/_pyio.py b/Lib/_pyio.py index 4653847bcb18e9..8e671199d1a0d4 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -1185,6 +1185,7 @@ def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE): self.buffer_size = buffer_size self._write_buf = bytearray() self._write_lock = Lock() + _register_writer(self) def writable(self): return self.raw.writable() @@ -2574,3 +2575,23 @@ def encoding(self): def detach(self): # This doesn't make sense on StringIO. self._unsupported("detach") + + +# ____________________________________________________________ + +import atexit, weakref + +_all_writers = weakref.WeakKeyDictionary() + +def _register_writer(w): + # keep weak-ref to buffered writer + _all_writers[w] = True + +def _flush_all_writers(): + # Ensure all buffered writers are flushed before proceeding with + # normal shutdown. 
Otherwise, if the underlying file objects get + # finalized before the buffered writer wrapping it then any buffered + # data will be lost. + for w in _all_writers: + w.flush() +atexit.register(_flush_all_writers) diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index f0621f4d4ab4bb..5db44f970d22ba 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -766,6 +766,8 @@ PyInit__io(void) !(_PyIO_empty_bytes = PyBytes_FromStringAndSize(NULL, 0))) goto fail; + _Py_PyAtExit(_PyIO_atexit_flush); + state->initialized = 1; return m; diff --git a/Modules/_io/_iomodule.h b/Modules/_io/_iomodule.h index db8403774ead23..1dce5dada4e23a 100644 --- a/Modules/_io/_iomodule.h +++ b/Modules/_io/_iomodule.h @@ -183,3 +183,5 @@ extern PyObject *_PyIO_empty_str; extern PyObject *_PyIO_empty_bytes; extern PyTypeObject _PyBytesIOBuffer_Type; + +extern void _PyIO_atexit_flush(void); diff --git a/Modules/_io/bufferedio.c b/Modules/_io/bufferedio.c index 189b1cd8442827..38d14249f290ed 100644 --- a/Modules/_io/bufferedio.c +++ b/Modules/_io/bufferedio.c @@ -196,6 +196,10 @@ bufferediobase_write(PyObject *self, PyObject *args) } +struct doubly_linked_s { + struct doubly_linked_s *prev, *next; +}; + typedef struct { PyObject_HEAD @@ -240,8 +244,15 @@ typedef struct { PyObject *dict; PyObject *weakreflist; + + /* a doubly-linked chained list of "buffered" objects that need to + be flushed when the process exits */ + struct doubly_linked_s buffered_writers_list; } buffered; +static struct doubly_linked_s doubly_linked_end = { + &doubly_linked_end, &doubly_linked_end }; + /* Implementation notes: @@ -386,6 +397,15 @@ _enter_buffered_busy(buffered *self) (self->buffer_size * (size / self->buffer_size))) +static void +remove_from_linked_list(buffered *self) +{ + self->buffered_writers_list.next->prev = self->buffered_writers_list.prev; + self->buffered_writers_list.prev->next = self->buffered_writers_list.next; + self->buffered_writers_list.prev = NULL; + 
self->buffered_writers_list.next = NULL; +} + static void buffered_dealloc(buffered *self) { @@ -394,6 +414,8 @@ buffered_dealloc(buffered *self) return; _PyObject_GC_UNTRACK(self); self->ok = 0; + if (self->buffered_writers_list.next != NULL) + remove_from_linked_list(self); if (self->weakreflist != NULL) PyObject_ClearWeakRefs((PyObject *)self); Py_CLEAR(self->raw); @@ -1817,10 +1839,34 @@ _io_BufferedWriter___init___impl(buffered *self, PyObject *raw, self->fast_closed_checks = (Py_TYPE(self) == &PyBufferedWriter_Type && Py_TYPE(raw) == &PyFileIO_Type); + if (self->buffered_writers_list.next == NULL) { + self->buffered_writers_list.prev = &doubly_linked_end; + self->buffered_writers_list.next = doubly_linked_end.next; + doubly_linked_end.next->prev = &self->buffered_writers_list; + doubly_linked_end.next = &self->buffered_writers_list; + } + self->ok = 1; return 0; } +/* +* Ensure all buffered writers are flushed before proceeding with +* normal shutdown. Otherwise, if the underlying file objects get +* finalized before the buffered writer wrapping it then any buffered +* data will be lost. +*/ +void _PyIO_atexit_flush(void) +{ + while (doubly_linked_end.next != &doubly_linked_end) { + buffered *buf = (buffered *)(((char *)doubly_linked_end.next) - + offsetof(buffered, buffered_writers_list)); + remove_from_linked_list(buf); + buffered_flush(buf, NULL); + PyErr_Clear(); + } +} + static Py_ssize_t _bufferedwriter_raw_write(buffered *self, char *start, Py_ssize_t len) { From c6c1bbd4579c65c15c9b2b2ef926a2f6cca35eb3 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Mon, 5 Jun 2017 10:24:41 -0700 Subject: [PATCH 2/5] Use weakref.WeakSet instead of WeakKeyDictionary. 
--- Lib/_pyio.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/_pyio.py b/Lib/_pyio.py index 8e671199d1a0d4..be520a5546d2dd 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -2581,11 +2581,11 @@ def detach(self): import atexit, weakref -_all_writers = weakref.WeakKeyDictionary() +_all_writers = weakref.WeakSet() def _register_writer(w): # keep weak-ref to buffered writer - _all_writers[w] = True + _all_writers.add(w) def _flush_all_writers(): # Ensure all buffered writers are flushed before proceeding with From 74731be15ab24b53b9352b620400f52f653f17ce Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Mon, 5 Jun 2017 10:25:19 -0700 Subject: [PATCH 3/5] Simplify buffered double-linked list types. --- Modules/_io/bufferedio.c | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/Modules/_io/bufferedio.c b/Modules/_io/bufferedio.c index 38d14249f290ed..50c87c1746d6f2 100644 --- a/Modules/_io/bufferedio.c +++ b/Modules/_io/bufferedio.c @@ -196,11 +196,7 @@ bufferediobase_write(PyObject *self, PyObject *args) } -struct doubly_linked_s { - struct doubly_linked_s *prev, *next; -}; - -typedef struct { +typedef struct _buffered { PyObject_HEAD PyObject *raw; @@ -247,11 +243,14 @@ typedef struct { /* a doubly-linked chained list of "buffered" objects that need to be flushed when the process exits */ - struct doubly_linked_s buffered_writers_list; + struct _buffered *next, *prev; } buffered; -static struct doubly_linked_s doubly_linked_end = { - &doubly_linked_end, &doubly_linked_end }; +/* the actual list of buffered objects */ +static buffered buffer_list_end = { + .next = &buffer_list_end, + .prev = &buffer_list_end +}; /* Implementation notes: @@ -400,10 +399,10 @@ _enter_buffered_busy(buffered *self) static void remove_from_linked_list(buffered *self) { - self->buffered_writers_list.next->prev = self->buffered_writers_list.prev; - self->buffered_writers_list.prev->next = 
self->buffered_writers_list.next; - self->buffered_writers_list.prev = NULL; - self->buffered_writers_list.next = NULL; + self->next->prev = self->prev; + self->prev->next = self->next; + self->prev = NULL; + self->next = NULL; } static void @@ -414,7 +413,7 @@ buffered_dealloc(buffered *self) return; _PyObject_GC_UNTRACK(self); self->ok = 0; - if (self->buffered_writers_list.next != NULL) + if (self->next != NULL) remove_from_linked_list(self); if (self->weakreflist != NULL) PyObject_ClearWeakRefs((PyObject *)self); @@ -1839,11 +1838,11 @@ _io_BufferedWriter___init___impl(buffered *self, PyObject *raw, self->fast_closed_checks = (Py_TYPE(self) == &PyBufferedWriter_Type && Py_TYPE(raw) == &PyFileIO_Type); - if (self->buffered_writers_list.next == NULL) { - self->buffered_writers_list.prev = &doubly_linked_end; - self->buffered_writers_list.next = doubly_linked_end.next; - doubly_linked_end.next->prev = &self->buffered_writers_list; - doubly_linked_end.next = &self->buffered_writers_list; + if (self->next == NULL) { + self->prev = &buffer_list_end; + self->next = buffer_list_end.next; + buffer_list_end.next->prev = self; + buffer_list_end.next = self; } self->ok = 1; @@ -1858,9 +1857,8 @@ _io_BufferedWriter___init___impl(buffered *self, PyObject *raw, */ void _PyIO_atexit_flush(void) { - while (doubly_linked_end.next != &doubly_linked_end) { - buffered *buf = (buffered *)(((char *)doubly_linked_end.next) - - offsetof(buffered, buffered_writers_list)); + while (buffer_list_end.next != &buffer_list_end) { + buffered *buf = buffer_list_end.next; remove_from_linked_list(buf); buffered_flush(buf, NULL); PyErr_Clear(); From 61952f01722d6a7c3335f393f7ed5925eec5eca2 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Fri, 23 Jun 2017 09:57:35 -0700 Subject: [PATCH 4/5] In _flush_all_writers(), suppress errors from flush(). 
--- Lib/_pyio.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Lib/_pyio.py b/Lib/_pyio.py index be520a5546d2dd..3aa2b24c040aae 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -2593,5 +2593,8 @@ def _flush_all_writers(): # finalized before the buffered writer wrapping it then any buffered # data will be lost. for w in _all_writers: - w.flush() + try: + w.flush() + except: + pass atexit.register(_flush_all_writers) From a98052f4ad38cf3465cd342748b57beb68260405 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Mon, 4 Sep 2017 12:41:20 -0700 Subject: [PATCH 5/5] Remove NEWS entry, use blurb. --- .../Core and Builtins/2017-09-04-12-46-25.bpo-17852.OxAtCg.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2017-09-04-12-46-25.bpo-17852.OxAtCg.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2017-09-04-12-46-25.bpo-17852.OxAtCg.rst b/Misc/NEWS.d/next/Core and Builtins/2017-09-04-12-46-25.bpo-17852.OxAtCg.rst new file mode 100644 index 00000000000000..185664c747d0e3 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2017-09-04-12-46-25.bpo-17852.OxAtCg.rst @@ -0,0 +1,2 @@ +Maintain a list of open buffered files, flush them before exiting the +interpreter. Based on a patch from Armin Rigo.