Commit 0b00ce1

Implement memory performance fixes for downloads to non-seekable streams
2 parents 67f9820 + 27173b3

3 files changed: +90 additions, -17 deletions
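
The substance of the change is in DeferQueue (diffed in s3transfer/download.py below): the write heap used to carry whole (offset, data) tuples, with a set tracking queued offsets; it now orders bare offsets while a dict holds each pending payload exactly once. A minimal sketch of that bookkeeping change, reusing the names from the diff:

    import heapq

    # Before: heap entries carried the payload bytes themselves.
    old_writes = []
    heapq.heappush(old_writes, (0, b'foo'))

    # After: the heap orders bare offsets; the payload lives once in a
    # dict keyed by offset, so a retried chunk carrying more data can
    # replace the queued copy in place instead of stacking a second
    # buffer of the same bytes.
    new_writes = []
    pending_offsets = {}
    heapq.heappush(new_writes, 0)
    pending_offsets[0] = b'foo'
    pending_offsets[0] = b'foo bar'  # a longer retry overwrites in place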
Lines changed: 5 additions & 0 deletions

@@ -0,0 +1,5 @@
+{
+  "type": "enhancement",
+  "category": "``s3``",
+  "description": "Implement memory performance fixes for downloads to non-seekable streams"
+}

s3transfer/download.py

Lines changed: 39 additions & 13 deletions

@@ -750,7 +750,7 @@ class DeferQueue:

     def __init__(self):
         self._writes = []
-        self._pending_offsets = set()
+        self._pending_offsets = {}
         self._next_offset = 0

     def request_writes(self, offset, data):
@@ -766,23 +766,49 @@ def request_writes(self, offset, data):
         each method call.

         """
-        if offset < self._next_offset:
+        if offset + len(data) <= self._next_offset:
             # This is a request for a write that we've already
             # seen. This can happen in the event of a retry
             # where if we retry at offset N/2, we'll requeue
             # offsets 0-N/2 again.
             return []
         writes = []
+        if offset < self._next_offset:
+            # This is a special case where the write request contains
+            # both seen AND unseen data. This can happen when we queue
+            # part of a chunk due to an incomplete read, then pop the
+            # incomplete data for writing, and then receive the retry
+            # for the incomplete read, which contains both the previously
+            # seen partial chunk and the rest of the chunk (unseen).
+            #
+            # In this case, we discard the bytes of the data we've already
+            # queued before, and only queue the unseen bytes.
+            seen_bytes = self._next_offset - offset
+            data = data[seen_bytes:]
+            offset = self._next_offset
         if offset in self._pending_offsets:
-            # We've already queued this offset so this request is
-            # a duplicate. In this case we should ignore
-            # this request and prefer what's already queued.
-            return []
-        heapq.heappush(self._writes, (offset, data))
-        self._pending_offsets.add(offset)
-        while self._writes and self._writes[0][0] == self._next_offset:
-            next_write = heapq.heappop(self._writes)
-            writes.append({'offset': next_write[0], 'data': next_write[1]})
-            self._pending_offsets.remove(next_write[0])
-            self._next_offset += len(next_write[1])
+            queued_data = self._pending_offsets[offset]
+            if len(data) <= len(queued_data):
+                # We already have a write request queued with the same
+                # offset and at least as much data as is present in this
+                # request. In this case we should ignore this request
+                # and prefer what's already queued.
+                return []
+            else:
+                # We have a write request queued with the same offset,
+                # but this request contains more data. This can happen
+                # in the case of a retried request due to an incomplete
+                # read, followed by a retry containing the full response
+                # body. In this case, we should overwrite the queued
+                # request with this one since it contains more data.
+                self._pending_offsets[offset] = data
+        else:
+            heapq.heappush(self._writes, offset)
+            self._pending_offsets[offset] = data
+        while self._writes and self._writes[0] == self._next_offset:
+            next_write_offset = heapq.heappop(self._writes)
+            next_write = self._pending_offsets[next_write_offset]
+            writes.append({'offset': next_write_offset, 'data': next_write})
+            del self._pending_offsets[next_write_offset]
+            self._next_offset += len(next_write)
         return writes
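
A rough usage sketch of the new trimming behavior (mirroring the unit tests in tests/unit/test_download.py below; assumes the DeferQueue imported from s3transfer.download at this commit):

    from s3transfer.download import DeferQueue

    q = DeferQueue()

    # The first chunk is contiguous with offset 0, so it is returned
    # for writing immediately and the next expected offset becomes 3.
    assert q.request_writes(offset=0, data='foo') == [
        {'offset': 0, 'data': 'foo'}
    ]

    # A retry re-sends the already-written bytes plus the rest of the
    # chunk; only the unseen tail is returned, so seen bytes are never
    # buffered a second time.
    assert q.request_writes(offset=0, data='foo bar') == [
        {'offset': 3, 'data': ' bar'}
    ]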

tests/unit/test_download.py

Lines changed: 46 additions & 4 deletions

@@ -963,7 +963,7 @@ def test_data_queued_in_order(self):
         writes = self.q.request_writes(offset=11, data='hello again')
         self.assertEqual(writes, [{'offset': 11, 'data': 'hello again'}])

-    def test_writes_below_min_offset_are_ignored(self):
+    def test_writes_with_last_byte_below_min_offset_are_ignored(self):
         self.q.request_writes(offset=0, data='a')
         self.q.request_writes(offset=1, data='b')
         self.q.request_writes(offset=2, data='c')
@@ -978,13 +978,36 @@ def test_writes_below_min_offset_are_ignored(self):
             [{'offset': 3, 'data': 'd'}],
         )

-    def test_duplicate_writes_are_ignored(self):
+    def test_writes_below_min_offset_with_last_byte_above_min_offset_are_queued(
+        self,
+    ):
+        self.assertEqual(
+            self.q.request_writes(offset=0, data='foo'),
+            [{'offset': 0, 'data': 'foo'}],
+        )
+
+        # Even though a partial write of 'foo' was completed at offset 0,
+        # a subsequent request to the same offset with a longer
+        # length will write a substring of the data starting at
+        # index next_offset.
+        self.assertEqual(
+            self.q.request_writes(offset=0, data='foo bar'),
+            [
+                # Note we are writing a substring of the data starting at
+                # index 3 since the previous write to offset 0 had length 3.
+                {'offset': 3, 'data': ' bar'},
+            ],
+        )
+
+    def test_duplicate_writes_same_length_are_ignored(self):
         self.q.request_writes(offset=2, data='c')
         self.q.request_writes(offset=1, data='b')

         # We're still waiting for offset=0, but if
-        # a duplicate write comes in for offset=2/offset=1
-        # it's ignored. This gives "first one wins" behavior.
+        # a duplicate write with the same length comes in
+        # for offset=2/offset=1 it's ignored.
+        # This gives "largest one wins" behavior with ties
+        # broken via "first one wins".
         self.assertEqual(self.q.request_writes(offset=2, data='X'), [])
         self.assertEqual(self.q.request_writes(offset=1, data='Y'), [])

@@ -997,3 +1020,22 @@ def test_duplicate_writes_are_ignored(self):
                 {'offset': 2, 'data': 'c'},
             ],
         )
+
+    def test_duplicate_writes_longer_length_update_queue(self):
+        self.q.request_writes(offset=1, data='b')
+
+        # We're still waiting for offset=0, but if
+        # a write comes in for the same offset it
+        # updates the queue if the request contains more data.
+        # This gives "largest one wins" behavior with ties
+        # broken via "first one wins".
+        self.assertEqual(self.q.request_writes(offset=1, data='bar'), [])
+
+        self.assertEqual(
+            self.q.request_writes(offset=0, data='a'),
+            [
+                {'offset': 0, 'data': 'a'},
+                # Note we're seeing 'bar', and not 'b', since len(bar) > len(b).
+                {'offset': 1, 'data': 'bar'},
+            ],
+        )
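
And a companion sketch of the "largest one wins" path exercised by the last test above, under the same assumption:

    from s3transfer.download import DeferQueue

    q = DeferQueue()

    # offset=1 is held while we wait for offset=0; a longer retry for
    # the same offset replaces the queued payload rather than adding a
    # duplicate entry.
    assert q.request_writes(offset=1, data='b') == []
    assert q.request_writes(offset=1, data='bar') == []

    # Once offset=0 arrives, the queue flushes in order, and the longer
    # payload ('bar', not 'b') is what gets written.
    assert q.request_writes(offset=0, data='a') == [
        {'offset': 0, 'data': 'a'},
        {'offset': 1, 'data': 'bar'},
    ]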
