Skip to content

Commit 1b372ac

Browse files
committed
fix ListObjectVersions pagination when version id marker is deleted
1 parent 25d4d82 commit 1b372ac

File tree

5 files changed

+598
-9
lines changed

5 files changed

+598
-9
lines changed

localstack-core/localstack/services/s3/provider.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1854,6 +1854,13 @@ def list_object_versions(
18541854
if version.version_id == version_id_marker:
18551855
version_key_marker_found = True
18561856
continue
1857+
1858+
# it is possible that the version_id_marker related object has been deleted, in that case, start
1859+
# as soon as the next version id is smaller than the version id marker (meaning this version was
1860+
# next after the now-deleted version)
1861+
elif version.version_id < version_id_marker:
1862+
version_key_marker_found = True
1863+
18571864
elif not version_key_marker_found:
18581865
# as long as we have not passed the version_key_marker, skip the versions
18591866
continue

localstack-core/localstack/services/s3/utils.py

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@
22
import codecs
33
import datetime
44
import hashlib
5+
import itertools
56
import logging
67
import re
8+
import time
79
import zlib
810
from enum import StrEnum
911
from secrets import token_bytes
@@ -68,6 +70,7 @@
6870
from localstack.services.s3.exceptions import InvalidRequest, MalformedXML
6971
from localstack.utils.aws import arns
7072
from localstack.utils.aws.arns import parse_arn
73+
from localstack.utils.objects import singleton_factory
7174
from localstack.utils.strings import (
7275
is_base64,
7376
to_bytes,
@@ -95,8 +98,6 @@
9598
RFC1123 = "%a, %d %b %Y %H:%M:%S GMT"
9699
_gmt_zone_info = ZoneInfo("GMT")
97100

98-
_version_id_safe_encode_translation = bytes.maketrans(b"+/", b"._")
99-
100101

101102
def s3_response_handler(chain: HandlerChain, context: RequestContext, response: Response):
102103
"""
@@ -1041,10 +1042,14 @@ def generate_safe_version_id() -> str:
10411042
# the safe b64 encoding is inspired by the stdlib base64.urlsafe_b64encode
10421043
# and also using stdlib secrets.token_urlsafe, but with a different alphabet adapted for S3
10431044
# VersionId cannot have `-` in it, as it fails in XML
1044-
tok = token_bytes(24)
1045-
return (
1046-
base64.b64encode(tok)
1047-
.translate(_version_id_safe_encode_translation)
1048-
.rstrip(b"=")
1049-
.decode("ascii")
1050-
)
1045+
# we need an ever-increasing number, in order to properly implement pagination around ListObjectVersions
1046+
# by prepending the version-id with a global increasing number, we can lexicographically sort the versions
1047+
tok = next(global_version_id_sequence()).to_bytes(length=6) + token_bytes(18)
1048+
return base64.b64encode(tok, altchars=b"._").rstrip(b"=").decode("ascii")
1049+
1050+
1051+
@singleton_factory
1052+
def global_version_id_sequence():
1053+
start = int(time.time() * 1000)
1054+
# itertools.count is thread safe over the GIL since its getAndIncrement operation is a single python bytecode op
1055+
return itertools.count(start)

tests/aws/services/s3/test_s3_list_operations.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -490,6 +490,75 @@ def test_list_objects_versions_with_prefix(
490490
resp_dict["ListVersionsResult"].pop("@xmlns", None)
491491
snapshot.match("list-objects-versions-no-encoding", resp_dict)
492492

493+
@markers.aws.validated
494+
def test_list_objects_versions_with_prefix_only_and_pagination(
495+
self, s3_bucket, snapshot, aws_client, aws_http_client_factory
496+
):
497+
snapshot.add_transformer(snapshot.transform.s3_api())
498+
aws_client.s3.put_bucket_versioning(
499+
Bucket=s3_bucket,
500+
VersioningConfiguration={"Status": "Enabled"},
501+
)
502+
503+
for _ in range(10):
504+
aws_client.s3.put_object(Bucket=s3_bucket, Key="prefixed_key")
505+
506+
aws_client.s3.put_object(Bucket=s3_bucket, Key="non_prefixed_key")
507+
508+
prefixed_full = aws_client.s3.list_object_versions(Bucket=s3_bucket, Prefix="prefix")
509+
snapshot.match("list-object-version-prefix-full", prefixed_full)
510+
511+
full_response = aws_client.s3.list_object_versions(Bucket=s3_bucket)
512+
assert len(full_response["Versions"]) == 11
513+
514+
page_1_response = aws_client.s3.list_object_versions(
515+
Bucket=s3_bucket, Prefix="prefix", MaxKeys=5
516+
)
517+
snapshot.match("list-object-version-prefix-page-1", page_1_response)
518+
next_version_id_marker = page_1_response["NextVersionIdMarker"]
519+
520+
page_2_key_marker_only = aws_client.s3.list_object_versions(
521+
Bucket=s3_bucket,
522+
Prefix="prefix",
523+
MaxKeys=7,
524+
KeyMarker=page_1_response["NextKeyMarker"],
525+
)
526+
snapshot.match("list-object-version-prefix-key-marker-only", page_2_key_marker_only)
527+
528+
page_2_response = aws_client.s3.list_object_versions(
529+
Bucket=s3_bucket,
530+
Prefix="prefix",
531+
MaxKeys=10,
532+
KeyMarker=page_1_response["NextKeyMarker"],
533+
VersionIdMarker=page_1_response["NextVersionIdMarker"],
534+
)
535+
snapshot.match("list-object-version-prefix-page-2", page_2_response)
536+
537+
delete_version_id_marker = aws_client.s3.delete_objects(
538+
Bucket=s3_bucket,
539+
Delete={
540+
"Objects": [
541+
{"Key": version["Key"], "VersionId": version["VersionId"]}
542+
for version in page_1_response["Versions"]
543+
],
544+
},
545+
)
546+
# result is unordered in AWS, pretty hard to snapshot and tested in other places anyway
547+
assert len(delete_version_id_marker["Deleted"]) == 5
548+
assert any(
549+
version["VersionId"] == next_version_id_marker
550+
for version in delete_version_id_marker["Deleted"]
551+
)
552+
553+
page_2_response = aws_client.s3.list_object_versions(
554+
Bucket=s3_bucket,
555+
Prefix="prefix",
556+
MaxKeys=10,
557+
KeyMarker=page_1_response["NextKeyMarker"],
558+
VersionIdMarker=next_version_id_marker,
559+
)
560+
snapshot.match("list-object-version-prefix-page-2-after-delete", page_2_response)
561+
493562
@markers.aws.validated
494563
def test_s3_list_object_versions_timestamp_precision(
495564
self, s3_bucket, aws_client, aws_http_client_factory

0 commit comments

Comments
 (0)