Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions localstack-core/localstack/services/s3/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,7 @@
get_system_metadata_from_request,
get_unique_key_id,
is_bucket_name_valid,
is_version_older_than_other,
parse_copy_source_range_header,
parse_post_object_tagging_xml,
parse_range_header,
Expand Down Expand Up @@ -1856,9 +1857,9 @@ def list_object_versions(
continue

# it is possible that the version_id_marker related object has been deleted, in that case, start
# as soon as the next version id is smaller than the version id marker (meaning this version was
# as soon as the next version id is older than the version id marker (meaning this version was
# next after the now-deleted version)
elif version.version_id < version_id_marker:
elif is_version_older_than_other(version.version_id, version_id_marker):
version_key_marker_found = True

elif not version_key_marker_found:
Expand Down
21 changes: 16 additions & 5 deletions localstack-core/localstack/services/s3/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1039,11 +1039,14 @@ def parse_post_object_tagging_xml(tagging: str) -> Optional[dict]:


def generate_safe_version_id() -> str:
# the safe b64 encoding is inspired by the stdlib base64.urlsafe_b64encode
# and also using stdlib secrets.token_urlsafe, but with a different alphabet adapted for S3
# VersionId cannot have `-` in it, as it fails in XML
# we need an ever-increasing number, in order to properly implement pagination around ListObjectVersions
# by prepending the version-id with a global increasing number, we can lexicographically sort the versions
"""
Generate a safe version id for XML rendering.
VersionId cannot have `-` in it, as it fails in XML
Combine an ever-increasing part in the 8 first characters, and a random element.
We need the sequence part in order to properly implement pagination around ListObjectVersions.
By prefixing the version-id with a global increasing number, we can sort the versions
:return: an S3 VersionId containing a timestamp part in the first 8 characters
"""
tok = next(global_version_id_sequence()).to_bytes(length=6) + token_bytes(18)
return base64.b64encode(tok, altchars=b"._").rstrip(b"=").decode("ascii")

Expand All @@ -1053,3 +1056,11 @@ def global_version_id_sequence():
start = int(time.time() * 1000)
# itertools.count is thread safe over the GIL since its getAndIncrement operation is a single python bytecode op
return itertools.count(start)


def is_version_older_than_other(version_id: str, other: str):
"""
Compare the sequence part of a VersionId against the sequence part of a VersionIdMarker. Used for pagination
See `generate_safe_version_id`
"""
return base64.b64decode(version_id, altchars=b"._") < base64.b64decode(other, altchars=b"._")
35 changes: 34 additions & 1 deletion tests/aws/services/s3/test_s3_list_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -492,7 +492,10 @@ def test_list_objects_versions_with_prefix(

@markers.aws.validated
def test_list_objects_versions_with_prefix_only_and_pagination(
self, s3_bucket, snapshot, aws_client, aws_http_client_factory
self,
s3_bucket,
snapshot,
aws_client,
):
snapshot.add_transformer(snapshot.transform.s3_api())
aws_client.s3.put_bucket_versioning(
Expand Down Expand Up @@ -559,6 +562,36 @@ def test_list_objects_versions_with_prefix_only_and_pagination(
)
snapshot.match("list-object-version-prefix-page-2-after-delete", page_2_response)

@markers.aws.validated
def test_list_objects_versions_with_prefix_only_and_pagination_many_versions(
self,
s3_bucket,
aws_client,
):
aws_client.s3.put_bucket_versioning(
Bucket=s3_bucket,
VersioningConfiguration={"Status": "Enabled"},
)
# with our internal pagination system, we use characters from the alphabet (lower and upper) + digits
# by creating more than 100 objects, we can make sure we circle all the way around our sequencing, and properly
# paginate over all of them
for _ in range(101):
aws_client.s3.put_object(Bucket=s3_bucket, Key="prefixed_key")

paginator = aws_client.s3.get_paginator("list_object_versions")
# even if the PageIterator looks like it should be an iterator, it's actually an iterable and needs to be
# wrapped in `iter`
page_iterator = iter(
paginator.paginate(
Bucket=s3_bucket, Prefix="prefix", PaginationConfig={"PageSize": 100}
)
)
page_1 = next(page_iterator)
assert len(page_1["Versions"]) == 100

page_2 = next(page_iterator)
assert len(page_2["Versions"]) == 1

@markers.aws.validated
def test_s3_list_object_versions_timestamp_precision(
self, s3_bucket, aws_client, aws_http_client_factory
Expand Down
3 changes: 3 additions & 0 deletions tests/aws/services/s3/test_s3_list_operations.validation.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@
"tests/aws/services/s3/test_s3_list_operations.py::TestS3ListObjectVersions::test_list_objects_versions_with_prefix_only_and_pagination": {
"last_validated_date": "2025-02-13T03:52:21+00:00"
},
"tests/aws/services/s3/test_s3_list_operations.py::TestS3ListObjectVersions::test_list_objects_versions_with_prefix_only_and_pagination_many_versions": {
"last_validated_date": "2025-02-13T20:24:26+00:00"
},
"tests/aws/services/s3/test_s3_list_operations.py::TestS3ListObjectVersions::test_s3_list_object_versions_timestamp_precision": {
"last_validated_date": "2025-01-21T18:15:06+00:00"
},
Expand Down
19 changes: 19 additions & 0 deletions tests/unit/services/s3/test_s3.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import datetime
import os
import re
import string
import zoneinfo
from io import BytesIO
from urllib.parse import urlparse
Expand Down Expand Up @@ -707,3 +708,21 @@ def test_s3_context_manager(self, tmpdir):
pass

temp_storage_backend.close()


class TestS3VersionIdGenerator:
def test_version_is_xml_safe(self):
# assert than we don't have unsafe characters in 500 different versions id
safe_characters = string.ascii_letters + string.digits + "._"
assert all(
all(char in safe_characters for char in s3_utils.generate_safe_version_id())
for _ in range(500)
)

def test_version_id_ordering(self):
version_ids = [s3_utils.generate_safe_version_id() for _ in range(500)]

# assert that every version id can be ordered with each other
for index, version_id in enumerate(version_ids[1:]):
previous_version = version_ids[index]
assert s3_utils.is_version_older_than_other(previous_version, version_id)
Loading