Skip to content

Commit 00e64fd

Browse files
authored
S3: fix checksum behavior and add new AWS behavior (#12401)
1 parent 3a46145 commit 00e64fd

15 files changed

+1510
-252
lines changed

localstack-core/localstack/aws/api/s3/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3579,8 +3579,10 @@ class PostResponse(TypedDict, total=False):
35793579
ETagHeader: Optional[ETag]
35803580
ChecksumCRC32: Optional[ChecksumCRC32]
35813581
ChecksumCRC32C: Optional[ChecksumCRC32C]
3582+
ChecksumCRC64NVME: Optional[ChecksumCRC64NVME]
35823583
ChecksumSHA1: Optional[ChecksumSHA1]
35833584
ChecksumSHA256: Optional[ChecksumSHA256]
3585+
ChecksumType: Optional[ChecksumType]
35843586
ServerSideEncryption: Optional[ServerSideEncryption]
35853587
VersionId: Optional[ObjectVersionId]
35863588
SSECustomerAlgorithm: Optional[SSECustomerAlgorithm]

localstack-core/localstack/aws/spec-patches.json

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -540,6 +540,12 @@
540540
"location": "header",
541541
"locationName": "x-amz-checksum-crc32c"
542542
},
543+
"ChecksumCRC64NVME":{
544+
"shape":"ChecksumCRC64NVME",
545+
"documentation":"<p>This header can be used as a data integrity check to verify that the data received is the same data that was originally sent. This header specifies the Base64 encoded, 64-bit <code>CRC64NVME</code> checksum of the object. The <code>CRC64NVME</code> checksum is always a full object checksum. For more information, see <a href=\"https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html\">Checking object integrity in the Amazon S3 User Guide</a>.</p>",
546+
"location":"header",
547+
"locationName":"x-amz-checksum-crc64nvme"
548+
},
543549
"ChecksumSHA1": {
544550
"shape": "ChecksumSHA1",
545551
"documentation": "<p>The base64-encoded, 160-bit SHA-1 digest of the object. This will only be present if it was uploaded with the object. With multipart uploads, this may not be a checksum value of the object. For more information about how checksums are calculated with multipart uploads, see <a href=\"https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html#large-object-checksums\"> Checking object integrity</a> in the <i>Amazon S3 User Guide</i>.</p>",
@@ -552,6 +558,12 @@
552558
"location": "header",
553559
"locationName": "x-amz-checksum-sha256"
554560
},
561+
"ChecksumType":{
562+
"shape":"ChecksumType",
563+
"documentation":"<p>This header specifies the checksum type of the object, which determines how part-level checksums are combined to create an object-level checksum for multipart objects. You can use this header as a data integrity check to verify that the checksum type that is received is the same checksum type that was specified. If the checksum type doesn’t match the checksum type that was specified for the object during the <code>CreateMultipartUpload</code> request, it’ll result in a <code>BadDigest</code> error. For more information, see <a href=\"https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html\">Checking object integrity in the Amazon S3 User Guide</a>.</p>",
564+
"location":"header",
565+
"locationName":"x-amz-checksum-type"
566+
},
555567
"ServerSideEncryption": {
556568
"shape": "ServerSideEncryption",
557569
"documentation": "<p>If you specified server-side encryption either with an Amazon Web Services KMS key or Amazon S3-managed encryption key in your PUT request, the response includes this header. It confirms the encryption algorithm that Amazon S3 used to encrypt the object.</p>",

localstack-core/localstack/services/s3/models.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,7 @@ def __init__(
310310
self.etag = etag
311311
self.size = size
312312
self.expires = expires
313-
self.checksum_algorithm = checksum_algorithm
313+
self.checksum_algorithm = checksum_algorithm or ChecksumAlgorithm.CRC64NVME
314314
self.checksum_value = checksum_value
315315
self.checksum_type = checksum_type
316316
self.encryption = encryption
@@ -429,6 +429,7 @@ class S3Multipart:
429429
upload_id: MultipartUploadId
430430
checksum_value: Optional[str]
431431
checksum_type: Optional[ChecksumType]
432+
checksum_algorithm: ChecksumAlgorithm
432433
initiated: datetime
433434
precondition: bool
434435

@@ -463,6 +464,7 @@ def __init__(
463464
self.tagging = tagging
464465
self.checksum_value = None
465466
self.checksum_type = checksum_type
467+
self.checksum_algorithm = checksum_algorithm
466468
self.precondition = precondition
467469
self.object = S3Object(
468470
key=key,
@@ -490,13 +492,13 @@ def complete_multipart(
490492
):
491493
last_part_index = len(parts) - 1
492494
object_etag = hashlib.md5(usedforsecurity=False)
493-
has_checksum = self.object.checksum_algorithm is not None
495+
has_checksum = self.checksum_algorithm is not None
494496
checksum_hash = None
495497
if has_checksum:
496-
if self.object.checksum_type == ChecksumType.COMPOSITE:
497-
checksum_hash = get_s3_checksum(self.object.checksum_algorithm)
498+
if self.checksum_type == ChecksumType.COMPOSITE:
499+
checksum_hash = get_s3_checksum(self.checksum_algorithm)
498500
else:
499-
checksum_hash = CombinedCrcHash(self.object.checksum_algorithm)
501+
checksum_hash = CombinedCrcHash(self.checksum_algorithm)
500502

501503
pos = 0
502504
parts_map = {}
@@ -520,7 +522,7 @@ def complete_multipart(
520522
)
521523

522524
if has_checksum:
523-
checksum_key = f"Checksum{self.object.checksum_algorithm.upper()}"
525+
checksum_key = f"Checksum{self.checksum_algorithm.upper()}"
524526
if not (part_checksum := part.get(checksum_key)):
525527
if self.checksum_type == ChecksumType.COMPOSITE:
526528
# weird case, they still try to validate a different checksum type than the multipart
@@ -532,7 +534,7 @@ def complete_multipart(
532534
)
533535

534536
raise InvalidRequest(
535-
f"The upload was created using a {self.object.checksum_algorithm.lower()} checksum. "
537+
f"The upload was created using a {self.checksum_algorithm.lower()} checksum. "
536538
f"The complete request must include the checksum for each part. "
537539
f"It was missing for part {part_number} in the request."
538540
)
@@ -584,7 +586,7 @@ def complete_multipart(
584586
checksum_value = f"{checksum_value}-{len(parts)}"
585587

586588
elif self.checksum_type == ChecksumType.FULL_OBJECT:
587-
if validation_checksum != checksum_value:
589+
if validation_checksum and validation_checksum != checksum_value:
588590
raise BadDigest(
589591
f"The {self.object.checksum_algorithm.lower()} you specified did not match the calculated checksum."
590592
)

localstack-core/localstack/services/s3/provider.py

Lines changed: 55 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -786,7 +786,9 @@ def put_object(
786786
s3_stored_object.write(body)
787787

788788
if s3_object.checksum_algorithm:
789-
if not validate_checksum_value(s3_object.checksum_value, checksum_algorithm):
789+
if not s3_object.checksum_value:
790+
s3_object.checksum_value = s3_stored_object.checksum
791+
elif not validate_checksum_value(s3_object.checksum_value, checksum_algorithm):
790792
self._storage_backend.remove(bucket_name, s3_object)
791793
raise InvalidRequest(
792794
f"Value for x-amz-checksum-{s3_object.checksum_algorithm.lower()} header is invalid."
@@ -1066,6 +1068,9 @@ def head_object(
10661068
if checksum_algorithm := s3_object.checksum_algorithm:
10671069
if (request.get("ChecksumMode") or "").upper() == "ENABLED":
10681070
response[f"Checksum{checksum_algorithm.upper()}"] = s3_object.checksum_value
1071+
response["ChecksumType"] = getattr(
1072+
s3_object, "checksum_type", ChecksumType.FULL_OBJECT
1073+
)
10691074

10701075
if s3_object.parts and request.get("PartNumber"):
10711076
response["PartsCount"] = len(s3_object.parts)
@@ -1091,6 +1096,7 @@ def head_object(
10911096

10921097
if range_data:
10931098
response["ContentLength"] = range_data.content_length
1099+
response["ContentRange"] = range_data.content_range
10941100
response["StatusCode"] = 206
10951101

10961102
add_encryption_to_response(response, s3_object=s3_object)
@@ -1470,6 +1476,7 @@ def copy_object(
14701476
acl = get_access_control_policy_for_new_resource_request(
14711477
request, owner=dest_s3_bucket.owner
14721478
)
1479+
checksum_algorithm = request.get("ChecksumAlgorithm")
14731480

14741481
s3_object = S3Object(
14751482
key=dest_key,
@@ -1479,7 +1486,7 @@ def copy_object(
14791486
expires=request.get("Expires"),
14801487
user_metadata=user_metadata,
14811488
system_metadata=system_metadata,
1482-
checksum_algorithm=request.get("ChecksumAlgorithm") or src_s3_object.checksum_algorithm,
1489+
checksum_algorithm=checksum_algorithm or src_s3_object.checksum_algorithm,
14831490
encryption=encryption_parameters.encryption,
14841491
kms_key_id=encryption_parameters.kms_key_id,
14851492
bucket_key_enabled=request.get(
@@ -2175,6 +2182,10 @@ def create_multipart_upload(
21752182
owner=s3_bucket.owner,
21762183
precondition=object_exists_for_precondition_write(s3_bucket, key),
21772184
)
2185+
# it seems that when the multipart upload uses SSE-C, AWS S3 does not apply the default checksum algorithm to the
2186+
# resulting object (this is not the case for PutObject), so we clear it on the underlying object here
2187+
if sse_c_key_md5:
2188+
s3_multipart.object.checksum_algorithm = None
21782189

21792190
s3_bucket.multiparts[s3_multipart.id] = s3_multipart
21802191

@@ -2286,7 +2297,10 @@ def upload_part(
22862297
decoded_content_length = int(headers.get("x-amz-decoded-content-length", 0))
22872298
body = AwsChunkedDecoder(body, decoded_content_length, s3_part)
22882299

2289-
if s3_part.checksum_algorithm != s3_multipart.object.checksum_algorithm:
2300+
if (
2301+
s3_multipart.checksum_algorithm
2302+
and s3_part.checksum_algorithm != s3_multipart.checksum_algorithm
2303+
):
22902304
error_req_checksum = checksum_algorithm.lower() if checksum_algorithm else "null"
22912305
error_mp_checksum = (
22922306
s3_multipart.object.checksum_algorithm.lower()
@@ -2525,7 +2539,7 @@ def complete_multipart_upload(
25252539
UploadId=upload_id,
25262540
)
25272541

2528-
mpu_checksum_algorithm = s3_multipart.object.checksum_algorithm
2542+
mpu_checksum_algorithm = s3_multipart.checksum_algorithm
25292543
mpu_checksum_type = getattr(s3_multipart, "checksum_type", None)
25302544

25312545
if checksum_type and checksum_type != mpu_checksum_type:
@@ -2555,24 +2569,36 @@ def complete_multipart_upload(
25552569
s3_multipart.complete_multipart(
25562570
parts, mpu_size=mpu_object_size, validation_checksum=checksum_value
25572571
)
2572+
if mpu_checksum_algorithm and (
2573+
(
2574+
checksum_value
2575+
and mpu_checksum_type == ChecksumType.FULL_OBJECT
2576+
and not checksum_type
2577+
)
2578+
or any(
2579+
checksum_value
2580+
for checksum_type, checksum_value in checksum_map.items()
2581+
if checksum_type != checksum_algorithm
2582+
)
2583+
):
2584+
# this is not ideal, but this validation comes last... after the validation of individual parts
2585+
s3_multipart.object.parts.clear()
2586+
raise BadDigest(
2587+
f"The {mpu_checksum_algorithm.lower()} you specified did not match the calculated checksum."
2588+
)
25582589
else:
25592590
s3_multipart.complete_multipart(parts)
25602591

2561-
if (
2562-
mpu_checksum_algorithm
2563-
and not checksum_type
2564-
and mpu_checksum_type == ChecksumType.FULL_OBJECT
2565-
):
2566-
# this is not ideal, but this validation comes last... after the validation of individual parts
2567-
s3_multipart.object.parts.clear()
2568-
raise BadDigest(
2569-
f"The {mpu_checksum_algorithm.lower()} you specified did not match the calculated checksum."
2570-
)
2571-
25722592
stored_multipart = self._storage_backend.get_multipart(bucket, s3_multipart)
25732593
stored_multipart.complete_multipart(
25742594
[s3_multipart.parts.get(part_number) for part_number in parts_numbers]
25752595
)
2596+
if not s3_multipart.checksum_algorithm and s3_multipart.object.checksum_algorithm:
2597+
with self._storage_backend.open(
2598+
bucket, s3_multipart.object, mode="r"
2599+
) as s3_stored_object:
2600+
s3_multipart.object.checksum_value = s3_stored_object.checksum
2601+
s3_multipart.object.checksum_type = ChecksumType.FULL_OBJECT
25762602

25772603
s3_object = s3_multipart.object
25782604

@@ -2599,9 +2625,11 @@ def complete_multipart_upload(
25992625
if s3_object.version_id:
26002626
response["VersionId"] = s3_object.version_id
26012627

2602-
if s3_object.checksum_algorithm:
2628+
# it seems AWS is not returning checksum related fields if the object has KMS encryption ¯\_(ツ)_/¯
2629+
# but it still generates them, and they can be retrieved with regular GetObject and such operations
2630+
if s3_object.checksum_algorithm and not s3_object.kms_key_id:
26032631
response[f"Checksum{s3_object.checksum_algorithm.upper()}"] = s3_object.checksum_value
2604-
response["ChecksumType"] = mpu_checksum_type
2632+
response["ChecksumType"] = s3_object.checksum_type
26052633

26062634
if s3_object.expiration:
26072635
response["Expiration"] = s3_object.expiration # TODO: properly parse the datetime
@@ -2691,7 +2719,7 @@ def list_parts(
26912719
PartNumber=part_number,
26922720
Size=part.size,
26932721
)
2694-
if s3_multipart.object.checksum_algorithm:
2722+
if s3_multipart.checksum_algorithm:
26952723
part_item[f"Checksum{part.checksum_algorithm.upper()}"] = part.checksum_value
26962724

26972725
parts.append(part_item)
@@ -2720,7 +2748,7 @@ def list_parts(
27202748

27212749
if part_number_marker:
27222750
response["PartNumberMarker"] = part_number_marker
2723-
if s3_multipart.object.checksum_algorithm:
2751+
if s3_multipart.checksum_algorithm:
27242752
response["ChecksumAlgorithm"] = s3_multipart.object.checksum_algorithm
27252753
response["ChecksumType"] = getattr(s3_multipart, "checksum_type", None)
27262754

@@ -2820,8 +2848,8 @@ def list_multipart_uploads(
28202848
Owner=multipart.initiator, # TODO: check the difference
28212849
Initiator=multipart.initiator,
28222850
)
2823-
if multipart.object.checksum_algorithm:
2824-
multipart_upload["ChecksumAlgorithm"] = multipart.object.checksum_algorithm
2851+
if multipart.checksum_algorithm:
2852+
multipart_upload["ChecksumAlgorithm"] = multipart.checksum_algorithm
28252853
multipart_upload["ChecksumType"] = getattr(multipart, "checksum_type", None)
28262854

28272855
uploads.append(multipart_upload)
@@ -4288,7 +4316,10 @@ def post_object(
42884316
with self._storage_backend.open(bucket, s3_object, mode="w") as s3_stored_object:
42894317
s3_stored_object.write(stream)
42904318

4291-
if checksum_algorithm and s3_object.checksum_value != s3_stored_object.checksum:
4319+
if not s3_object.checksum_value:
4320+
s3_object.checksum_value = s3_stored_object.checksum
4321+
4322+
elif checksum_algorithm and s3_object.checksum_value != s3_stored_object.checksum:
42924323
self._storage_backend.remove(bucket, s3_object)
42934324
raise InvalidRequest(
42944325
f"Value for x-amz-checksum-{checksum_algorithm.lower()} header is invalid."
@@ -4334,7 +4365,8 @@ def post_object(
43344365
response["VersionId"] = s3_object.version_id
43354366

43364367
if s3_object.checksum_algorithm:
4337-
response[f"Checksum{checksum_algorithm.upper()}"] = s3_object.checksum_value
4368+
response[f"Checksum{s3_object.checksum_algorithm.upper()}"] = s3_object.checksum_value
4369+
response["ChecksumType"] = ChecksumType.FULL_OBJECT
43384370

43394371
if s3_bucket.lifecycle_rules:
43404372
if expiration_header := self._get_expiration_header(

0 commit comments

Comments
 (0)