Skip to content

Commit ef8845d

Browse files
[AWS][Transcribe] Adding fix for validating Audio length (#12450)
1 parent 3936886 commit ef8845d

File tree

4 files changed

+75
-0
lines changed

4 files changed

+75
-0
lines changed

localstack-core/localstack/services/transcribe/provider.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,11 @@
4343
from localstack.utils.run import run
4444
from localstack.utils.threads import start_thread
4545

46+
# Amazon Transcribe batch transcription is limited to four hours (or 2 GB) of audio per API call.
47+
# The streaming service can accommodate open connections up to four hours long.
48+
# See https://aws.amazon.com/transcribe/faqs/
49+
MAX_AUDIO_DURATION_SECONDS = 60 * 60 * 4
50+
4651
LOG = logging.getLogger(__name__)
4752

4853
VOSK_MODELS_URL = f"{HUGGING_FACE_ENDPOINT}/vosk-models/resolve/main/"
@@ -305,6 +310,11 @@ def _run_transcription_job(self, args: Tuple[TranscribeStore, str]):
305310
format = ffprobe_output["format"]["format_name"]
306311
LOG.debug("Media format detected as: %s", format)
307312
job["MediaFormat"] = SUPPORTED_FORMAT_NAMES[format]
313+
duration = ffprobe_output["format"]["duration"]
314+
315+
if float(duration) >= MAX_AUDIO_DURATION_SECONDS:
316+
failure_reason = "Invalid file size: file size too large. Maximum audio duration is 4.000000 hours.Check the length of the file and try your request again."
317+
raise RuntimeError()
308318

309319
# Determine the sample rate of input audio if possible
310320
for stream in ffprobe_output["streams"]:

tests/aws/services/transcribe/test_transcribe.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import logging
22
import os
3+
import tempfile
34
import threading
45
import time
56
from urllib.parse import urlparse
@@ -16,6 +17,7 @@
1617
from localstack.testing.aws.util import is_aws_cloud
1718
from localstack.testing.pytest import markers
1819
from localstack.utils.files import new_tmp_file
20+
from localstack.utils.run import run
1921
from localstack.utils.strings import short_uid, to_str
2022
from localstack.utils.sync import poll_condition, retry
2123
from localstack.utils.threads import start_worker_thread
@@ -439,3 +441,36 @@ def test_transcribe_error_speaker_labels(self, transcribe_create_job, aws_client
439441
with pytest.raises(ParamValidationError) as e:
440442
transcribe_create_job(audio_file=file_path, params=settings)
441443
snapshot.match("err_speaker_labels_diarization", e.value)
444+
445+
@markers.aws.validated
@markers.snapshot.skip_snapshot_verify(
    paths=[
        "$..TranscriptionJob..Settings",
        "$..TranscriptionJob..Transcript",
        "$..TranscriptionJob..MediaFormat",
    ]
)
def test_transcribe_error_invalid_length(self, transcribe_create_job, aws_client, snapshot):
    """Check that a transcription job for a 4-hour audio file ends up FAILED.

    A silent 14400-second (4 h) MP3 is generated with ffmpeg's ``anullsrc``
    source, submitted as a transcription job, and the resulting job
    description is compared against the recorded snapshot.
    """
    ffmpeg_bin = ffmpeg_package.get_installer().get_ffmpeg_path()
    # Use a unique file name so that reruns and parallel test sessions sharing
    # the same temp directory never collide on a leftover file.
    media_file = os.path.join(tempfile.gettempdir(), f"audio_4h_{short_uid()}.mp3")

    try:
        # -y: overwrite the output without prompting; without it ffmpeg asks for
        # confirmation if the file already exists, which cannot be answered here.
        run(
            f"{ffmpeg_bin} -y -f lavfi -i anullsrc=r=44100:cl=mono -t 14400 -q:a 9 -acodec libmp3lame {media_file}"
        )
        job_name = transcribe_create_job(audio_file=media_file)

        def _is_transcription_done():
            transcription_status = aws_client.transcribe.get_transcription_job(
                TranscriptionJobName=job_name
            )
            return transcription_status["TranscriptionJob"]["TranscriptionJobStatus"] == "FAILED"

        # empirically it takes around
        # <5sec for a vosk transcription
        # ~100sec for an AWS transcription -> adjust timeout accordingly
        assert poll_condition(_is_transcription_done, timeout=100), (
            f"could not finish transcription job: {job_name} in time"
        )

        job = aws_client.transcribe.get_transcription_job(TranscriptionJobName=job_name)
        snapshot.match("TranscribeErrorInvalidLength", job)
    finally:
        # The generated audio file is only needed for this test; remove it so
        # it does not accumulate in the temp directory.
        if os.path.exists(media_file):
            os.remove(media_file)

tests/aws/services/transcribe/test_transcribe.snapshot.json

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -893,5 +893,32 @@
893893
"recorded-content": {
894894
"err_speaker_labels_diarization": "Parameter validation failed:\nInvalid value for parameter Settings.MaxSpeakerLabels, value: 1, valid min value: 2"
895895
}
896+
},
897+
"tests/aws/services/transcribe/test_transcribe.py::TestTranscribe::test_transcribe_error_invalid_length": {
898+
"recorded-date": "12-04-2025, 16:02:39",
899+
"recorded-content": {
900+
"TranscribeErrorInvalidLength": {
901+
"TranscriptionJob": {
902+
"CreationTime": "datetime",
903+
"FailureReason": "Invalid file size: file size too large. Maximum audio duration is 4.000000 hours.Check the length of the file and try your request again.",
904+
"LanguageCode": "en-GB",
905+
"Media": {
906+
"MediaFileUri": "s3:/<test-bucket>/test-clip.wav"
907+
},
908+
"Settings": {
909+
"ChannelIdentification": false,
910+
"ShowAlternatives": false
911+
},
912+
"StartTime": "datetime",
913+
"Transcript": {},
914+
"TranscriptionJobName": "<transcription-job:1>",
915+
"TranscriptionJobStatus": "FAILED"
916+
},
917+
"ResponseMetadata": {
918+
"HTTPHeaders": {},
919+
"HTTPStatusCode": 200
920+
}
921+
}
922+
}
896923
}
897924
}

tests/aws/services/transcribe/test_transcribe.validation.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@
1111
"tests/aws/services/transcribe/test_transcribe.py::TestTranscribe::test_list_transcription_jobs": {
1212
"last_validated_date": "2023-10-06T15:11:25+00:00"
1313
},
14+
"tests/aws/services/transcribe/test_transcribe.py::TestTranscribe::test_transcribe_error_invalid_length": {
15+
"last_validated_date": "2025-04-12T16:02:38+00:00"
16+
},
1417
"tests/aws/services/transcribe/test_transcribe.py::TestTranscribe::test_transcribe_error_speaker_labels": {
1518
"last_validated_date": "2025-03-19T15:42:06+00:00"
1619
},

0 commit comments

Comments
 (0)