[AWS][Transcribe] Adding fix for validating Audio length (#12450)

brunodmartins · web-flow · commit ef8845dae56c · 2025-04-16T17:17:32.000+02:00
diff --git a/localstack-core/localstack/services/transcribe/provider.py b/localstack-core/localstack/services/transcribe/provider.py
@@ -43,6 +43,11 @@
 from localstack.utils.run import run
 from localstack.utils.threads import start_thread
 
+# Amazon Transcribe limits batch transcription to four hours (or 2 GB) of media per API call.
+# The streaming service can keep a connection open for up to four hours.
+# See https://aws.amazon.com/transcribe/faqs/
+MAX_AUDIO_DURATION_SECONDS = 60 * 60 * 4
+
 LOG = logging.getLogger(__name__)
 
 VOSK_MODELS_URL = f"{HUGGING_FACE_ENDPOINT}/vosk-models/resolve/main/"
@@ -305,6 +310,11 @@ def _run_transcription_job(self, args: Tuple[TranscribeStore, str]):
             format = ffprobe_output["format"]["format_name"]
             LOG.debug("Media format detected as: %s", format)
             job["MediaFormat"] = SUPPORTED_FORMAT_NAMES[format]
+            duration = ffprobe_output["format"]["duration"]
+
+            if float(duration) >= MAX_AUDIO_DURATION_SECONDS:
+                failure_reason = "Invalid file size: file size too large. Maximum audio duration is 4.000000 hours.Check the length of the file and try your request again."
+                raise RuntimeError()
 
             # Determine the sample rate of input audio if possible
             for stream in ffprobe_output["streams"]:
diff --git a/tests/aws/services/transcribe/test_transcribe.py b/tests/aws/services/transcribe/test_transcribe.py
@@ -1,5 +1,6 @@
 import logging
 import os
+import tempfile
 import threading
 import time
 from urllib.parse import urlparse
@@ -16,6 +17,7 @@
 from localstack.testing.aws.util import is_aws_cloud
 from localstack.testing.pytest import markers
 from localstack.utils.files import new_tmp_file
+from localstack.utils.run import run
 from localstack.utils.strings import short_uid, to_str
 from localstack.utils.sync import poll_condition, retry
 from localstack.utils.threads import start_worker_thread
@@ -439,3 +441,36 @@ def test_transcribe_error_speaker_labels(self, transcribe_create_job, aws_client
         with pytest.raises(ParamValidationError) as e:
             transcribe_create_job(audio_file=file_path, params=settings)
         snapshot.match("err_speaker_labels_diarization", e.value)
+
+    @markers.aws.validated
+    @markers.snapshot.skip_snapshot_verify(
+        paths=[
+            "$..TranscriptionJob..Settings",
+            "$..TranscriptionJob..Transcript",
+            "$..TranscriptionJob..MediaFormat",
+        ]
+    )
+    def test_transcribe_error_invalid_length(self, transcribe_create_job, aws_client, snapshot):
+        """A job whose media is four hours (14400 s) long must end up in status FAILED."""
+        ffmpeg_bin = ffmpeg_package.get_installer().get_ffmpeg_path()
+        media_file = os.path.join(tempfile.gettempdir(), "audio_4h.mp3")
+
+        # -y: overwrite a file left behind by an earlier run instead of aborting
+        run(
+            f"{ffmpeg_bin} -y -f lavfi -i anullsrc=r=44100:cl=mono -t 14400 -q:a 9 -acodec libmp3lame {media_file}"
+        )
+        job_name = transcribe_create_job(audio_file=media_file)
+
+        def _is_transcription_failed():
+            job = aws_client.transcribe.get_transcription_job(TranscriptionJobName=job_name)
+            return job["TranscriptionJob"]["TranscriptionJobStatus"] == "FAILED"
+
+        # empirically a job takes around
+        # <5sec with vosk and
+        # ~100sec on AWS -> adjust timeout accordingly
+        assert poll_condition(_is_transcription_failed, timeout=100), (
+            f"could not finish transcription job: {job_name} in time"
+        )
+
+        job = aws_client.transcribe.get_transcription_job(TranscriptionJobName=job_name)
+        snapshot.match("TranscribeErrorInvalidLength", job)
diff --git a/tests/aws/services/transcribe/test_transcribe.snapshot.json b/tests/aws/services/transcribe/test_transcribe.snapshot.json
@@ -893,5 +893,32 @@
     "recorded-content": {
       "err_speaker_labels_diarization": "Parameter validation failed:\nInvalid value for parameter Settings.MaxSpeakerLabels, value: 1, valid min value: 2"
     }
+  },
+  "tests/aws/services/transcribe/test_transcribe.py::TestTranscribe::test_transcribe_error_invalid_length": {
+    "recorded-date": "12-04-2025, 16:02:39",
+    "recorded-content": {
+      "TranscribeErrorInvalidLength": {
+        "TranscriptionJob": {
+          "CreationTime": "datetime",
+          "FailureReason": "Invalid file size: file size too large. Maximum audio duration is 4.000000 hours.Check the length of the file and try your request again.",
+          "LanguageCode": "en-GB",
+          "Media": {
+            "MediaFileUri": "s3:/<test-bucket>/test-clip.wav"
+          },
+          "Settings": {
+            "ChannelIdentification": false,
+            "ShowAlternatives": false
+          },
+          "StartTime": "datetime",
+          "Transcript": {},
+          "TranscriptionJobName": "<transcription-job:1>",
+          "TranscriptionJobStatus": "FAILED"
+        },
+        "ResponseMetadata": {
+          "HTTPHeaders": {},
+          "HTTPStatusCode": 200
+        }
+      }
+    }
   }
 }
diff --git a/tests/aws/services/transcribe/test_transcribe.validation.json b/tests/aws/services/transcribe/test_transcribe.validation.json
@@ -11,6 +11,9 @@
   "tests/aws/services/transcribe/test_transcribe.py::TestTranscribe::test_list_transcription_jobs": {
     "last_validated_date": "2023-10-06T15:11:25+00:00"
   },
+  "tests/aws/services/transcribe/test_transcribe.py::TestTranscribe::test_transcribe_error_invalid_length": {
+    "last_validated_date": "2025-04-12T16:02:38+00:00"
+  },
   "tests/aws/services/transcribe/test_transcribe.py::TestTranscribe::test_transcribe_error_speaker_labels": {
     "last_validated_date": "2025-03-19T15:42:06+00:00"
   },