localstack · sannya-singal · Mar 6, 2025 · Feb 13, 2025 · Mar 3, 2025 · Mar 5, 2025
diff --git a/localstack-core/localstack/constants.py b/localstack-core/localstack/constants.py
@@ -33,6 +33,9 @@
 # Artifacts endpoint
 ASSETS_ENDPOINT = "https://assets.localstack.cloud"
 
+# Base URL of the LocalStack namespace on Hugging Face (hosts downloadable model artifacts)
+HUGGING_FACE_ENDPOINT = "https://huggingface.co/localstack"
+
 # host to bind to when starting the services
 BIND_HOST = "0.0.0.0"
 

diff --git a/localstack-core/localstack/services/transcribe/provider.py b/localstack-core/localstack/services/transcribe/provider.py
@@ -30,6 +30,7 @@
     TranscriptionJobSummary,
 )
 from localstack.aws.connect import connect_to
+from localstack.constants import HUGGING_FACE_ENDPOINT
 from localstack.packages.ffmpeg import ffmpeg_package
 from localstack.services.s3.utils import (
     get_bucket_and_key_from_presign_url,
@@ -44,6 +45,8 @@
 
 LOG = logging.getLogger(__name__)
 
+VOSK_MODELS_URL = f"{HUGGING_FACE_ENDPOINT}/vosk-models/resolve/main/"
+
 # Map of language codes to language models
 LANGUAGE_MODELS = {
     "en-IN": "vosk-model-small-en-in-0.4",
@@ -237,9 +240,21 @@ def download_model(name: str):
 
             from vosk import MODEL_PRE_URL  # noqa
 
-            download(
-                MODEL_PRE_URL + str(model_path.name) + ".zip", model_zip_path, verify_ssl=False
-            )
+            # Sources are tried in order: the URL shipped with vosk first, then VOSK_MODELS_URL.
+            download_urls = [MODEL_PRE_URL, VOSK_MODELS_URL]
+            last_error = None
+
+            for url in download_urls:
+                try:
+                    download(url + str(model_path.name) + ".zip", model_zip_path, verify_ssl=False)
+                except Exception as e:
+                    LOG.warning("Failed to download model from %s: %s", url, e)
+                    last_error = e
+                    continue
+                break
+            else:
+                # Every source failed: stop here instead of trying to extract a missing archive.
+                raise RuntimeError(
+                    f"Unable to download language model: {model_path.name}"
+                ) from last_error
 
             LOG.debug("Extracting language model: %s", model_path.name)
             with ZipFile(model_zip_path, "r") as model_ref:

diff --git a/tests/aws/services/transcribe/test_transcribe.py b/tests/aws/services/transcribe/test_transcribe.py
@@ -136,7 +136,6 @@ def is_transcription_done():
             "$..Error..Code",
         ]
     )
-    @pytest.mark.skip(reason="flaky")
     def test_transcribe_happy_path(self, transcribe_create_job, snapshot, aws_client):
         file_path = os.path.join(BASEDIR, "../../files/en-gb.wav")
         job_name = transcribe_create_job(audio_file=file_path)
@@ -181,7 +180,6 @@ def is_transcription_done():
         ],
     )
     @markers.aws.needs_fixing
-    @pytest.mark.skip(reason="flaky")
     def test_transcribe_supported_media_formats(
         self, transcribe_create_job, media_file, speech, aws_client
     ):
@@ -322,7 +320,6 @@ def test_failing_start_transcription_job(self, s3_bucket, snapshot, aws_client):
             (None, None),  # without output bucket and output key
         ],
     )
-    @pytest.mark.skip(reason="flaky")
     def test_transcribe_start_job(
         self,
         output_bucket,