Skip to content

Commit f6d7fe4

Browse files
dizcologyJon Wayne Parrott
authored and
Jon Wayne Parrott
committed
Speech model selection (GoogleCloudPlatform#1361)
* add transcribe_model_selection
* add transcribe_model_selection_test
* flake
1 parent 7370677 commit f6d7fe4

File tree

3 files changed

+137
-0
lines changed

3 files changed

+137
-0
lines changed
1.7 MB
Binary file not shown.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
#!/usr/bin/env python
2+
3+
# Copyright 2017 Google Inc. All Rights Reserved.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
"""Google Cloud Speech API sample that demonstrates how to select the model
18+
used for speech recognition.
19+
20+
Example usage:
21+
python transcribe_model_selection.py \
22+
resources/Google_Gnome.wav --model video
23+
python transcribe_model_selection.py \
24+
gs://cloud-samples-tests/speech/Google_Gnome.wav --model video
25+
"""
26+
27+
import argparse
28+
29+
30+
# [START speech_transcribe_model_selection]
31+
def transcribe_model_selection(speech_file, model):
    """Recognize speech in a local audio file using the chosen model.

    The file at ``speech_file`` is read in full and sent in a single
    synchronous ``recognize`` request configured for LINEAR16 audio at
    16000 Hz in 'en-US'. For every result in the response, the first
    alternative's transcript is printed.

    Args:
        speech_file: Path of the local audio file to read.
        model: Name of the recognition model, passed through unchanged
            to ``RecognitionConfig``.
    """
    from google.cloud import speech_v1p1beta1 as speech
    speech_client = speech.SpeechClient()

    with open(speech_file, 'rb') as source:
        audio_bytes = source.read()

    recognition_audio = speech.types.RecognitionAudio(content=audio_bytes)

    linear16 = speech.enums.RecognitionConfig.AudioEncoding.LINEAR16
    recognition_config = speech.types.RecognitionConfig(
        encoding=linear16,
        sample_rate_hertz=16000,
        language_code='en-US',
        model=model)

    response = speech_client.recognize(recognition_config, recognition_audio)

    separator = '-' * 20
    for index, result in enumerate(response.results):
        # Only the first alternative of each result is reported.
        top_alternative = result.alternatives[0]
        print(separator)
        print('First alternative of result {}'.format(index))
        print('Transcript: {}'.format(top_alternative.transcript))
55+
# [END speech_transcribe_model_selection]
56+
57+
58+
# [START speech_transcribe_model_selection_gcs]
59+
def transcribe_model_selection_gcs(gcs_uri, model):
    """Recognize speech in a Cloud Storage audio file using the chosen model.

    A ``long_running_recognize`` operation is started for the audio at
    ``gcs_uri`` (configured for LINEAR16 audio at 16000 Hz in 'en-US'),
    and the call then waits up to 90 seconds for its result. For every
    result in the response, the first alternative's transcript is printed.

    Args:
        gcs_uri: URI of the audio file, passed as the ``uri`` of
            ``RecognitionAudio``.
        model: Name of the recognition model, passed through unchanged
            to ``RecognitionConfig``.
    """
    from google.cloud import speech_v1p1beta1 as speech
    speech_client = speech.SpeechClient()

    recognition_audio = speech.types.RecognitionAudio(uri=gcs_uri)

    linear16 = speech.enums.RecognitionConfig.AudioEncoding.LINEAR16
    recognition_config = speech.types.RecognitionConfig(
        encoding=linear16,
        sample_rate_hertz=16000,
        language_code='en-US',
        model=model)

    pending = speech_client.long_running_recognize(
        recognition_config, recognition_audio)

    print('Waiting for operation to complete...')
    response = pending.result(timeout=90)

    separator = '-' * 20
    for index, result in enumerate(response.results):
        # Only the first alternative of each result is reported.
        top_alternative = result.alternatives[0]
        print(separator)
        print('First alternative of result {}'.format(index))
        print('Transcript: {}'.format(top_alternative.transcript))
83+
# [END speech_transcribe_model_selection_gcs]
84+
85+
86+
if __name__ == '__main__':
    # Command-line entry point: parse the audio location and model name,
    # then run the matching sample function.
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__)
    cli.add_argument(
        'path', help='File or GCS path for audio file to be recognized')
    cli.add_argument(
        '--model',
        default='default',
        choices=['command_and_search', 'phone_call', 'video', 'default'],
        help='The speech recognition model to use')

    options = cli.parse_args()

    # Paths starting with 'gs://' go to the GCS variant; everything else
    # is treated as a local file.
    if options.path.startswith('gs://'):
        run_sample = transcribe_model_selection_gcs
    else:
        run_sample = transcribe_model_selection
    run_sample(options.path, options.model)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# Copyright 2016, Google, Inc.
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
#
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software
9+
# distributed under the License is distributed on an "AS IS" BASIS,
10+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
# See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
14+
import os
15+
import re
16+
17+
import transcribe_model_selection
18+
19+
# Path of the 'resources' directory that sits next to this test module;
# the local-file test reads its audio file from here.
RESOURCES = os.path.join(os.path.dirname(__file__), 'resources')
20+
21+
22+
def test_transcribe_model_selection_file(capsys):
    """Run the local-file sample with the 'video' model and check that the
    captured stdout contains the expected phrase (case-insensitive)."""
    audio_path = os.path.join(RESOURCES, 'Google_Gnome.wav')
    transcribe_model_selection.transcribe_model_selection(
        audio_path, 'video')

    captured_out, _ = capsys.readouterr()
    expected = re.compile(r'the weather outside is sunny', re.DOTALL | re.I)

    assert expected.search(captured_out)
28+
29+
30+
def test_transcribe_model_selection_gcs(capsys):
    """Run the GCS sample with the 'video' model and check that the
    captured stdout contains the expected phrase (case-insensitive)."""
    audio_uri = 'gs://cloud-samples-tests/speech/Google_Gnome.wav'
    transcribe_model_selection.transcribe_model_selection_gcs(
        audio_uri, 'video')

    captured_out, _ = capsys.readouterr()
    expected = re.compile(r'the weather outside is sunny', re.DOTALL | re.I)

    assert expected.search(captured_out)

0 commit comments

Comments
 (0)