Skip to content

Add sample for speech api on GCS file via grpc. #352

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 18, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion speech/api/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ for more information.
* If you're running the `speech_streaming.py` sample:

```sh
$ pip install -r requirements-speech_streaming.txt
$ pip install -r requirements-speech_grpc.txt
```

The sample uses the [PyAudio][pyaudio] library to stream audio from your
Expand Down
Empty file added speech/api/grpc_auth.py
Empty file.
92 changes: 92 additions & 0 deletions speech/api/speech_gcs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#!/usr/bin/python
# Copyright (C) 2016 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Sample that transcribes a FLAC audio file stored in Google Cloud Storage,
using gRPC."""

import argparse

from gcloud.credentials import get_credentials
from google.cloud.speech.v1 import cloud_speech_pb2 as cloud_speech
from grpc.beta import implementations

# Keep the request alive for this many seconds; main() passes this as the
# second argument of the NonStreamingRecognize call.
DEADLINE_SECS = 10
# OAuth2 scope applied to the application default credentials in
# make_channel() via create_scoped().
SPEECH_SCOPE = 'https://www.googleapis.com/auth/cloud-platform'


def make_channel(host, port):
    """Build a secure channel to ``host:port`` carrying Google credentials.

    The channel combines default SSL transport credentials with a per-call
    metadata plugin that adds an ``Authorization: Bearer <token>`` header.

    NOTE: the access token is read once, while the channel is being built,
    and the same header tuple is handed to every call made on the channel.

    Args:
        host: Host name of the service to connect to.
        port: Port of the service to connect to.

    Returns:
        Whatever ``implementations.secure_channel`` returns for the composed
        credentials.
    """
    # Default SSL settings are enough for an https endpoint.
    transport_credentials = implementations.ssl_channel_credentials(
        None, None, None)

    # Application default credentials from the environment, limited to the
    # scope the Speech API needs.
    scoped_credentials = get_credentials().create_scoped([SPEECH_SCOPE])
    access_token = scoped_credentials.get_access_token().access_token
    authorization = ('Authorization', 'Bearer ' + access_token)

    def _inject_authorization(_context, callback):
        # Hand the pre-built header to gRPC; None signals "no error".
        return callback([authorization], None)

    call_credentials = implementations.metadata_call_credentials(
        _inject_authorization, name='google_creds')

    # SSL for the transport plus the Google auth header on each call.
    combined_credentials = implementations.composite_channel_credentials(
        transport_credentials, call_credentials)

    return implementations.secure_channel(host, port, combined_credentials)


def main(input_uri, output_uri, encoding, sample_rate):
    """Send one non-streaming recognize request for a Cloud Storage file.

    Args:
        input_uri: ``gs://`` URI of the audio to transcribe.
        output_uri: ``gs://`` URI passed to the service as ``output_uri``.
        encoding: Audio encoding name forwarded to the request unchanged.
        sample_rate: Sample rate forwarded to the request unchanged.
    """
    channel = make_channel('speech.googleapis.com', 443)
    service = cloud_speech.beta_create_Speech_stub(channel)

    # The method and message names come from the proto the grpc client lib
    # was generated from. See:
    # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1/cloud_speech.proto
    recognition_settings = cloud_speech.InitialRecognizeRequest(
        encoding=encoding,
        sample_rate=sample_rate,
        output_uri=output_uri,
    )
    audio_source = cloud_speech.AudioRequest(uri=input_uri)
    request = cloud_speech.RecognizeRequest(
        initial_request=recognition_settings,
        audio_request=audio_source,
    )

    response = service.NonStreamingRecognize(request, DEADLINE_SECS)

    # The transcription goes to the GCS uri given as output_uri, so the
    # inline list of responses printed here is expected to be empty.
    print(response.responses)


def _gcs_uri(text):
    """Validate that ``text`` looks like a Cloud Storage URI.

    Used as an argparse ``type=`` callable for the URI arguments.

    Args:
        text: The raw command-line value.

    Returns:
        ``text`` unchanged when it starts with ``gs://``.

    Raises:
        ValueError: If ``text`` does not start with ``gs://``.
    """
    if text.startswith('gs://'):
        return text
    raise ValueError(
        'Cloud Storage uri must be of the form gs://bucket/path/')


# Location of the proto definition; used to point --help readers at the list
# of supported encodings.
PROTO_URL = ('https://github.com/googleapis/googleapis/blob/master/'
             'google/cloud/speech/v1/cloud_speech.proto')
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # _gcs_uri rejects anything that is not a gs:// URI before main() runs.
    parser.add_argument('input_uri', type=_gcs_uri)
    parser.add_argument('output_uri', type=_gcs_uri)
    parser.add_argument(
        '--encoding', default='FLAC', choices=[
            'LINEAR16', 'FLAC', 'MULAW', 'AMR', 'AMR_WB'],
        help='How the audio file is encoded. See {}#L67'.format(PROTO_URL))
    # type=int is required: without it argparse hands a user-supplied value
    # to main() as a str, while the default is an int.
    parser.add_argument(
        '--sample_rate', type=int, default=16000,
        help='Sample rate of the audio file, in Hertz.')

    args = parser.parse_args()
    main(args.input_uri, args.output_uri, args.encoding, args.sample_rate)
38 changes: 38 additions & 0 deletions speech/api/speech_gcs_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Copyright 2016, Google, Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys

import pytest
from speech_gcs import _gcs_uri
from speech_gcs import main


@pytest.mark.skipif(
    sys.version_info >= (3, 0),
    reason=("grpc doesn't yet support python3 "
            'https://github.com/grpc/grpc/issues/282'))
def test_main(cloud_config, capsys):
    """Run the sample against the configured bucket and check its stdout."""
    bucket = cloud_config.storage_bucket
    source_uri = 'gs://{}/speech/clip.flac'.format(bucket)
    destination_uri = 'gs://{}/speech/clip.txt'.format(bucket)

    main(source_uri, destination_uri, 'FLAC', 16000)

    # The sample prints the inline response list; it should be empty.
    printed, _ = capsys.readouterr()
    assert printed == '[]\n'


def test_gcs_uri():
    """Check that _gcs_uri accepts gs:// URIs and rejects everything else."""
    # A valid URI must come back unchanged; argparse stores the return value,
    # so a validator that returned None would silently drop the argument.
    assert _gcs_uri('gs://bucket/path') == 'gs://bucket/path'
    with pytest.raises(ValueError):
        _gcs_uri('/local/path')
28 changes: 21 additions & 7 deletions speech/api/speech_streaming.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,25 @@
#!/usr/bin/python
# Copyright (C) 2016 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Sample that streams audio to the Google Cloud Speech API via GRPC."""

import contextlib
import re
import threading

from gcloud.credentials import get_credentials
from google.cloud.speech.v1.cloud_speech_pb2 import * # noqa
from google.cloud.speech.v1 import cloud_speech_pb2 as cloud_speech
from google.rpc import code_pb2
from grpc.beta import implementations
import pyaudio
Expand Down Expand Up @@ -70,7 +84,7 @@ def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
with record_audio(channels, rate, chunk) as audio_stream:
# The initial request must contain metadata about the stream, so the
# server knows how to interpret it.
metadata = InitialRecognizeRequest(
metadata = cloud_speech.InitialRecognizeRequest(
encoding='LINEAR16', sample_rate=rate,
# Note that setting interim_results to True means that you'll
# likely get multiple results for the same bit of audio, as the
Expand All @@ -80,9 +94,9 @@ def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
interim_results=True, continuous=False,
)
data = audio_stream.read(chunk)
audio_request = AudioRequest(content=data)
audio_request = cloud_speech.AudioRequest(content=data)

yield RecognizeRequest(
yield cloud_speech.RecognizeRequest(
initial_request=metadata,
audio_request=audio_request)

Expand All @@ -91,9 +105,9 @@ def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
if not data:
raise StopIteration()
# Subsequent requests can all just have the content
audio_request = AudioRequest(content=data)
audio_request = cloud_speech.AudioRequest(content=data)

yield RecognizeRequest(audio_request=audio_request)
yield cloud_speech.RecognizeRequest(audio_request=audio_request)


def listen_print_loop(recognize_stream):
Expand All @@ -116,7 +130,7 @@ def listen_print_loop(recognize_stream):

def main():
stop_audio = threading.Event()
with beta_create_Speech_stub(
with cloud_speech.beta_create_Speech_stub(
make_channel('speech.googleapis.com', 443)) as service:
try:
listen_print_loop(
Expand Down