From 4f08c103a4449a892c06ed614dc1f2c001f8db9e Mon Sep 17 00:00:00 2001 From: Xinjie Zheng Date: Fri, 1 Jul 2016 20:12:51 -0700 Subject: [PATCH 1/3] Adding async sample code to speech cloud --- speech/api/requirements-speech_grpc.txt | 2 +- speech/api/speech_async_grpc.py | 105 ++++++++++++++++++ speech/api/speech_async_grpc_test.py | 38 +++++++ speech/api/speech_async_rest.py | 95 ++++++++++++++++ speech/api/speech_async_rest_test.py | 23 ++++ speech/api/{speech_gcs.py => speech_grpc.py} | 0 ...speech_gcs_test.py => speech_grpc_test.py} | 4 +- 7 files changed, 264 insertions(+), 3 deletions(-) create mode 100644 speech/api/speech_async_grpc.py create mode 100644 speech/api/speech_async_grpc_test.py create mode 100644 speech/api/speech_async_rest.py create mode 100644 speech/api/speech_async_rest_test.py rename speech/api/{speech_gcs.py => speech_grpc.py} (100%) rename speech/api/{speech_gcs_test.py => speech_grpc_test.py} (94%) diff --git a/speech/api/requirements-speech_grpc.txt b/speech/api/requirements-speech_grpc.txt index 443a444c5de..9a3a75a322f 100644 --- a/speech/api/requirements-speech_grpc.txt +++ b/speech/api/requirements-speech_grpc.txt @@ -1,4 +1,4 @@ gcloud==0.17.0 grpcio==0.14.0 PyAudio==0.2.9 -grpc-google-cloud-speech-v1beta1==1.0.0 +grpc-google-cloud-speech-v1beta1==1.0.1 diff --git a/speech/api/speech_async_grpc.py b/speech/api/speech_async_grpc.py new file mode 100644 index 00000000000..3095e6f37ef --- /dev/null +++ b/speech/api/speech_async_grpc.py @@ -0,0 +1,105 @@ +#!/usr/bin/python +# Copyright (C) 2016 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Sample that transcribes a FLAC audio file stored in Google Cloud Storage, +using async GRPC.""" + +import argparse +import time + +from gcloud.credentials import get_credentials +from google.cloud.speech.v1beta1 import cloud_speech_pb2 as cloud_speech +from google.longrunning import operations_grpc_pb2 as operations_grpc +from grpc.beta import implementations + +# Keep the request alive for this many seconds +DEADLINE_SECS = 10 +SPEECH_SCOPE = 'https://www.googleapis.com/auth/cloud-platform' + + +def make_channel(host, port): + """Creates an SSL channel with auth credentials from the environment.""" + # In order to make an https call, use an ssl channel with defaults + ssl_channel = implementations.ssl_channel_credentials(None, None, None) + + # Grab application default credentials from the environment + creds = get_credentials().create_scoped([SPEECH_SCOPE]) + # Add a plugin to inject the creds into the header + auth_header = ( + 'Authorization', + 'Bearer ' + creds.get_access_token().access_token) + auth_plugin = implementations.metadata_call_credentials( + lambda _, cb: cb([auth_header], None), + name='google_creds') + + # compose the two together for both ssl and google auth + composite_channel = implementations.composite_channel_credentials( + ssl_channel, auth_plugin) + + return implementations.secure_channel(host, port, composite_channel) + + +def main(input_uri, encoding, sample_rate): + channel = make_channel('speech.googleapis.com', 443) + service = cloud_speech.beta_create_Speech_stub(channel) + # The method and parameters can be inferred from the proto from which the + # grpc client lib was generated. See: + # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1beta1/cloud_speech.proto + response = service.AsyncRecognize(cloud_speech.AsyncRecognizeRequest( + config=cloud_speech.RecognitionConfig( + encoding=encoding, + sample_rate=sample_rate, + ), + audio=cloud_speech.RecognitionAudio( + uri=input_uri, + ) + ), DEADLINE_SECS) + # Print the longrunning operation handle. + print(response) + + # Give the server a few seconds to process. + print('Waiting 5 seconds for server processing...') + time.sleep(5) + # Construct a long running operation endpoint. + service = operations_grpc.beta_create_Operations_stub(channel) + # Get the long running operation with response. + response = service.GetOperation( + operations_grpc.GetOperationRequest(name=response.name), + DEADLINE_SECS) + # Print the recognition results. + results = cloud_speech.AsyncRecognizeResponse() + response.response.Unpack(results) + print(results) + + +def _gcs_uri(text): + if not text.startswith('gs://'): + raise ValueError( + 'Cloud Storage uri must be of the form gs://bucket/path/') + return text + + +PROTO_URL = ('https://github.com/googleapis/googleapis/blob/master/' + 'google/cloud/speech/v1beta1/cloud_speech.proto') +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('input_uri', type=_gcs_uri) + parser.add_argument( + '--encoding', default='FLAC', choices=[ + 'LINEAR16', 'FLAC', 'MULAW', 'AMR', 'AMR_WB'], + help='How the audio file is encoded. See {}#L67'.format(PROTO_URL)) + parser.add_argument('--sample_rate', default=16000) + + args = parser.parse_args() + main(args.input_uri, args.encoding, args.sample_rate) diff --git a/speech/api/speech_async_grpc_test.py b/speech/api/speech_async_grpc_test.py new file mode 100644 index 00000000000..481a2610f8e --- /dev/null +++ b/speech/api/speech_async_grpc_test.py @@ -0,0 +1,38 @@ +# Copyright 2016, Google, Inc. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re +import sys + +import pytest +from speech_async_grpc import _gcs_uri +from speech_async_grpc import main + + +@pytest.mark.skipif( + sys.version_info >= (3, 0), + reason=("grpc doesn't yet support python3 " + 'https://github.com/grpc/grpc/issues/282')) +def test_main(cloud_config, capsys): + input_uri = 'gs://{}/speech/audio.flac'.format(cloud_config.storage_bucket) + + main(input_uri, 'FLAC', 16000) + + out, err = capsys.readouterr() + assert re.search(r'how old is the Brooklyn Bridge', out, re.DOTALL | re.I) + + +def test_gcs_uri(): + _gcs_uri('gs://bucket/path') + with pytest.raises(ValueError): + _gcs_uri('/local/path') diff --git a/speech/api/speech_async_rest.py b/speech/api/speech_async_rest.py new file mode 100644 index 00000000000..954f8e707f9 --- /dev/null +++ b/speech/api/speech_async_rest.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Google Cloud Speech API sample application using the REST API for async +batch processing.""" + +# [START import_libraries] +import argparse +import base64 +import json +import time + +from googleapiclient import discovery +import httplib2 +from oauth2client.client import GoogleCredentials +# [END import_libraries] + + +# [START authenticating] +DISCOVERY_URL = ('https://{api}.googleapis.com/$discovery/rest?' + 'version={apiVersion}') + + +# Application default credentials provided by env variable +# GOOGLE_APPLICATION_CREDENTIALS +def get_speech_service(): + credentials = GoogleCredentials.get_application_default().create_scoped( + ['https://www.googleapis.com/auth/cloud-platform']) + http = httplib2.Http() + credentials.authorize(http) + + return discovery.build( + 'speech', 'v1beta1', http=http, discoveryServiceUrl=DISCOVERY_URL) +# [END authenticating] + + +def main(speech_file): + """Transcribe the given audio file asynchronously. + + Args: + speech_file: the name of the audio file. + """ + # [START construct_request] + with open(speech_file, 'rb') as speech: + # Base64 encode the binary audio file for inclusion in the JSON + # request. + speech_content = base64.b64encode(speech.read()) + + service = get_speech_service() + service_request = service.speech().asyncrecognize( + body={ + 'config': { + 'encoding': 'LINEAR16', + 'sampleRate': 16000 + }, + 'audio': { + 'content': speech_content.decode('UTF-8') + } + }) + # [END construct_request] + # [START send_request] + response = service_request.execute() + print(json.dumps(response)) + # [END send_request] + + # Give the server a few seconds to process. + print('Waiting 5 seconds for server processing...') + time.sleep(5) + # Construct a GetOperation request. + name = response['name'] + service_request = service.operations().get(name=name) + # Get the long running operation with response. + response = service_request.execute() + print(json.dumps(response['response']['results'])) + + +# [START run_application] +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument( + 'speech_file', help='Full path of audio file to be recognized') + args = parser.parse_args() + main(args.speech_file) + # [END run_application] diff --git a/speech/api/speech_async_rest_test.py b/speech/api/speech_async_rest_test.py new file mode 100644 index 00000000000..d9f79e6aac5 --- /dev/null +++ b/speech/api/speech_async_rest_test.py @@ -0,0 +1,23 @@ +# Copyright 2016, Google, Inc. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re + +from speech_async_rest import main + + +def test_main(resource, capsys): + main(resource('audio.raw')) + out, err = capsys.readouterr() + + assert re.search(r'how old is the Brooklyn Bridge', out, re.DOTALL | re.I) diff --git a/speech/api/speech_gcs.py b/speech/api/speech_grpc.py similarity index 100% rename from speech/api/speech_gcs.py rename to speech/api/speech_grpc.py diff --git a/speech/api/speech_gcs_test.py b/speech/api/speech_grpc_test.py similarity index 94% rename from speech/api/speech_gcs_test.py rename to speech/api/speech_grpc_test.py index 56f2b4a42d3..8b0ae9b6f61 100644 --- a/speech/api/speech_gcs_test.py +++ b/speech/api/speech_grpc_test.py @@ -15,8 +15,8 @@ import sys import pytest -from speech_gcs import _gcs_uri -from speech_gcs import main +from speech_grpc import _gcs_uri +from speech_grpc import main @pytest.mark.skipif( From c413af8b7d5cc0f1bc38660b01fbc31e2d577bf3 Mon Sep 17 00:00:00 2001 From: xinjie Date: Sat, 2 Jul 2016 09:26:25 -0700 Subject: [PATCH 2/3] Better waiting for long running operation --- speech/api/speech_async_grpc.py | 19 ++++++++++++------- speech/api/speech_async_rest.py | 16 ++++++++++------ 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/speech/api/speech_async_grpc.py b/speech/api/speech_async_grpc.py index 3095e6f37ef..55d4bfe1a19 100644 --- a/speech/api/speech_async_grpc.py +++ b/speech/api/speech_async_grpc.py @@ -68,15 +68,20 @@ def main(input_uri, encoding, sample_rate): # Print the longrunning operation handle. print(response) - # Give the server a few seconds to process. - print('Waiting 5 seconds for server processing...') - time.sleep(5) # Construct a long running operation endpoint. service = operations_grpc.beta_create_Operations_stub(channel) - # Get the long running operation with response. - response = service.GetOperation( - operations_grpc.GetOperationRequest(name=response.name), - DEADLINE_SECS) + + name = response.name + while True: + # Give the server a few seconds to process. + print('Waiting for server processing...') + time.sleep(1) + # Get the long running operation with response. + response = service.GetOperation( + operations_grpc.GetOperationRequest(name=name), DEADLINE_SECS) + if response.done: + break + # Print the recognition results. results = cloud_speech.AsyncRecognizeResponse() response.response.Unpack(results) diff --git a/speech/api/speech_async_rest.py b/speech/api/speech_async_rest.py index 954f8e707f9..647a843d3ae 100644 --- a/speech/api/speech_async_rest.py +++ b/speech/api/speech_async_rest.py @@ -74,14 +74,18 @@ def main(speech_file): print(json.dumps(response)) # [END send_request] - # Give the server a few seconds to process. - print('Waiting 5 seconds for server processing...') - time.sleep(5) - # Construct a GetOperation request. name = response['name'] + # Construct a GetOperation request. service_request = service.operations().get(name=name) - # Get the long running operation with response. - response = service_request.execute() + while True: + # Give the server a few seconds to process. + print('Waiting for server processing...') + time.sleep(1) + # Get the long running operation with response. + response = service_request.execute() + if 'done' in response and response['done']: + break + print(json.dumps(response['response']['results'])) From ad385c121ca54085a1d2bba6b2f2675a39e58fb4 Mon Sep 17 00:00:00 2001 From: xinjie Date: Wed, 6 Jul 2016 11:32:10 -0700 Subject: [PATCH 3/3] review --- speech/api/speech_async_grpc.py | 33 +++++++++++++++++++-------------- speech/api/speech_async_rest.py | 11 +++++------ 2 files changed, 24 insertions(+), 20 deletions(-) diff --git a/speech/api/speech_async_grpc.py b/speech/api/speech_async_grpc.py index 55d4bfe1a19..3186d9f3060 100644 --- a/speech/api/speech_async_grpc.py +++ b/speech/api/speech_async_grpc.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # Copyright (C) 2016 Google Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + """Sample that transcribes a FLAC audio file stored in Google Cloud Storage, using async GRPC.""" @@ -19,8 +20,8 @@ import time from gcloud.credentials import get_credentials -from google.cloud.speech.v1beta1 import cloud_speech_pb2 as cloud_speech -from google.longrunning import operations_grpc_pb2 as operations_grpc +from google.cloud.speech.v1beta1 import cloud_speech_pb2 +from google.longrunning import operations_grpc_pb2 from grpc.beta import implementations # Keep the request alive for this many seconds @@ -52,58 +53,62 @@ def make_channel(host, port): def main(input_uri, encoding, sample_rate): channel = make_channel('speech.googleapis.com', 443) - service = cloud_speech.beta_create_Speech_stub(channel) + service = cloud_speech_pb2.beta_create_Speech_stub(channel) # The method and parameters can be inferred from the proto from which the # grpc client lib was generated. See: # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1beta1/cloud_speech.proto - response = service.AsyncRecognize(cloud_speech.AsyncRecognizeRequest( - config=cloud_speech.RecognitionConfig( + response = service.AsyncRecognize(cloud_speech_pb2.AsyncRecognizeRequest( + config=cloud_speech_pb2.RecognitionConfig( encoding=encoding, sample_rate=sample_rate, ), - audio=cloud_speech.RecognitionAudio( + audio=cloud_speech_pb2.RecognitionAudio( uri=input_uri, ) ), DEADLINE_SECS) + # Print the longrunning operation handle. print(response) # Construct a long running operation endpoint. - service = operations_grpc.beta_create_Operations_stub(channel) + service = operations_grpc_pb2.beta_create_Operations_stub(channel) name = response.name + while True: # Give the server a few seconds to process. print('Waiting for server processing...') time.sleep(1) # Get the long running operation with response. response = service.GetOperation( - operations_grpc.GetOperationRequest(name=name), DEADLINE_SECS) + operations_grpc_pb2.GetOperationRequest(name=name), + DEADLINE_SECS) + if response.done: break # Print the recognition results. - results = cloud_speech.AsyncRecognizeResponse() + results = cloud_speech_pb2.AsyncRecognizeResponse() response.response.Unpack(results) print(results) def _gcs_uri(text): if not text.startswith('gs://'): - raise ValueError( + raise argparse.ArgumentTypeError( 'Cloud Storage uri must be of the form gs://bucket/path/') return text -PROTO_URL = ('https://github.com/googleapis/googleapis/blob/master/' - 'google/cloud/speech/v1beta1/cloud_speech.proto') if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('input_uri', type=_gcs_uri) parser.add_argument( '--encoding', default='FLAC', choices=[ 'LINEAR16', 'FLAC', 'MULAW', 'AMR', 'AMR_WB'], - help='How the audio file is encoded. See {}#L67'.format(PROTO_URL)) + help='How the audio file is encoded. See {}#L67'.format( + 'https://github.com/googleapis/googleapis/blob/master/' + 'google/cloud/speech/v1beta1/cloud_speech.proto')) parser.add_argument('--sample_rate', default=16000) args = parser.parse_args() diff --git a/speech/api/speech_async_rest.py b/speech/api/speech_async_rest.py index 647a843d3ae..c0ddbdb4cd5 100644 --- a/speech/api/speech_async_rest.py +++ b/speech/api/speech_async_rest.py @@ -22,7 +22,6 @@ import time from googleapiclient import discovery -import httplib2 from oauth2client.client import GoogleCredentials # [END import_libraries] @@ -37,11 +36,10 @@ def get_speech_service(): credentials = GoogleCredentials.get_application_default().create_scoped( ['https://www.googleapis.com/auth/cloud-platform']) - http = httplib2.Http() - credentials.authorize(http) return discovery.build( - 'speech', 'v1beta1', http=http, discoveryServiceUrl=DISCOVERY_URL) + 'speech', 'v1beta1', credentials=credentials, + discoveryServiceUrl=DISCOVERY_URL) # [END authenticating] @@ -53,8 +51,7 @@ def main(speech_file): """ # [START construct_request] with open(speech_file, 'rb') as speech: - # Base64 encode the binary audio file for inclusion in the JSON - # request. + # Base64 encode the binary audio file for inclusion in the request. speech_content = base64.b64encode(speech.read()) service = get_speech_service() @@ -77,12 +74,14 @@ def main(speech_file): name = response['name'] # Construct a GetOperation request. service_request = service.operations().get(name=name) + while True: # Give the server a few seconds to process. print('Waiting for server processing...') time.sleep(1) # Get the long running operation with response. response = service_request.execute() + if 'done' in response and response['done']: break