From 520bef91ce2ae0b7f5f69a3bde2948cdc238c690 Mon Sep 17 00:00:00 2001 From: Yu-Han Liu Date: Mon, 31 Jul 2017 10:04:35 -0700 Subject: [PATCH 1/3] add word time offsets to async sample --- speech/cloud-client/transcribe_async.py | 9 ++++++++- speech/cloud-client/transcribe_async_test.py | 10 ++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/speech/cloud-client/transcribe_async.py b/speech/cloud-client/transcribe_async.py index 9e5a416a567..64658c90d35 100644 --- a/speech/cloud-client/transcribe_async.py +++ b/speech/cloud-client/transcribe_async.py @@ -79,7 +79,8 @@ def transcribe_gcs(gcs_uri): config = types.RecognitionConfig( encoding=enums.RecognitionConfig.AudioEncoding.FLAC, sample_rate_hertz=16000, - language_code='en-US') + language_code='en-US', + enable_word_time_offsets=True) operation = client.long_running_recognize(config, audio) @@ -96,6 +97,12 @@ def transcribe_gcs(gcs_uri): for alternative in alternatives: print('Transcript: {}'.format(alternative.transcript)) print('Confidence: {}'.format(alternative.confidence)) + + for word_info in alternative.words: + print('Word: {}, start_time: {}, end_time: {}'.format( + word_info.word, + word_info.start_time.seconds + word_info.start_time.nanos * 1e-9, + word_info.end_time.seconds + word_info.end_time.nanos * 1e-9)) # [END def_transcribe_gcs] diff --git a/speech/cloud-client/transcribe_async_test.py b/speech/cloud-client/transcribe_async_test.py index 7d66747eb44..c5cbc487d9d 100644 --- a/speech/cloud-client/transcribe_async_test.py +++ b/speech/cloud-client/transcribe_async_test.py @@ -33,3 +33,13 @@ def test_transcribe_gcs(capsys): out, err = capsys.readouterr() assert re.search(r'how old is the Brooklyn Bridge', out, re.DOTALL | re.I) + + +def test_transcribe_gcs_word_time_offsets(capsys): + transcribe_async.transcribe_gcs( + 'gs://python-docs-samples-tests/speech/audio.flac') + out, err = capsys.readouterr() + + time = float(re.search(r'Bridge, start_time: ([0-9.]+)', out, re.DOTALL 
| re.I).group(1)) + + assert time > 0 From 8a72fe1f44b604e2a352e2ca93d127835d3ccaab Mon Sep 17 00:00:00 2001 From: Yu-Han Liu Date: Mon, 31 Jul 2017 10:11:44 -0700 Subject: [PATCH 2/3] update client library version in requirements.txt --- speech/cloud-client/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/speech/cloud-client/requirements.txt b/speech/cloud-client/requirements.txt index 92970530560..a88345ee9a4 100644 --- a/speech/cloud-client/requirements.txt +++ b/speech/cloud-client/requirements.txt @@ -1 +1 @@ -google-cloud-speech==0.27.0 +google-cloud-speech==0.27.1 From 7909933a022a18ed3b383483391a268a1c468431 Mon Sep 17 00:00:00 2001 From: Yu-Han Liu Date: Mon, 31 Jul 2017 11:55:29 -0700 Subject: [PATCH 3/3] flake8 --- speech/cloud-client/transcribe_async.py | 9 ++++++--- speech/cloud-client/transcribe_async_test.py | 5 +++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/speech/cloud-client/transcribe_async.py b/speech/cloud-client/transcribe_async.py index 64658c90d35..b25121217ff 100644 --- a/speech/cloud-client/transcribe_async.py +++ b/speech/cloud-client/transcribe_async.py @@ -99,10 +99,13 @@ def transcribe_gcs(gcs_uri): print('Confidence: {}'.format(alternative.confidence)) for word_info in alternative.words: + word = word_info.word + start_time = word_info.start_time + end_time = word_info.end_time print('Word: {}, start_time: {}, end_time: {}'.format( - word_info.word, - word_info.start_time.seconds + word_info.start_time.nanos * 1e-9, - word_info.end_time.seconds + word_info.end_time.nanos * 1e-9)) + word, + start_time.seconds + start_time.nanos * 1e-9, + end_time.seconds + end_time.nanos * 1e-9)) # [END def_transcribe_gcs] diff --git a/speech/cloud-client/transcribe_async_test.py b/speech/cloud-client/transcribe_async_test.py index c5cbc487d9d..286434d0609 100644 --- a/speech/cloud-client/transcribe_async_test.py +++ b/speech/cloud-client/transcribe_async_test.py @@ -40,6 +40,7 @@ def 
test_transcribe_gcs_word_time_offsets(capsys): 'gs://python-docs-samples-tests/speech/audio.flac') out, err = capsys.readouterr() - time = float(re.search(r'Bridge, start_time: ([0-9.]+)', out, re.DOTALL | re.I).group(1)) + match = re.search(r'Bridge, start_time: ([0-9.]+)', out, re.DOTALL | re.I) + time = float(match.group(1)) - assert time > 0 + assert time > 0