Skip to content

Commit 00bd89b

Browse files
authored
Merge pull request GoogleCloudPlatform#503 from GoogleCloudPlatform/speech-gcs-examples
Adds sync / async examples for local and remote files
2 parents 879aa0d + 7933aa6 commit 00bd89b

File tree

4 files changed

+305
-0
lines changed

4 files changed

+305
-0
lines changed

speech/cloud-client/README.md

+12
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,15 @@ You can then run a given `ClassName` via:
2424
### Transcribe a local audio file (using the quickstart sample)
2525

2626
mvn exec:java -Dexec.mainClass=com.example.speech.QuickstartSample
27+
28+
### Transcribe a local audio file (using the recognize sample)
29+
```
30+
mvn exec:java -Dexec.mainClass=com.example.speech.Recognize \
31+
-Dexec.args="syncrecognize ./resources/audio.raw"
32+
```
33+
34+
### Transcribe a remote audio file (using the recognize sample)
35+
```
36+
mvn exec:java -Dexec.mainClass=com.example.speech.Recognize \
37+
-Dexec.args="syncrecognize 'gs://cloud-samples-tests/speech/brooklyn.flac'"
38+
```

speech/cloud-client/src/main/java/com/example/speech/QuickstartSample.java

+1
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ public static void main(String... args) throws Exception {
6464
System.out.printf("Transcription: %s%n", alternative.getTranscript());
6565
}
6666
}
67+
speech.close();
6768
}
6869
}
6970
// [END speech_quickstart]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
/*
2+
Copyright 2017, Google Inc.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package com.example.speech;
18+
19+
import com.google.api.gax.grpc.OperationFuture;
20+
import com.google.cloud.speech.spi.v1beta1.SpeechClient;
21+
import com.google.cloud.speech.v1beta1.AsyncRecognizeResponse;
22+
import com.google.cloud.speech.v1beta1.RecognitionAudio;
23+
import com.google.cloud.speech.v1beta1.RecognitionConfig;
24+
import com.google.cloud.speech.v1beta1.RecognitionConfig.AudioEncoding;
25+
import com.google.cloud.speech.v1beta1.SpeechRecognitionAlternative;
26+
import com.google.cloud.speech.v1beta1.SpeechRecognitionResult;
27+
import com.google.cloud.speech.v1beta1.SyncRecognizeResponse;
28+
import com.google.protobuf.ByteString;
29+
30+
import java.io.IOException;
31+
import java.nio.file.Files;
32+
import java.nio.file.Path;
33+
import java.nio.file.Paths;
34+
import java.util.List;
35+
36+
public class Recognize {
37+
public static void main(String... args) throws Exception {
38+
if (args.length < 1) {
39+
System.out.println("Usage:");
40+
System.out.printf(
41+
"\tjava %s \"<command>\" \"<path-to-image>\"\n"
42+
+ "Commands:\n"
43+
+ "\tsyncrecognize | asyncrecognize\n"
44+
+ "Path:\n\tA file path (ex: ./resources/audio.raw) or a URI "
45+
+ "for a Cloud Storage resource (gs://...)\n",
46+
Recognize.class.getCanonicalName());
47+
return;
48+
}
49+
String command = args[0];
50+
String path = args.length > 1 ? args[1] : "";
51+
52+
// Use command and GCS path pattern to invoke transcription.
53+
if (command.equals("syncrecognize")) {
54+
if (path.startsWith("gs://")) {
55+
syncRecognizeGcs(path);
56+
} else {
57+
syncRecognizeFile(path);
58+
}
59+
} else if (command.equals("asyncrecognize")) {
60+
if (path.startsWith("gs://")) {
61+
asyncRecognizeGcs(path);
62+
} else {
63+
asyncRecognizeFile(path);
64+
}
65+
}
66+
}
67+
68+
/**
69+
* Performs speech recognition on raw PCM audio and prints the transcription.
70+
*
71+
* @param fileName the path to a PCM audio file to transcribe.
72+
*/
73+
public static void syncRecognizeFile(String fileName) throws Exception, IOException {
74+
SpeechClient speech = SpeechClient.create();
75+
76+
Path path = Paths.get(fileName);
77+
byte[] data = Files.readAllBytes(path);
78+
ByteString audioBytes = ByteString.copyFrom(data);
79+
80+
// Configure request with local raw PCM audio
81+
RecognitionConfig config = RecognitionConfig.newBuilder()
82+
.setEncoding(AudioEncoding.LINEAR16)
83+
.setSampleRate(16000)
84+
.build();
85+
RecognitionAudio audio = RecognitionAudio.newBuilder()
86+
.setContent(audioBytes)
87+
.build();
88+
89+
// Use blocking call to get audio transcript
90+
SyncRecognizeResponse response = speech.syncRecognize(config, audio);
91+
List<SpeechRecognitionResult> results = response.getResultsList();
92+
93+
for (SpeechRecognitionResult result: results) {
94+
List<SpeechRecognitionAlternative> alternatives = result.getAlternativesList();
95+
for (SpeechRecognitionAlternative alternative: alternatives) {
96+
System.out.printf("Transcription: %s%n", alternative.getTranscript());
97+
}
98+
}
99+
speech.close();
100+
}
101+
102+
/**
103+
* Performs speech recognition on remote FLAC file and prints the transcription.
104+
*
105+
* @param gcsUri the path to the remote FLAC audio file to transcribe.
106+
*/
107+
public static void syncRecognizeGcs(String gcsUri) throws Exception, IOException {
108+
// Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
109+
SpeechClient speech = SpeechClient.create();
110+
111+
// Builds the request for remote FLAC file
112+
RecognitionConfig config = RecognitionConfig.newBuilder()
113+
.setEncoding(AudioEncoding.FLAC)
114+
.setSampleRate(16000)
115+
.build();
116+
RecognitionAudio audio = RecognitionAudio.newBuilder()
117+
.setUri(gcsUri)
118+
.build();
119+
120+
// Use blocking call for getting audio transcript
121+
SyncRecognizeResponse response = speech.syncRecognize(config, audio);
122+
List<SpeechRecognitionResult> results = response.getResultsList();
123+
124+
for (SpeechRecognitionResult result: results) {
125+
List<SpeechRecognitionAlternative> alternatives = result.getAlternativesList();
126+
for (SpeechRecognitionAlternative alternative: alternatives) {
127+
System.out.printf("Transcription: %s%n", alternative.getTranscript());
128+
}
129+
}
130+
speech.close();
131+
}
132+
133+
/**
134+
* Performs non-blocking speech recognition on raw PCM audio and prints
135+
* the transcription.
136+
*
137+
* @param fileName the path to a PCM audio file to transcribe.
138+
*/
139+
public static void asyncRecognizeFile(String fileName) throws Exception, IOException {
140+
// Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
141+
SpeechClient speech = SpeechClient.create();
142+
143+
Path path = Paths.get(fileName);
144+
byte[] data = Files.readAllBytes(path);
145+
ByteString audioBytes = ByteString.copyFrom(data);
146+
147+
// Configure request with local raw PCM audio
148+
RecognitionConfig config = RecognitionConfig.newBuilder()
149+
.setEncoding(AudioEncoding.LINEAR16)
150+
.setSampleRate(16000)
151+
.build();
152+
RecognitionAudio audio = RecognitionAudio.newBuilder()
153+
.setContent(audioBytes)
154+
.build();
155+
156+
// Use non-blocking call for getting file transcription
157+
OperationFuture<AsyncRecognizeResponse> response = speech.asyncRecognizeAsync(config, audio);
158+
while (!response.isDone()) {
159+
System.out.println("Waiting for response...");
160+
Thread.sleep(200);
161+
}
162+
163+
List<SpeechRecognitionResult> results = response.get().getResultsList();
164+
165+
for (SpeechRecognitionResult result: results) {
166+
List<SpeechRecognitionAlternative> alternatives = result.getAlternativesList();
167+
for (SpeechRecognitionAlternative alternative: alternatives) {
168+
System.out.printf("Transcription: %s%n", alternative.getTranscript());
169+
}
170+
}
171+
speech.close();
172+
}
173+
174+
/**
175+
* Performs non-blocking speech recognition on remote FLAC file and prints
176+
* the transcription.
177+
*
178+
* @param gcsUri the path to the remote FLAC audio file to transcribe.
179+
*/
180+
public static void asyncRecognizeGcs(String gcsUri) throws Exception, IOException {
181+
// Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
182+
SpeechClient speech = SpeechClient.create();
183+
184+
// Configure remote file request for FLAC file
185+
RecognitionConfig config = RecognitionConfig.newBuilder()
186+
.setEncoding(AudioEncoding.FLAC)
187+
.setSampleRate(16000)
188+
.build();
189+
RecognitionAudio audio = RecognitionAudio.newBuilder()
190+
.setUri(gcsUri)
191+
.build();
192+
193+
// Use non-blocking call for getting file transcription
194+
OperationFuture<AsyncRecognizeResponse> response = speech.asyncRecognizeAsync(config, audio);
195+
while (!response.isDone()) {
196+
System.out.println("Waiting for response...");
197+
Thread.sleep(200);
198+
}
199+
200+
List<SpeechRecognitionResult> results = response.get().getResultsList();
201+
202+
for (SpeechRecognitionResult result: results) {
203+
List<SpeechRecognitionAlternative> alternatives = result.getAlternativesList();
204+
for (SpeechRecognitionAlternative alternative: alternatives) {
205+
System.out.printf("Transcription: %s%n", alternative.getTranscript());
206+
}
207+
}
208+
speech.close();
209+
}
210+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
/*
2+
Copyright 2017, Google, Inc.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package com.example.speech;
18+
19+
import static com.google.common.truth.Truth.assertThat;
20+
21+
import org.junit.After;
22+
import org.junit.Before;
23+
import org.junit.Test;
24+
import org.junit.runner.RunWith;
25+
import org.junit.runners.JUnit4;
26+
27+
import java.io.ByteArrayOutputStream;
28+
import java.io.PrintStream;
29+
30+
/**
31+
* Tests for speech recognize sample.
32+
*/
33+
@RunWith(JUnit4.class)
34+
@SuppressWarnings("checkstyle:abbreviationaswordinname")
35+
public class RecognizeIT {
36+
private ByteArrayOutputStream bout;
37+
private PrintStream out;
38+
39+
// The path to the audio file to transcribe
40+
private String fileName = "./resources/audio.raw";
41+
private String gcsPath = "gs://cloud-samples-tests/speech/brooklyn.flac";
42+
43+
@Before
44+
public void setUp() {
45+
bout = new ByteArrayOutputStream();
46+
out = new PrintStream(bout);
47+
System.setOut(out);
48+
}
49+
50+
@After
51+
public void tearDown() {
52+
System.setOut(null);
53+
}
54+
55+
@Test
56+
public void testRecognizeFile() throws Exception {
57+
Recognize.syncRecognizeFile(fileName);
58+
String got = bout.toString();
59+
assertThat(got).contains("how old is the Brooklyn Bridge");
60+
}
61+
62+
@Test
63+
public void testRecognizeGcs() throws Exception {
64+
Recognize.syncRecognizeGcs(gcsPath);
65+
String got = bout.toString();
66+
assertThat(got).contains("how old is the Brooklyn Bridge");
67+
}
68+
69+
@Test
70+
public void testAsyncRecognizeFile() throws Exception {
71+
Recognize.asyncRecognizeFile(fileName);
72+
String got = bout.toString();
73+
assertThat(got).contains("how old is the Brooklyn Bridge");
74+
}
75+
76+
@Test
77+
public void testAsyncRecognizeGcs() throws Exception {
78+
Recognize.asyncRecognizeGcs(gcsPath);
79+
String got = bout.toString();
80+
assertThat(got).contains("how old is the Brooklyn Bridge");
81+
}
82+
}

0 commit comments

Comments
 (0)