Skip to content

Commit 7085b51

Browse files
authored
Merge pull request GoogleCloudPlatform#601 from GoogleCloudPlatform/speech-v1
Speech v1
2 parents 789e54a + ff201b5 commit 7085b51

File tree

5 files changed

+142
-34
lines changed

5 files changed

+142
-34
lines changed

speech/cloud-client/README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -34,5 +34,5 @@ You can then run a given `ClassName` via:
3434
### Transcribe a remote audio file (using the recognize sample)
3535
```
3636
mvn exec:java -Dexec.mainClass=com.example.speech.Recognize \
37-
-Dexec.args="syncrecognize 'gs://java-docs-samples-tests/speech/brooklyn.flac'"
37+
-Dexec.args="syncrecognize 'gs://cloud-samples-tests/speech/brooklyn.flac'"
3838
```

speech/cloud-client/pom.xml

+2-2
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,8 @@
3737
<!-- [START dependencies] -->
3838
<dependency>
3939
<groupId>com.google.cloud</groupId>
40-
<artifactId>google-cloud-speech</artifactId>
41-
<version>0.8.1-alpha</version>
40+
<artifactId>google-cloud</artifactId>
41+
<version>0.11.2-alpha</version>
4242
</dependency>
4343
<!-- [END dependencies] -->
4444

speech/cloud-client/src/main/java/com/example/speech/QuickstartSample.java

+10-9
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,13 @@
1818

1919
// [START speech_quickstart]
2020
// Imports the Google Cloud client library
21-
import com.google.cloud.speech.spi.v1beta1.SpeechClient;
22-
import com.google.cloud.speech.v1beta1.RecognitionAudio;
23-
import com.google.cloud.speech.v1beta1.RecognitionConfig;
24-
import com.google.cloud.speech.v1beta1.RecognitionConfig.AudioEncoding;
25-
import com.google.cloud.speech.v1beta1.SpeechRecognitionAlternative;
26-
import com.google.cloud.speech.v1beta1.SpeechRecognitionResult;
27-
import com.google.cloud.speech.v1beta1.SyncRecognizeResponse;
21+
import com.google.cloud.speech.spi.v1.SpeechClient;
22+
import com.google.cloud.speech.v1.RecognitionAudio;
23+
import com.google.cloud.speech.v1.RecognitionConfig;
24+
import com.google.cloud.speech.v1.RecognitionConfig.AudioEncoding;
25+
import com.google.cloud.speech.v1.RecognizeResponse;
26+
import com.google.cloud.speech.v1.SpeechRecognitionAlternative;
27+
import com.google.cloud.speech.v1.SpeechRecognitionResult;
2828
import com.google.protobuf.ByteString;
2929

3030
import java.nio.file.Files;
@@ -48,14 +48,15 @@ public static void main(String... args) throws Exception {
4848
// Builds the sync recognize request
4949
RecognitionConfig config = RecognitionConfig.newBuilder()
5050
.setEncoding(AudioEncoding.LINEAR16)
51-
.setSampleRate(16000)
51+
.setSampleRateHertz(16000)
52+
.setLanguageCode("en-US")
5253
.build();
5354
RecognitionAudio audio = RecognitionAudio.newBuilder()
5455
.setContent(audioBytes)
5556
.build();
5657

5758
// Performs speech recognition on the audio file
58-
SyncRecognizeResponse response = speech.syncRecognize(config, audio);
59+
RecognizeResponse response = speech.recognize(config, audio);
5960
List<SpeechRecognitionResult> results = response.getResultsList();
6061

6162
for (SpeechRecognitionResult result: results) {

speech/cloud-client/src/main/java/com/example/speech/Recognize.java

+119-19
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,22 @@
1616

1717
package com.example.speech;
1818

19+
import com.google.api.gax.core.ApiStreamObserver;
1920
import com.google.api.gax.grpc.OperationFuture;
20-
import com.google.cloud.speech.spi.v1beta1.SpeechClient;
21-
import com.google.cloud.speech.v1beta1.AsyncRecognizeResponse;
22-
import com.google.cloud.speech.v1beta1.RecognitionAudio;
23-
import com.google.cloud.speech.v1beta1.RecognitionConfig;
24-
import com.google.cloud.speech.v1beta1.RecognitionConfig.AudioEncoding;
25-
import com.google.cloud.speech.v1beta1.SpeechRecognitionAlternative;
26-
import com.google.cloud.speech.v1beta1.SpeechRecognitionResult;
27-
import com.google.cloud.speech.v1beta1.SyncRecognizeResponse;
21+
import com.google.api.gax.grpc.StreamingCallable;
22+
import com.google.cloud.speech.spi.v1.SpeechClient;
23+
import com.google.cloud.speech.v1.LongRunningRecognizeResponse;
24+
import com.google.cloud.speech.v1.RecognitionAudio;
25+
import com.google.cloud.speech.v1.RecognitionConfig;
26+
import com.google.cloud.speech.v1.RecognitionConfig.AudioEncoding;
27+
import com.google.cloud.speech.v1.RecognizeResponse;
28+
import com.google.cloud.speech.v1.SpeechRecognitionAlternative;
29+
import com.google.cloud.speech.v1.SpeechRecognitionResult;
30+
import com.google.cloud.speech.v1.StreamingRecognitionConfig;
31+
import com.google.cloud.speech.v1.StreamingRecognitionResult;
32+
import com.google.cloud.speech.v1.StreamingRecognizeRequest;
33+
import com.google.cloud.speech.v1.StreamingRecognizeResponse;
34+
import com.google.common.util.concurrent.SettableFuture;
2835
import com.google.protobuf.ByteString;
2936

3037
import java.io.IOException;
@@ -40,7 +47,7 @@ public static void main(String... args) throws Exception {
4047
System.out.printf(
4148
"\tjava %s \"<command>\" \"<path-to-image>\"\n"
4249
+ "Commands:\n"
43-
+ "\tsyncrecognize | asyncrecognize\n"
50+
+ "\tsyncrecognize | asyncrecognize | streamrecognize\n"
4451
+ "Path:\n\tA file path (ex: ./resources/audio.raw) or a URI "
4552
+ "for a Cloud Storage resource (gs://...)\n",
4653
Recognize.class.getCanonicalName());
@@ -62,7 +69,11 @@ public static void main(String... args) throws Exception {
6269
} else {
6370
asyncRecognizeFile(path);
6471
}
72+
} else if (command.equals("streamrecognize")) {
73+
streamingRecognizeFile(path);
74+
//streamingRecognizeEasy(path);
6575
}
76+
6677
}
6778

6879
/**
@@ -80,14 +91,15 @@ public static void syncRecognizeFile(String fileName) throws Exception, IOExcept
8091
// Configure request with local raw PCM audio
8192
RecognitionConfig config = RecognitionConfig.newBuilder()
8293
.setEncoding(AudioEncoding.LINEAR16)
83-
.setSampleRate(16000)
94+
.setLanguageCode("en-US")
95+
.setSampleRateHertz(16000)
8496
.build();
8597
RecognitionAudio audio = RecognitionAudio.newBuilder()
8698
.setContent(audioBytes)
8799
.build();
88100

89101
// Use blocking call to get audio transcript
90-
SyncRecognizeResponse response = speech.syncRecognize(config, audio);
102+
RecognizeResponse response = speech.recognize(config, audio);
91103
List<SpeechRecognitionResult> results = response.getResultsList();
92104

93105
for (SpeechRecognitionResult result: results) {
@@ -111,14 +123,15 @@ public static void syncRecognizeGcs(String gcsUri) throws Exception, IOException
111123
// Builds the request for remote FLAC file
112124
RecognitionConfig config = RecognitionConfig.newBuilder()
113125
.setEncoding(AudioEncoding.FLAC)
114-
.setSampleRate(16000)
126+
.setLanguageCode("en-US")
127+
.setSampleRateHertz(16000)
115128
.build();
116129
RecognitionAudio audio = RecognitionAudio.newBuilder()
117130
.setUri(gcsUri)
118131
.build();
119132

120133
// Use blocking call for getting audio transcript
121-
SyncRecognizeResponse response = speech.syncRecognize(config, audio);
134+
RecognizeResponse response = speech.recognize(config, audio);
122135
List<SpeechRecognitionResult> results = response.getResultsList();
123136

124137
for (SpeechRecognitionResult result: results) {
@@ -130,6 +143,7 @@ public static void syncRecognizeGcs(String gcsUri) throws Exception, IOException
130143
speech.close();
131144
}
132145

146+
/*
133147
/**
134148
* Performs non-blocking speech recognition on raw PCM audio and prints
135149
* the transcription.
@@ -147,14 +161,16 @@ public static void asyncRecognizeFile(String fileName) throws Exception, IOExcep
147161
// Configure request with local raw PCM audio
148162
RecognitionConfig config = RecognitionConfig.newBuilder()
149163
.setEncoding(AudioEncoding.LINEAR16)
150-
.setSampleRate(16000)
164+
.setLanguageCode("en-US")
165+
.setSampleRateHertz(16000)
151166
.build();
152167
RecognitionAudio audio = RecognitionAudio.newBuilder()
153168
.setContent(audioBytes)
154169
.build();
155170

156171
// Use non-blocking call for getting file transcription
157-
OperationFuture<AsyncRecognizeResponse> response = speech.asyncRecognizeAsync(config, audio);
172+
OperationFuture<LongRunningRecognizeResponse> response =
173+
speech.longRunningRecognizeAsync(config, audio);
158174
while (!response.isDone()) {
159175
System.out.println("Waiting for response...");
160176
Thread.sleep(200);
@@ -175,23 +191,25 @@ public static void asyncRecognizeFile(String fileName) throws Exception, IOExcep
175191
* Performs non-blocking speech recognition on remote FLAC file and prints
176192
* the transcription.
177193
*
178-
* @param gcsUri the path to the remote FLAC audio file to transcribe.
194+
* @param gcsUri the path to the remote LINEAR16 audio file to transcribe.
179195
*/
180196
public static void asyncRecognizeGcs(String gcsUri) throws Exception, IOException {
181197
// Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
182198
SpeechClient speech = SpeechClient.create();
183199

184-
// Configure remote file request for FLAC file
200+
// Configure remote file request for Linear16
185201
RecognitionConfig config = RecognitionConfig.newBuilder()
186202
.setEncoding(AudioEncoding.FLAC)
187-
.setSampleRate(16000)
203+
.setLanguageCode("en-US")
204+
.setSampleRateHertz(16000)
188205
.build();
189206
RecognitionAudio audio = RecognitionAudio.newBuilder()
190207
.setUri(gcsUri)
191208
.build();
192209

193210
// Use non-blocking call for getting file transcription
194-
OperationFuture<AsyncRecognizeResponse> response = speech.asyncRecognizeAsync(config, audio);
211+
OperationFuture<LongRunningRecognizeResponse> response =
212+
speech.longRunningRecognizeAsync(config, audio);
195213
while (!response.isDone()) {
196214
System.out.println("Waiting for response...");
197215
Thread.sleep(200);
@@ -207,4 +225,86 @@ public static void asyncRecognizeGcs(String gcsUri) throws Exception, IOExceptio
207225
}
208226
speech.close();
209227
}
228+
229+
/**
230+
* Performs streaming speech recognition on raw PCM audio data.
231+
*
232+
* @param fileName the path to a PCM audio file to transcribe.
233+
*/
234+
public static void streamingRecognizeFile(String fileName) throws Exception, IOException {
235+
Path path = Paths.get(fileName);
236+
byte[] data = Files.readAllBytes(path);
237+
238+
// Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
239+
SpeechClient speech = SpeechClient.create();
240+
241+
// Configure request with local raw PCM audio
242+
RecognitionConfig recConfig = RecognitionConfig.newBuilder()
243+
.setEncoding(AudioEncoding.LINEAR16)
244+
.setLanguageCode("en-US")
245+
.setSampleRateHertz(16000)
246+
.build();
247+
StreamingRecognitionConfig config = StreamingRecognitionConfig.newBuilder()
248+
.setConfig(recConfig)
249+
.build();
250+
251+
class ResponseApiStreamingObserver<T> implements ApiStreamObserver<T> {
252+
private final SettableFuture<List<T>> future = SettableFuture.create();
253+
private final List<T> messages = new java.util.ArrayList<T>();
254+
255+
@Override
256+
public void onNext(T message) {
257+
messages.add(message);
258+
}
259+
260+
@Override
261+
public void onError(Throwable t) {
262+
future.setException(t);
263+
}
264+
265+
@Override
266+
public void onCompleted() {
267+
future.set(messages);
268+
}
269+
270+
// Returns the SettableFuture object to get received messages / exceptions.
271+
public SettableFuture<List<T>> future() {
272+
return future;
273+
}
274+
}
275+
276+
ResponseApiStreamingObserver<StreamingRecognizeResponse> responseObserver =
277+
new ResponseApiStreamingObserver<StreamingRecognizeResponse>();
278+
279+
StreamingCallable<StreamingRecognizeRequest,StreamingRecognizeResponse> callable =
280+
speech.streamingRecognizeCallable();
281+
282+
ApiStreamObserver<StreamingRecognizeRequest> requestObserver =
283+
callable.bidiStreamingCall(responseObserver);
284+
285+
// The first request must **only** contain the audio configuration:
286+
requestObserver.onNext(StreamingRecognizeRequest.newBuilder()
287+
.setStreamingConfig(config)
288+
.build());
289+
290+
// Subsequent requests must **only** contain the audio data.
291+
requestObserver.onNext(StreamingRecognizeRequest.newBuilder()
292+
.setAudioContent(ByteString.copyFrom(data))
293+
.build());
294+
295+
// Mark transmission as completed after sending the data.
296+
requestObserver.onCompleted();
297+
298+
List<StreamingRecognizeResponse> responses = responseObserver.future().get();
299+
300+
for (StreamingRecognizeResponse response: responses) {
301+
for (StreamingRecognitionResult result: response.getResultsList()) {
302+
for (SpeechRecognitionAlternative alternative : result.getAlternativesList()) {
303+
System.out.println(alternative.getTranscript());
304+
}
305+
}
306+
}
307+
speech.close();
308+
}
309+
210310
}

speech/cloud-client/src/test/java/com/example/speech/RecognizeIT.java

+10-3
Original file line numberDiff line numberDiff line change
@@ -61,25 +61,32 @@ public void testRecognizeFile() throws Exception {
6161
String got = bout.toString();
6262
assertThat(got).contains("how old is the Brooklyn Bridge");
6363
}
64-
64+
6565
@Test
6666
public void testRecognizeGcs() throws Exception {
6767
Recognize.syncRecognizeGcs(gcsPath);
6868
String got = bout.toString();
6969
assertThat(got).contains("how old is the Brooklyn Bridge");
7070
}
71-
71+
7272
@Test
7373
public void testAsyncRecognizeFile() throws Exception {
7474
Recognize.asyncRecognizeFile(fileName);
7575
String got = bout.toString();
7676
assertThat(got).contains("how old is the Brooklyn Bridge");
7777
}
78-
78+
7979
@Test
8080
public void testAsyncRecognizeGcs() throws Exception {
8181
Recognize.asyncRecognizeGcs(gcsPath);
8282
String got = bout.toString();
8383
assertThat(got).contains("how old is the Brooklyn Bridge");
8484
}
85+
86+
@Test
87+
public void testStreamRecognize() throws Exception {
88+
Recognize.streamingRecognizeFile(fileName);
89+
String got = bout.toString();
90+
assertThat(got).contains("how old is the Brooklyn Bridge");
91+
}
8592
}

0 commit comments

Comments (0)