 package com.example.speech;
 
+import com.google.api.gax.core.ApiStreamObserver;
 import com.google.api.gax.grpc.OperationFuture;
-import com.google.cloud.speech.spi.v1beta1.SpeechClient;
-import com.google.cloud.speech.v1beta1.AsyncRecognizeResponse;
-import com.google.cloud.speech.v1beta1.RecognitionAudio;
-import com.google.cloud.speech.v1beta1.RecognitionConfig;
-import com.google.cloud.speech.v1beta1.RecognitionConfig.AudioEncoding;
-import com.google.cloud.speech.v1beta1.SpeechRecognitionAlternative;
-import com.google.cloud.speech.v1beta1.SpeechRecognitionResult;
-import com.google.cloud.speech.v1beta1.SyncRecognizeResponse;
+import com.google.api.gax.grpc.StreamingCallable;
+import com.google.cloud.speech.spi.v1.SpeechClient;
+import com.google.cloud.speech.v1.LongRunningRecognizeResponse;
+import com.google.cloud.speech.v1.RecognitionAudio;
+import com.google.cloud.speech.v1.RecognitionConfig;
+import com.google.cloud.speech.v1.RecognitionConfig.AudioEncoding;
+import com.google.cloud.speech.v1.RecognizeResponse;
+import com.google.cloud.speech.v1.SpeechRecognitionAlternative;
+import com.google.cloud.speech.v1.SpeechRecognitionResult;
+import com.google.cloud.speech.v1.StreamingRecognitionConfig;
+import com.google.cloud.speech.v1.StreamingRecognitionResult;
+import com.google.cloud.speech.v1.StreamingRecognizeRequest;
+import com.google.cloud.speech.v1.StreamingRecognizeResponse;
+import com.google.common.util.concurrent.SettableFuture;
 import com.google.protobuf.ByteString;
 
 import java.io.IOException;
@@ -40,7 +47,7 @@ public static void main(String... args) throws Exception {
     System.out.printf(
         "\tjava %s \"<command>\" \"<path-to-image>\"\n"
         + "Commands:\n"
-        + "\tsyncrecognize | asyncrecognize\n"
+        + "\tsyncrecognize | asyncrecognize | streamrecognize \n"
         + "Path:\n\tA file path (ex: ./resources/audio.raw) or a URI "
         + "for a Cloud Storage resource (gs://...)\n",
         Recognize.class.getCanonicalName());
@@ -62,7 +69,11 @@ public static void main(String... args) throws Exception {
       } else {
         asyncRecognizeFile(path);
       }
+    } else if (command.equals("streamrecognize")) {
+      streamingRecognizeFile(path);
+      //streamingRecognizeEasy(path);
     }
+
   }
 
   /**
@@ -80,14 +91,15 @@ public static void syncRecognizeFile(String fileName) throws Exception, IOException {
     // Configure request with local raw PCM audio
     RecognitionConfig config = RecognitionConfig.newBuilder()
         .setEncoding(AudioEncoding.LINEAR16)
-        .setSampleRate(16000)
+        .setLanguageCode("en-US")
+        .setSampleRateHertz(16000)
         .build();
     RecognitionAudio audio = RecognitionAudio.newBuilder()
         .setContent(audioBytes)
         .build();
 
     // Use blocking call to get audio transcript
-    SyncRecognizeResponse response = speech.syncRecognize(config, audio);
+    RecognizeResponse response = speech.recognize(config, audio);
     List<SpeechRecognitionResult> results = response.getResultsList();
 
     for (SpeechRecognitionResult result : results) {
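For orientation, the v1 synchronous path that this hunk switches to can be exercised end to end using only the calls visible in this diff. The sketch below is a minimal, self-contained variant assuming a local 16 kHz LINEAR16 raw file; the class name and hard-coded path are illustrative assumptions, not part of the sample.

// Minimal sketch of the v1 blocking recognize() flow (illustrative class name and path).
import com.google.cloud.speech.spi.v1.SpeechClient;
import com.google.cloud.speech.v1.RecognitionAudio;
import com.google.cloud.speech.v1.RecognitionConfig;
import com.google.cloud.speech.v1.RecognitionConfig.AudioEncoding;
import com.google.cloud.speech.v1.RecognizeResponse;
import com.google.cloud.speech.v1.SpeechRecognitionAlternative;
import com.google.cloud.speech.v1.SpeechRecognitionResult;
import com.google.protobuf.ByteString;
import java.nio.file.Files;
import java.nio.file.Paths;

public class SyncRecognizeSketch {
  public static void main(String[] args) throws Exception {
    // Read the raw PCM audio into a ByteString (assumed path).
    byte[] data = Files.readAllBytes(Paths.get("./resources/audio.raw"));
    ByteString audioBytes = ByteString.copyFrom(data);

    // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS, as in the sample.
    SpeechClient speech = SpeechClient.create();

    RecognitionConfig config = RecognitionConfig.newBuilder()
        .setEncoding(AudioEncoding.LINEAR16)
        .setLanguageCode("en-US")
        .setSampleRateHertz(16000)
        .build();
    RecognitionAudio audio = RecognitionAudio.newBuilder()
        .setContent(audioBytes)
        .build();

    // Blocking call; prints every alternative of every result.
    RecognizeResponse response = speech.recognize(config, audio);
    for (SpeechRecognitionResult result : response.getResultsList()) {
      for (SpeechRecognitionAlternative alternative : result.getAlternativesList()) {
        System.out.printf("Transcription: %s%n", alternative.getTranscript());
      }
    }
    speech.close();
  }
}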
@@ -111,14 +123,15 @@ public static void syncRecognizeGcs(String gcsUri) throws Exception, IOException {
     // Builds the request for remote FLAC file
     RecognitionConfig config = RecognitionConfig.newBuilder()
         .setEncoding(AudioEncoding.FLAC)
-        .setSampleRate(16000)
+        .setLanguageCode("en-US")
+        .setSampleRateHertz(16000)
         .build();
     RecognitionAudio audio = RecognitionAudio.newBuilder()
         .setUri(gcsUri)
         .build();
 
     // Use blocking call for getting audio transcript
-    SyncRecognizeResponse response = speech.syncRecognize(config, audio);
+    RecognizeResponse response = speech.recognize(config, audio);
     List<SpeechRecognitionResult> results = response.getResultsList();
 
     for (SpeechRecognitionResult result : results) {
@@ -130,6 +143,7 @@ public static void syncRecognizeGcs(String gcsUri) throws Exception, IOException {
     speech.close();
   }
 
   /**
    * Performs non-blocking speech recognition on raw PCM audio and prints
    * the transcription.
@@ -147,14 +161,16 @@ public static void asyncRecognizeFile(String fileName) throws Exception, IOException {
     // Configure request with local raw PCM audio
     RecognitionConfig config = RecognitionConfig.newBuilder()
         .setEncoding(AudioEncoding.LINEAR16)
-        .setSampleRate(16000)
+        .setLanguageCode("en-US")
+        .setSampleRateHertz(16000)
         .build();
     RecognitionAudio audio = RecognitionAudio.newBuilder()
         .setContent(audioBytes)
         .build();
 
     // Use non-blocking call for getting file transcription
-    OperationFuture<AsyncRecognizeResponse> response = speech.asyncRecognizeAsync(config, audio);
+    OperationFuture<LongRunningRecognizeResponse> response =
+        speech.longRunningRecognizeAsync(config, audio);
     while (!response.isDone()) {
       System.out.println("Waiting for response...");
       Thread.sleep(200);
@@ -175,23 +191,25 @@ public static void asyncRecognizeFile(String fileName) throws Exception, IOException {
    * Performs non-blocking speech recognition on remote FLAC file and prints
    * the transcription.
    *
-   * @param gcsUri the path to the remote FLAC audio file to transcribe.
+   * @param gcsUri the path to the remote LINEAR16 audio file to transcribe.
    */
   public static void asyncRecognizeGcs(String gcsUri) throws Exception, IOException {
     // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
     SpeechClient speech = SpeechClient.create();
 
-    // Configure remote file request for FLAC file
+    // Configure remote file request for Linear16
    RecognitionConfig config = RecognitionConfig.newBuilder()
         .setEncoding(AudioEncoding.FLAC)
-        .setSampleRate(16000)
+        .setLanguageCode("en-US")
+        .setSampleRateHertz(16000)
         .build();
     RecognitionAudio audio = RecognitionAudio.newBuilder()
         .setUri(gcsUri)
         .build();
 
     // Use non-blocking call for getting file transcription
-    OperationFuture<AsyncRecognizeResponse> response = speech.asyncRecognizeAsync(config, audio);
+    OperationFuture<LongRunningRecognizeResponse> response =
+        speech.longRunningRecognizeAsync(config, audio);
     while (!response.isDone()) {
       System.out.println("Waiting for response...");
       Thread.sleep(200);
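Both long-running hunks end at the polling loop, so the part that reads the finished operation is not visible here. A plausible continuation, assuming OperationFuture exposes the standard Future-style get() and reusing the result-printing pattern from the synchronous methods, could be factored as the hypothetical helper below.

  // Hypothetical helper, not part of this commit: wait for the long-running
  // operation, print each transcript alternative, then release the client.
  private static void printLongRunningResults(
      SpeechClient speech, OperationFuture<LongRunningRecognizeResponse> response)
      throws Exception {
    while (!response.isDone()) {
      System.out.println("Waiting for response...");
      Thread.sleep(200);
    }
    // Assumes Future-style get(), returning the LongRunningRecognizeResponse.
    for (SpeechRecognitionResult result : response.get().getResultsList()) {
      for (SpeechRecognitionAlternative alternative : result.getAlternativesList()) {
        System.out.printf("Transcription: %s%n", alternative.getTranscript());
      }
    }
    speech.close();
  }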
@@ -207,4 +225,86 @@ public static void asyncRecognizeGcs(String gcsUri) throws Exception, IOException {
     }
     speech.close();
   }
+
+  /**
+   * Performs streaming speech recognition on raw PCM audio data.
+   *
+   * @param fileName the path to a PCM audio file to transcribe.
+   */
+  public static void streamingRecognizeFile(String fileName) throws Exception, IOException {
+    Path path = Paths.get(fileName);
+    byte[] data = Files.readAllBytes(path);
+
+    // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
+    SpeechClient speech = SpeechClient.create();
+
+    // Configure request with local raw PCM audio
+    RecognitionConfig recConfig = RecognitionConfig.newBuilder()
+        .setEncoding(AudioEncoding.LINEAR16)
+        .setLanguageCode("en-US")
+        .setSampleRateHertz(16000)
+        .build();
+    StreamingRecognitionConfig config = StreamingRecognitionConfig.newBuilder()
+        .setConfig(recConfig)
+        .build();
+
+    class ResponseApiStreamingObserver<T> implements ApiStreamObserver<T> {
+      private final SettableFuture<List<T>> future = SettableFuture.create();
+      private final List<T> messages = new java.util.ArrayList<T>();
+
+      @Override
+      public void onNext(T message) {
+        messages.add(message);
+      }
+
+      @Override
+      public void onError(Throwable t) {
+        future.setException(t);
+      }
+
+      @Override
+      public void onCompleted() {
+        future.set(messages);
+      }
+
+      // Returns the SettableFuture object to get received messages / exceptions.
+      public SettableFuture<List<T>> future() {
+        return future;
+      }
+    }
+
+    ResponseApiStreamingObserver<StreamingRecognizeResponse> responseObserver =
+        new ResponseApiStreamingObserver<StreamingRecognizeResponse>();
+
+    StreamingCallable<StreamingRecognizeRequest, StreamingRecognizeResponse> callable =
+        speech.streamingRecognizeCallable();
+
+    ApiStreamObserver<StreamingRecognizeRequest> requestObserver =
+        callable.bidiStreamingCall(responseObserver);
+
+    // The first request must **only** contain the audio configuration:
+    requestObserver.onNext(StreamingRecognizeRequest.newBuilder()
+        .setStreamingConfig(config)
+        .build());
+
+    // Subsequent requests must **only** contain the audio data.
+    requestObserver.onNext(StreamingRecognizeRequest.newBuilder()
+        .setAudioContent(ByteString.copyFrom(data))
+        .build());
+
+    // Mark transmission as completed after sending the data.
+    requestObserver.onCompleted();
+
+    List<StreamingRecognizeResponse> responses = responseObserver.future().get();
+
+    for (StreamingRecognizeResponse response : responses) {
+      for (StreamingRecognitionResult result : response.getResultsList()) {
+        for (SpeechRecognitionAlternative alternative : result.getAlternativesList()) {
+          System.out.println(alternative.getTranscript());
+        }
+      }
+    }
+    speech.close();
+  }
+
 }
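As the comments in the new method note, the first streaming request carries only the configuration and later requests carry only audio; here the whole file is sent as a single audio request. For longer recordings the audio would normally be split across several requests on the same open stream. The sketch below shows that variation; the chunk size and helper name are assumptions, while ByteString.copyFrom(byte[], offset, size) is a standard protobuf overload and the observer calls are the same ones used above.

  // Hypothetical variation, not part of this commit: send the audio in
  // fixed-size chunks on the already-open stream instead of one request.
  private static void streamAudioInChunks(
      ApiStreamObserver<StreamingRecognizeRequest> requestObserver, byte[] data) {
    final int chunkSize = 32 * 1024; // assumed chunk size
    for (int offset = 0; offset < data.length; offset += chunkSize) {
      int length = Math.min(chunkSize, data.length - offset);
      requestObserver.onNext(StreamingRecognizeRequest.newBuilder()
          .setAudioContent(ByteString.copyFrom(data, offset, length))
          .build());
    }
    // Mark transmission as completed after the last chunk.
    requestObserver.onCompleted();
  }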