@@ -50,7 +50,7 @@ public static void main(String... args) throws Exception {
50
50
System .out .printf (
51
51
"\t java %s \" <command>\" \" <path-to-image>\" \n "
52
52
+ "Commands:\n "
53
- + "\t syncrecognize | asyncrecognize | streamrecognize\n "
53
+ + "\t syncrecognize | asyncrecognize | streamrecognize | wordoffsets \n "
54
54
+ "Path:\n \t A file path (ex: ./resources/audio.raw) or a URI "
55
55
+ "for a Cloud Storage resource (gs://...)\n " ,
56
56
Recognize .class .getCanonicalName ());
@@ -66,6 +66,12 @@ public static void main(String... args) throws Exception {
66
66
} else {
67
67
syncRecognizeFile (path );
68
68
}
69
+ } else if (command .equals ("wordoffsets" )) {
70
+ if (path .startsWith ("gs://" )) {
71
+ asyncRecognizeWords (path );
72
+ } else {
73
+ syncRecognizeWords (path );
74
+ }
69
75
} else if (command .equals ("asyncrecognize" )) {
70
76
if (path .startsWith ("gs://" )) {
71
77
asyncRecognizeGcs (path );
@@ -113,6 +119,51 @@ public static void syncRecognizeFile(String fileName) throws Exception, IOExcept
113
119
speech .close ();
114
120
}
115
121
122
+ /**
123
+ * Performs sync recognize and prints word time offsets.
124
+ *
125
+ * @param fileName the path to a PCM audio file to transcribe get offsets on.
126
+ */
127
+ public static void syncRecognizeWords (String fileName ) throws Exception , IOException {
128
+ SpeechClient speech = SpeechClient .create ();
129
+
130
+ Path path = Paths .get (fileName );
131
+ byte [] data = Files .readAllBytes (path );
132
+ ByteString audioBytes = ByteString .copyFrom (data );
133
+
134
+ // Configure request with local raw PCM audio
135
+ RecognitionConfig config = RecognitionConfig .newBuilder ()
136
+ .setEncoding (AudioEncoding .LINEAR16 )
137
+ .setLanguageCode ("en-US" )
138
+ .setSampleRateHertz (16000 )
139
+ .setEnableWordTimeOffsets (true )
140
+ .build ();
141
+ RecognitionAudio audio = RecognitionAudio .newBuilder ()
142
+ .setContent (audioBytes )
143
+ .build ();
144
+
145
+ // Use blocking call to get audio transcript
146
+ RecognizeResponse response = speech .recognize (config , audio );
147
+ List <SpeechRecognitionResult > results = response .getResultsList ();
148
+
149
+ for (SpeechRecognitionResult result : results ) {
150
+ List <SpeechRecognitionAlternative > alternatives = result .getAlternativesList ();
151
+ for (SpeechRecognitionAlternative alternative : alternatives ) {
152
+ System .out .printf ("Transcription: %s%n" , alternative .getTranscript ());
153
+ for (WordInfo wordInfo : alternative .getWordsList ()) {
154
+ System .out .println (wordInfo .getWord ());
155
+ System .out .printf ("\t %s.%s sec - %s.%s sec\n " ,
156
+ wordInfo .getStartTime ().getSeconds (),
157
+ wordInfo .getStartTime ().getNanos () / 100000000 ,
158
+ wordInfo .getEndTime ().getSeconds (),
159
+ wordInfo .getEndTime ().getNanos () / 100000000 );
160
+ }
161
+ }
162
+ }
163
+ speech .close ();
164
+ }
165
+
166
+
116
167
/**
117
168
* Performs speech recognition on remote FLAC file and prints the transcription.
118
169
*
@@ -193,11 +244,11 @@ public static void asyncRecognizeFile(String fileName) throws Exception, IOExcep
193
244
194
245
/**
195
246
* Performs non-blocking speech recognition on remote FLAC file and prints
196
- * the transcription.
247
+ * the transcription as well as word time offsets .
197
248
*
198
249
* @param gcsUri the path to the remote LINEAR16 audio file to transcribe.
199
250
*/
200
- public static void asyncRecognizeGcs (String gcsUri ) throws Exception , IOException {
251
+ public static void asyncRecognizeWords (String gcsUri ) throws Exception , IOException {
201
252
// Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
202
253
SpeechClient speech = SpeechClient .create ();
203
254
@@ -240,6 +291,47 @@ public static void asyncRecognizeGcs(String gcsUri) throws Exception, IOExceptio
240
291
speech .close ();
241
292
}
242
293
294
+ /**
295
+ * Performs non-blocking speech recognition on remote FLAC file and prints
296
+ * the transcription.
297
+ *
298
+ * @param gcsUri the path to the remote LINEAR16 audio file to transcribe.
299
+ */
300
+ public static void asyncRecognizeGcs (String gcsUri ) throws Exception , IOException {
301
+ // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
302
+ SpeechClient speech = SpeechClient .create ();
303
+
304
+ // Configure remote file request for Linear16
305
+ RecognitionConfig config = RecognitionConfig .newBuilder ()
306
+ .setEncoding (AudioEncoding .FLAC )
307
+ .setLanguageCode ("en-US" )
308
+ .setSampleRateHertz (16000 )
309
+ .build ();
310
+ RecognitionAudio audio = RecognitionAudio .newBuilder ()
311
+ .setUri (gcsUri )
312
+ .build ();
313
+
314
+ // Use non-blocking call for getting file transcription
315
+ OperationFuture <LongRunningRecognizeResponse , LongRunningRecognizeMetadata ,
316
+ Operation > response =
317
+ speech .longRunningRecognizeAsync (config , audio );
318
+ while (!response .isDone ()) {
319
+ System .out .println ("Waiting for response..." );
320
+ Thread .sleep (10000 );
321
+ }
322
+
323
+ List <SpeechRecognitionResult > results = response .get ().getResultsList ();
324
+
325
+ for (SpeechRecognitionResult result : results ) {
326
+ List <SpeechRecognitionAlternative > alternatives = result .getAlternativesList ();
327
+ for (SpeechRecognitionAlternative alternative : alternatives ) {
328
+ System .out .printf ("Transcription: %s\n " ,alternative .getTranscript ());
329
+ }
330
+ }
331
+ speech .close ();
332
+ }
333
+
334
+
243
335
/**
244
336
* Performs streaming speech recognition on raw PCM audio data.
245
337
*
0 commit comments