Skip to content

Commit 14f31f0

Browse files
gguusslesv
authored and committed
Updates to highlight word time offsets (GoogleCloudPlatform#787)
1 parent 47dc3d2 commit 14f31f0

File tree

4 files changed

+121
-7
lines changed

4 files changed

+121
-7
lines changed

speech/cloud-client/README.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,3 +45,15 @@ Build your project with:
4545
java -cp target/speech-google-cloud-samples-1.0.0-jar-with-dependencies.jar \
4646
com.example.speech.Recognize asyncrecognize gs://cloud-samples-tests/speech/vr.flac
4747
```
48+
49+
### Synchronously transcribe an audio file and print word offsets
50+
```
51+
java -cp target/speech-google-cloud-samples-1.0.0-jar-with-dependencies.jar \
52+
com.example.speech.Recognize wordoffsets ./resources/audio.raw
53+
```
54+
55+
### Asynchronously transcribe a remote audio file and print word offsets
56+
```
57+
java -cp target/speech-google-cloud-samples-1.0.0-jar-with-dependencies.jar \
58+
com.example.speech.Recognize wordoffsets gs://cloud-samples-tests/speech/vr.flac
59+
```

speech/cloud-client/pom.xml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,10 @@
2121

2222
<!-- Parent defines config for testing & linting. -->
2323
<parent>
24-
<groupId>com.google.cloud.samples</groupId>
25-
<artifactId>shared-configuration</artifactId>
26-
<version>1.0.5</version>
24+
<artifactId>doc-samples</artifactId>
25+
<groupId>com.google.cloud</groupId>
26+
<version>1.0.0</version>
27+
<relativePath>../..</relativePath>
2728
</parent>
2829

2930
<properties>

speech/cloud-client/src/main/java/com/example/speech/Recognize.java

Lines changed: 95 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ public static void main(String... args) throws Exception {
5050
System.out.printf(
5151
"\tjava %s \"<command>\" \"<path-to-image>\"\n"
5252
+ "Commands:\n"
53-
+ "\tsyncrecognize | asyncrecognize | streamrecognize\n"
53+
+ "\tsyncrecognize | asyncrecognize | streamrecognize | wordoffsets\n"
5454
+ "Path:\n\tA file path (ex: ./resources/audio.raw) or a URI "
5555
+ "for a Cloud Storage resource (gs://...)\n",
5656
Recognize.class.getCanonicalName());
@@ -66,6 +66,12 @@ public static void main(String... args) throws Exception {
6666
} else {
6767
syncRecognizeFile(path);
6868
}
69+
} else if (command.equals("wordoffsets")) {
70+
if (path.startsWith("gs://")) {
71+
asyncRecognizeWords(path);
72+
} else {
73+
syncRecognizeWords(path);
74+
}
6975
} else if (command.equals("asyncrecognize")) {
7076
if (path.startsWith("gs://")) {
7177
asyncRecognizeGcs(path);
@@ -113,6 +119,51 @@ public static void syncRecognizeFile(String fileName) throws Exception, IOExcept
113119
speech.close();
114120
}
115121

122+
/**
123+
* Performs synchronous speech recognition on a local audio file and prints the transcription with each word's start and end time offsets.
124+
*
125+
* @param fileName the path to a PCM audio file to transcribe and get word time offsets for.
126+
*/
127+
public static void syncRecognizeWords(String fileName) throws Exception, IOException {
128+
SpeechClient speech = SpeechClient.create();
129+
130+
Path path = Paths.get(fileName);
131+
byte[] data = Files.readAllBytes(path);
132+
ByteString audioBytes = ByteString.copyFrom(data);
133+
134+
// Configure request with local raw PCM audio (LINEAR16, 16 kHz, en-US) and word time offsets enabled
135+
RecognitionConfig config = RecognitionConfig.newBuilder()
136+
.setEncoding(AudioEncoding.LINEAR16)
137+
.setLanguageCode("en-US")
138+
.setSampleRateHertz(16000)
139+
.setEnableWordTimeOffsets(true)
140+
.build();
141+
RecognitionAudio audio = RecognitionAudio.newBuilder()
142+
.setContent(audioBytes)
143+
.build();
144+
145+
// Use blocking call to get audio transcript
146+
RecognizeResponse response = speech.recognize(config, audio);
147+
List<SpeechRecognitionResult> results = response.getResultsList();
148+
149+
for (SpeechRecognitionResult result: results) {
150+
List<SpeechRecognitionAlternative> alternatives = result.getAlternativesList();
151+
for (SpeechRecognitionAlternative alternative: alternatives) {
152+
System.out.printf("Transcription: %s%n", alternative.getTranscript());
153+
for (WordInfo wordInfo: alternative.getWordsList()) {
154+
System.out.println(wordInfo.getWord());
155+
System.out.printf("\t%s.%s sec - %s.%s sec\n",
156+
wordInfo.getStartTime().getSeconds(),
157+
wordInfo.getStartTime().getNanos() / 100000000, // nanos / 10^8 -> tenths-of-a-second digit
158+
wordInfo.getEndTime().getSeconds(),
159+
wordInfo.getEndTime().getNanos() / 100000000); // same truncation to tenths for the end offset
160+
}
161+
}
162+
}
163+
speech.close(); // NOTE(review): not reached if anything above throws; consider try-with-resources
164+
}
165+
166+
116167
/**
117168
* Performs speech recognition on remote FLAC file and prints the transcription.
118169
*
@@ -193,11 +244,11 @@ public static void asyncRecognizeFile(String fileName) throws Exception, IOExcep
193244

194245
/**
195246
* Performs non-blocking speech recognition on remote FLAC file and prints
196-
* the transcription.
247+
* the transcription as well as word time offsets.
197248
*
198249
* @param gcsUri the path to the remote FLAC audio file to transcribe.
199250
*/
200-
public static void asyncRecognizeGcs(String gcsUri) throws Exception, IOException {
251+
public static void asyncRecognizeWords(String gcsUri) throws Exception, IOException {
201252
// Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
202253
SpeechClient speech = SpeechClient.create();
203254

@@ -240,6 +291,47 @@ public static void asyncRecognizeGcs(String gcsUri) throws Exception, IOExceptio
240291
speech.close();
241292
}
242293

294+
/**
295+
* Performs non-blocking speech recognition on remote FLAC file and prints
296+
* the transcription.
297+
*
298+
* @param gcsUri the path to the remote FLAC audio file to transcribe.
299+
*/
300+
public static void asyncRecognizeGcs(String gcsUri) throws Exception, IOException {
301+
// Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
302+
SpeechClient speech = SpeechClient.create();
303+
304+
// Configure remote file request for FLAC (16 kHz, en-US)
305+
RecognitionConfig config = RecognitionConfig.newBuilder()
306+
.setEncoding(AudioEncoding.FLAC)
307+
.setLanguageCode("en-US")
308+
.setSampleRateHertz(16000)
309+
.build();
310+
RecognitionAudio audio = RecognitionAudio.newBuilder()
311+
.setUri(gcsUri)
312+
.build();
313+
314+
// Use non-blocking call for getting file transcription
315+
OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata,
316+
Operation> response =
317+
speech.longRunningRecognizeAsync(config, audio);
318+
while (!response.isDone()) {
319+
System.out.println("Waiting for response...");
320+
Thread.sleep(10000); // poll the operation every 10 seconds
321+
}
322+
323+
List<SpeechRecognitionResult> results = response.get().getResultsList();
324+
325+
for (SpeechRecognitionResult result: results) {
326+
List<SpeechRecognitionAlternative> alternatives = result.getAlternativesList();
327+
for (SpeechRecognitionAlternative alternative: alternatives) {
328+
System.out.printf("Transcription: %s\n",alternative.getTranscript());
329+
}
330+
}
331+
speech.close(); // NOTE(review): not reached if anything above throws; consider try-with-resources
332+
}
333+
334+
243335
/**
244336
* Performs streaming speech recognition on raw PCM audio data.
245337
*

speech/cloud-client/src/test/java/com/example/speech/RecognizeIT.java

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,14 @@ public void testRecognizeFile() throws Exception {
6262
assertThat(got).contains("how old is the Brooklyn Bridge");
6363
}
6464

65+
@Test
66+
public void testRecognizeWordoffset() throws Exception {
67+
Recognize.syncRecognizeWords(fileName);
68+
String got = bout.toString(); // presumably the captured stdout; bout is set up outside this hunk
69+
assertThat(got).contains("how old is the Brooklyn Bridge"); // transcript text was printed
70+
assertThat(got).contains("\t0.0 sec -"); // at least one word offset line starting at 0.0 sec was printed
71+
}
72+
6573
@Test
6674
public void testRecognizeGcs() throws Exception {
6775
Recognize.syncRecognizeGcs(gcsPath);
@@ -85,8 +93,9 @@ public void testAsyncRecognizeGcs() throws Exception {
8593

8694
@Test
8795
public void testAsyncWordoffset() throws Exception {
88-
Recognize.asyncRecognizeGcs(gcsPath);
96+
Recognize.asyncRecognizeWords(gcsPath);
8997
String got = bout.toString();
98+
assertThat(got).contains("how old is the Brooklyn Bridge");
9099
assertThat(got).contains("\t0.0 sec -");
91100
}
92101

0 commit comments

Comments
 (0)