Skip to content

Commit fdd5eb2

Browse files
authored
Merge pull request GoogleCloudPlatform#267 from GoogleCloudPlatform/speech_api_changes
Speech api changes
2 parents 0174680 + 5a6d2ca commit fdd5eb2

36 files changed

+936
-216
lines changed

speech/grpc/README.md

+23-8
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
# Cloud Speech API gRPC samples for Java
22

33
This is a sample repo for accessing the [Google Cloud Speech API](http://cloud.google.com/speech) with
4-
[gRPC](http://www.grpc.io/) client library.
5-
4+
[gRPC](http://www.grpc.io/) client library. Note that these samples are for `advanced users` and are in
5+
BETA. Please see [Google Cloud Platform Launch Stages](https://cloud.google.com/terms/launch-stages).
66

77
## Prerequisites
88

@@ -73,20 +73,35 @@ note that the audio file must be in RAW format. You can use `sox`
7373
(available, e.g. via [http://sox.sourceforge.net/](http://sox.sourceforge.net/)
7474
or [homebrew](http://brew.sh/)) to convert audio files to raw format.
7575

76-
### Run the non-streaming client
76+
### Run the sync client
7777

78-
You can run the batch client like this:
78+
You can run the sync client like this:
7979

8080
```sh
81-
$ bin/speech-sample-nonstreaming.sh --host=speech.googleapis.com --port=443 \
82-
--file=<audio file path> --sampling=<sample rate>
81+
$ bin/speech-sample-sync.sh --host=speech.googleapis.com --port=443 \
82+
--uri=<audio file uri> --sampling=<sample rate>
8383
```
8484

8585
Try a sampling rate of 16000 and the included sample audio file, as follows:
8686

8787
```sh
88-
$ bin/speech-sample-nonstreaming.sh --host=speech.googleapis.com --port=443 \
89-
--file=resources/audio.raw --sampling=16000
88+
$ bin/speech-sample-sync.sh --host=speech.googleapis.com --port=443 \
89+
--uri=resources/audio.raw --sampling=16000
90+
```
91+
92+
### Run the async client
93+
94+
You can run the async client like this:
95+
96+
```sh
97+
$ bin/speech-sample-async.sh --host=speech.googleapis.com --port=443 \
98+
--uri=<audio file uri> --sampling=<sample rate>
99+
```
100+
101+
Try a sampling rate of 16000 and the included sample audio file, as follows:
102+
```sh
103+
$ bin/speech-sample-async.sh --host=speech.googleapis.com --port=443 \
104+
--uri=resources/audio.raw --sampling=16000
90105
```
91106

92107
### Run the streaming client

speech/grpc/bin/speech-sample-nonstreaming.sh renamed to speech/grpc/bin/speech-sample-async.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,4 @@
1515

1616
SRC_DIR=$(cd "$(dirname "$0")/.."; pwd)
1717
java -cp ${SRC_DIR}/target/grpc-sample-1.0-jar-with-dependencies.jar \
18-
com.google.cloud.speech.grpc.demos.NonStreamingRecognizeClient "$@"
18+
com.examples.cloud.speech.AsyncRecognizeClient "$@"

speech/grpc/bin/speech-sample-streaming.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,4 @@
1515

1616
SRC_DIR=$(cd "$(dirname "$0")/.."; pwd)
1717
java -cp ${SRC_DIR}/target/grpc-sample-1.0-jar-with-dependencies.jar \
18-
com.google.cloud.speech.grpc.demos.RecognizeClient "$@"
18+
com.examples.cloud.speech.StreamingRecognizeClient "$@"

speech/grpc/bin/speech-sample-sync.sh

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#!/bin/bash
2+
# Copyright 2016 Google Inc. All Rights Reserved.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
# Resolve the project root (the parent of this script's directory) as an
# absolute path. Using && keeps SRC_DIR empty if the cd fails instead of
# silently falling back to the caller's working directory.
SRC_DIR=$(cd "$(dirname "$0")/.." && pwd)
# Quote the classpath so a checkout path containing spaces stays one argument;
# all command-line arguments are forwarded unchanged to the sample client.
java -cp "${SRC_DIR}/target/grpc-sample-1.0-jar-with-dependencies.jar" \
    com.examples.cloud.speech.SyncRecognizeClient "$@"

speech/grpc/pom.xml

+9-7
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,14 @@ limitations under the License.
2525
<url>https://cloud.google.com/speech/</url>
2626
<inceptionYear>2016</inceptionYear>
2727

28+
<!-- Parent defines plugins for checkstyle and unit testing. -->
29+
<parent>
30+
<groupId>com.google.cloud</groupId>
31+
<artifactId>shared-configuration</artifactId>
32+
<version>1.0.0</version>
33+
<relativePath>../../java-repo-tools</relativePath>
34+
</parent>
35+
2836
<licenses>
2937
<license>
3038
<name>Apache 2</name>
@@ -38,13 +46,6 @@ limitations under the License.
3846
<url>http://www.google.com</url>
3947
</organization>
4048

41-
<parent>
42-
<groupId>com.google.cloud</groupId>
43-
<artifactId>doc-samples</artifactId>
44-
<version>1.0.0</version>
45-
<relativePath>../..</relativePath>
46-
</parent>
47-
4849
<properties>
4950
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
5051
</properties>
@@ -195,6 +196,7 @@ limitations under the License.
195196
-->
196197
<protocArtifact>com.google.protobuf:protoc:3.0.0-beta-2:exe:${os.detected.classifier}</protocArtifact>
197198
<pluginId>grpc-java</pluginId>
199+
<protoSourceRoot>${basedir}/src/main/java/third_party</protoSourceRoot>
198200
<pluginArtifact>io.grpc:protoc-gen-grpc-java:0.13.2:exe:${os.detected.classifier}</pluginArtifact>
199201
</configuration>
200202
<executions>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
/*
2+
* Copyright 2016 Google Inc. All Rights Reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.examples.cloud.speech;
18+
19+
import com.google.auth.oauth2.GoogleCredentials;
20+
import com.google.cloud.speech.v1beta1.AsyncRecognizeRequest;
21+
import com.google.cloud.speech.v1beta1.AsyncRecognizeResponse;
22+
import com.google.cloud.speech.v1beta1.RecognitionAudio;
23+
import com.google.cloud.speech.v1beta1.RecognitionConfig;
24+
import com.google.cloud.speech.v1beta1.RecognitionConfig.AudioEncoding;
25+
import com.google.cloud.speech.v1beta1.SpeechGrpc;
26+
27+
import com.google.longrunning.GetOperationRequest;
28+
import com.google.longrunning.Operation;
29+
import com.google.longrunning.OperationsGrpc;
30+
31+
import io.grpc.ManagedChannel;
32+
import io.grpc.StatusRuntimeException;
33+
import io.grpc.auth.ClientAuthInterceptor;
34+
import io.grpc.netty.NegotiationType;
35+
import io.grpc.netty.NettyChannelBuilder;
36+
37+
import org.apache.commons.cli.CommandLine;
38+
import org.apache.commons.cli.CommandLineParser;
39+
import org.apache.commons.cli.DefaultParser;
40+
import org.apache.commons.cli.OptionBuilder;
41+
import org.apache.commons.cli.Options;
42+
import org.apache.commons.cli.ParseException;
43+
44+
import java.io.IOException;
45+
import java.net.URI;
46+
import java.util.Arrays;
47+
import java.util.List;
48+
import java.util.concurrent.Executors;
49+
import java.util.concurrent.TimeUnit;
50+
import java.util.logging.Level;
51+
import java.util.logging.Logger;
52+
53+
/**
54+
* Client that sends audio to Speech.AsyncRecognize and returns transcript.
55+
*/
56+
public class AsyncRecognizeClient {
57+
58+
private static final Logger logger = Logger.getLogger(AsyncRecognizeClient.class.getName());
59+
60+
private static final List<String> OAUTH2_SCOPES =
61+
Arrays.asList("https://www.googleapis.com/auth/cloud-platform");
62+
63+
private final URI input;
64+
private final int samplingRate;
65+
66+
private final ManagedChannel channel;
67+
private final SpeechGrpc.SpeechBlockingStub speechClient;
68+
private final OperationsGrpc.OperationsBlockingStub statusClient;
69+
70+
/**
71+
* Construct client connecting to Cloud Speech server at {@code host:port}.
72+
*/
73+
public AsyncRecognizeClient(ManagedChannel channel, URI input, int samplingRate)
74+
throws IOException {
75+
this.input = input;
76+
this.samplingRate = samplingRate;
77+
this.channel = channel;
78+
79+
speechClient = SpeechGrpc.newBlockingStub(channel);
80+
statusClient = OperationsGrpc.newBlockingStub(channel);
81+
}
82+
83+
public void shutdown() throws InterruptedException {
84+
channel.shutdown().awaitTermination(5, TimeUnit.SECONDS);
85+
}
86+
87+
public static ManagedChannel createChannel(String host, int port) throws IOException {
88+
GoogleCredentials creds = GoogleCredentials.getApplicationDefault();
89+
creds = creds.createScoped(OAUTH2_SCOPES);
90+
ManagedChannel channel =
91+
NettyChannelBuilder.forAddress(host, port)
92+
.negotiationType(NegotiationType.TLS)
93+
.intercept(new ClientAuthInterceptor(creds, Executors.newSingleThreadExecutor()))
94+
.build();
95+
96+
return channel;
97+
}
98+
99+
/**
100+
* Sends a request to the speech API and returns an Operation handle.
101+
*/
102+
public void recognize() {
103+
RecognitionAudio audio;
104+
try {
105+
audio = RecognitionAudioFactory.createRecognitionAudio(this.input);
106+
} catch (IOException e) {
107+
logger.log(Level.WARNING, "Failed to read audio uri input: " + input);
108+
return;
109+
}
110+
logger.info("Sending " + audio.getContent().size() + " bytes from audio uri input: " + input);
111+
RecognitionConfig config =
112+
RecognitionConfig.newBuilder()
113+
.setEncoding(AudioEncoding.LINEAR16)
114+
.setSampleRate(samplingRate)
115+
.build();
116+
AsyncRecognizeRequest request =
117+
AsyncRecognizeRequest.newBuilder().setConfig(config).setAudio(audio).build();
118+
119+
Operation operation;
120+
Operation status;
121+
try {
122+
operation = speechClient.asyncRecognize(request);
123+
124+
// Print the long running operation handle
125+
logger.log(
126+
Level.INFO,
127+
String.format("Operation handle: %s, URI: %s", operation.getName(), input.toString()));
128+
} catch (StatusRuntimeException e) {
129+
logger.log(Level.WARNING, "RPC failed: {0}", e.getStatus());
130+
return;
131+
}
132+
133+
while (true) {
134+
try {
135+
logger.log(Level.INFO, "Waiting 2s for operation, {0} processing...", operation.getName());
136+
Thread.sleep(2000);
137+
GetOperationRequest operationReq =
138+
GetOperationRequest.newBuilder().setName(operation.getName()).build();
139+
status =
140+
statusClient.getOperation(
141+
GetOperationRequest.newBuilder().setName(operation.getName()).build());
142+
143+
if (status.getDone()) {
144+
break;
145+
}
146+
} catch (Exception ex) {
147+
logger.log(Level.WARNING, ex.getMessage());
148+
}
149+
}
150+
151+
try {
152+
AsyncRecognizeResponse asyncRes = status.getResponse().unpack(AsyncRecognizeResponse.class);
153+
154+
logger.info("Received response: " + asyncRes);
155+
} catch (com.google.protobuf.InvalidProtocolBufferException ex) {
156+
logger.log(Level.WARNING, "Unpack error, {0}", ex.getMessage());
157+
}
158+
}
159+
160+
public static void main(String[] args) throws Exception {
161+
162+
String audioFile = "";
163+
String host = "speech.googleapis.com";
164+
Integer port = 443;
165+
Integer sampling = 16000;
166+
167+
CommandLineParser parser = new DefaultParser();
168+
169+
Options options = new Options();
170+
options.addOption(
171+
OptionBuilder.withLongOpt("uri")
172+
.withDescription("path to audio uri")
173+
.hasArg()
174+
.withArgName("FILE_PATH")
175+
.create());
176+
options.addOption(
177+
OptionBuilder.withLongOpt("host")
178+
.withDescription("endpoint for api, e.g. speech.googleapis.com")
179+
.hasArg()
180+
.withArgName("ENDPOINT")
181+
.create());
182+
options.addOption(
183+
OptionBuilder.withLongOpt("port")
184+
.withDescription("SSL port, usually 443")
185+
.hasArg()
186+
.withArgName("PORT")
187+
.create());
188+
options.addOption(
189+
OptionBuilder.withLongOpt("sampling")
190+
.withDescription("Sampling Rate, i.e. 16000")
191+
.hasArg()
192+
.withArgName("RATE")
193+
.create());
194+
195+
try {
196+
CommandLine line = parser.parse(options, args);
197+
if (line.hasOption("uri")) {
198+
audioFile = line.getOptionValue("uri");
199+
} else {
200+
System.err.println("An Audio uri must be specified (e.g. file:///foo/baz.raw).");
201+
System.exit(1);
202+
}
203+
204+
if (line.hasOption("host")) {
205+
host = line.getOptionValue("host");
206+
} else {
207+
System.err.println("An API enpoint must be specified (typically speech.googleapis.com).");
208+
System.exit(1);
209+
}
210+
211+
if (line.hasOption("port")) {
212+
port = Integer.parseInt(line.getOptionValue("port"));
213+
} else {
214+
System.err.println("An SSL port must be specified (typically 443).");
215+
System.exit(1);
216+
}
217+
218+
if (line.hasOption("sampling")) {
219+
sampling = Integer.parseInt(line.getOptionValue("sampling"));
220+
} else {
221+
System.err.println("An Audio sampling rate must be specified.");
222+
System.exit(1);
223+
}
224+
} catch (ParseException exp) {
225+
System.err.println("Unexpected exception:" + exp.getMessage());
226+
System.exit(1);
227+
}
228+
229+
ManagedChannel channel = AsyncRecognizeClient.createChannel(host, port);
230+
231+
AsyncRecognizeClient client =
232+
new AsyncRecognizeClient(channel, URI.create(audioFile), sampling);
233+
try {
234+
client.recognize();
235+
} finally {
236+
client.shutdown();
237+
}
238+
}
239+
}

0 commit comments

Comments
 (0)