
Commit ad5d658

Author: Jerjou Cheng, committed Jan 4, 2017
Stream audio from microphone for speech streaming
Parent: 8060f46

3 files changed: +175, −87 lines

 

speech/grpc/pom.xml

Lines changed: 6 additions & 0 deletions
@@ -156,6 +156,12 @@ limitations under the License.
       <version>0.31</version>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.mockito</groupId>
+      <artifactId>mockito-all</artifactId>
+      <version>1.10.19</version>
+      <scope>test</scope>
+    </dependency>
     <dependency>
       <groupId>io.grpc</groupId>
       <artifactId>grpc-auth</artifactId>
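
The new mockito-all test dependency goes with the protected mockDataLine hook that the Java diff below adds to StreamingRecognizeClient. The commit's own test lives in the third changed file, which is not shown in this section; the class below is only an illustrative sketch of how such a hook can be exercised. The test name, the mocked ManagedChannel, and the JUnit 4 annotations are assumptions of the sketch, not part of the commit.

package com.examples.cloud.speech;

import static org.junit.Assert.assertEquals;
import static org.mockito.Mockito.any;
import static org.mockito.Mockito.anyInt;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import io.grpc.ManagedChannel;
import javax.sound.sampled.TargetDataLine;
import org.junit.Test;

public class FakeMicrophoneSketchTest {

  @Test
  public void fakeMicrophoneCanBeInjected() throws Exception {
    // A fake "microphone": report one full buffer of captured audio, then end-of-stream.
    TargetDataLine fakeLine = mock(TargetDataLine.class);
    when(fakeLine.read(any(byte[].class), anyInt(), anyInt()))
        .thenReturn(3200)   // 100 ms of LINEAR16 audio at 16 kHz
        .thenReturn(-1);

    // A mocked channel keeps the sketch self-contained; no credentials or network are needed here.
    ManagedChannel channel = mock(ManagedChannel.class);
    StreamingRecognizeClient client = new StreamingRecognizeClient(channel, 16000);

    // The protected field added by this commit lets a test swap in the fake line,
    // which getAudioInputLine() then returns instead of opening a real audio device.
    client.mockDataLine = fakeLine;

    // The commit sizes each buffer as samplingRate * BYTES_PER_SAMPLE / 10, i.e. 100 ms of audio.
    assertEquals(3200, client.bytesPerBuffer);
  }
}

Driving recognize() end to end would also require a stubbed Speech service behind the channel, which this sketch does not attempt.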

speech/grpc/src/main/java/com/examples/cloud/speech/StreamingRecognizeClient.java

Lines changed: 88 additions & 62 deletions
@@ -23,10 +23,10 @@
 import com.google.cloud.speech.v1beta1.RecognitionConfig.AudioEncoding;
 import com.google.cloud.speech.v1beta1.SpeechGrpc;
 import com.google.cloud.speech.v1beta1.StreamingRecognitionConfig;
+import com.google.cloud.speech.v1beta1.StreamingRecognitionResult;
 import com.google.cloud.speech.v1beta1.StreamingRecognizeRequest;
 import com.google.cloud.speech.v1beta1.StreamingRecognizeResponse;
 import com.google.protobuf.ByteString;
-import com.google.protobuf.TextFormat;

 import io.grpc.ManagedChannel;
 import io.grpc.ManagedChannelBuilder;
@@ -44,52 +44,54 @@
 import org.apache.log4j.Logger;
 import org.apache.log4j.SimpleLayout;

-import java.io.File;
-import java.io.FileInputStream;
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.List;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.Executors;
 import java.util.concurrent.TimeUnit;
+import javax.sound.sampled.AudioFormat;
+import javax.sound.sampled.AudioSystem;
+import javax.sound.sampled.DataLine;
+import javax.sound.sampled.LineUnavailableException;
+import javax.sound.sampled.TargetDataLine;


 /**
  * Client that sends streaming audio to Speech.Recognize and returns streaming transcript.
  */
 public class StreamingRecognizeClient {

-  private final String file;
-  private final int samplingRate;
-
   private static final Logger logger = Logger.getLogger(StreamingRecognizeClient.class.getName());

   private final ManagedChannel channel;
-
   private final SpeechGrpc.SpeechStub speechClient;
-
-  private static final int BYTES_PER_BUFFER = 3200; //buffer size in bytes
-  private static final int BYTES_PER_SAMPLE = 2; //bytes per sample for LINEAR16
-
   private static final List<String> OAUTH2_SCOPES =
       Arrays.asList("https://www.googleapis.com/auth/cloud-platform");

+  static final int BYTES_PER_SAMPLE = 2; // bytes per sample for LINEAR16
+
+  private final int samplingRate;
+  final int bytesPerBuffer; // buffer size in bytes
+
+  // Used for testing
+  protected TargetDataLine mockDataLine = null;
+
   /**
    * Construct client connecting to Cloud Speech server at {@code host:port}.
    */
-  public StreamingRecognizeClient(ManagedChannel channel, String file, int samplingRate)
+  public StreamingRecognizeClient(ManagedChannel channel, int samplingRate)
       throws IOException {
-    this.file = file;
     this.samplingRate = samplingRate;
     this.channel = channel;
+    this.bytesPerBuffer = samplingRate * BYTES_PER_SAMPLE / 10; // 100 ms

     speechClient = SpeechGrpc.newStub(channel);

     // Send log4j logs to Console
     // If you are going to run this on GCE, you might wish to integrate with
-    // google-cloud-java logging. See:
+    // google-cloud-java logging.  See:
     // https://github.com/GoogleCloudPlatform/google-cloud-java/blob/master/README.md#stackdriver-logging-alpha
-
     ConsoleAppender appender = new ConsoleAppender(new SimpleLayout(), SYSTEM_OUT);
     logger.addAppender(appender);
   }
@@ -109,19 +111,73 @@ static ManagedChannel createChannel(String host, int port) throws IOException {
     return channel;
   }

+  /**
+   * Return a Line to the audio input device.
+   */
+  private TargetDataLine getAudioInputLine() {
+    // For testing
+    if (null != mockDataLine) {
+      return mockDataLine;
+    }
+
+    AudioFormat format = new AudioFormat(samplingRate, BYTES_PER_SAMPLE * 8, 1, true, false);
+    DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
+    if (!AudioSystem.isLineSupported(info)) {
+      throw new RuntimeException(String.format(
+          "Device doesn't support LINEAR16 mono raw audio format at %dHz", samplingRate));
+    }
+    try {
+      TargetDataLine line = (TargetDataLine) AudioSystem.getLine(info);
+      // Make sure the line buffer doesn't overflow while we're filling this thread's buffer.
+      line.open(format, bytesPerBuffer * 5);
+      return line;
+    } catch (LineUnavailableException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
   /** Send streaming recognize requests to server. */
   public void recognize() throws InterruptedException, IOException {
     final CountDownLatch finishLatch = new CountDownLatch(1);
     StreamObserver<StreamingRecognizeResponse> responseObserver =
         new StreamObserver<StreamingRecognizeResponse>() {
+          private int sentenceLength = 1;
+          /**
+           * Prints the transcription results. Interim results are overwritten by subsequent
+           * results, until a final one is returned, at which point we start a new line.
+           *
+           * Flags the program to exit when it hears "exit".
+           */
           @Override
           public void onNext(StreamingRecognizeResponse response) {
-            logger.info("Received response: " + TextFormat.printToString(response));
+            List<StreamingRecognitionResult> results = response.getResultsList();
+            if (results.size() < 1) {
+              return;
+            }
+
+            StreamingRecognitionResult result = results.get(0);
+            String transcript = result.getAlternatives(0).getTranscript();
+
+            // Pad to the previous transcript's length and end interim results with a carriage
+            // return, so the next transcription overwrites them. Final results print a newline.
+            String format = "%-" + this.sentenceLength + 's';
+            if (result.getIsFinal()) {
+              format += '\n';
+              this.sentenceLength = 1;
+
+              if (transcript.toLowerCase().indexOf("exit") >= 0) {
+                finishLatch.countDown();
+              }
+            } else {
+              format += '\r';
+              this.sentenceLength = transcript.length();
+            }
+            System.out.print(String.format(format, transcript));
           }

           @Override
           public void onError(Throwable error) {
-            logger.log(Level.WARN, "recognize failed: {0}", error);
+            logger.log(Level.ERROR, "recognize failed: {0}", error);
             finishLatch.countDown();
           }

@@ -146,33 +202,28 @@ public void onCompleted() {
         StreamingRecognitionConfig.newBuilder()
             .setConfig(config)
             .setInterimResults(true)
-            .setSingleUtterance(true)
+            .setSingleUtterance(false)
             .build();

     StreamingRecognizeRequest initial =
         StreamingRecognizeRequest.newBuilder().setStreamingConfig(streamingConfig).build();
     requestObserver.onNext(initial);

-      // Open audio file. Read and send sequential buffers of audio as additional RecognizeRequests.
-      FileInputStream in = new FileInputStream(new File(file));
-      // For LINEAR16 at 16000 Hz sample rate, 3200 bytes corresponds to 100 milliseconds of audio.
-      byte[] buffer = new byte[BYTES_PER_BUFFER];
+      // Get a Line to the audio input device.
+      TargetDataLine in = getAudioInputLine();
+      byte[] buffer = new byte[bytesPerBuffer];
       int bytesRead;
-      int totalBytes = 0;
-      int samplesPerBuffer = BYTES_PER_BUFFER / BYTES_PER_SAMPLE;
-      int samplesPerMillis = samplingRate / 1000;

-      while ((bytesRead = in.read(buffer)) != -1) {
-        totalBytes += bytesRead;
+      in.start();
+      // Read and send sequential buffers of audio as additional RecognizeRequests.
+      while (finishLatch.getCount() > 0
+          && (bytesRead = in.read(buffer, 0, buffer.length)) != -1) {
         StreamingRecognizeRequest request =
             StreamingRecognizeRequest.newBuilder()
                 .setAudioContent(ByteString.copyFrom(buffer, 0, bytesRead))
                 .build();
         requestObserver.onNext(request);
-        // To simulate real-time audio, sleep after sending each audio buffer.
-        Thread.sleep(samplesPerBuffer / samplesPerMillis);
       }
-      logger.info("Sent " + totalBytes + " bytes from audio file: " + file);
     } catch (RuntimeException e) {
       // Cancel RPC.
       requestObserver.onError(e);
@@ -187,21 +238,13 @@ public void onCompleted() {

   public static void main(String[] args) throws Exception {

-    String audioFile = "";
-    String host = "speech.googleapis.com";
-    Integer port = 443;
-    Integer sampling = 16000;
+    String host = null;
+    Integer port = null;
+    Integer sampling = null;

     CommandLineParser parser = new DefaultParser();

     Options options = new Options();
-    options.addOption(
-        Option.builder()
-            .longOpt("file")
-            .desc("path to audio file")
-            .hasArg()
-            .argName("FILE_PATH")
-            .build());
     options.addOption(
         Option.builder()
             .longOpt("host")
@@ -226,31 +269,14 @@ public static void main(String[] args) throws Exception {

     try {
       CommandLine line = parser.parse(options, args);
-      if (line.hasOption("file")) {
-        audioFile = line.getOptionValue("file");
-      } else {
-        System.err.println("An Audio file must be specified (e.g. /foo/baz.raw).");
-        System.exit(1);
-      }
-
-      if (line.hasOption("host")) {
-        host = line.getOptionValue("host");
-      } else {
-        System.err.println("An API enpoint must be specified (typically speech.googleapis.com).");
-        System.exit(1);
-      }

-      if (line.hasOption("port")) {
-        port = Integer.parseInt(line.getOptionValue("port"));
-      } else {
-        System.err.println("An SSL port must be specified (typically 443).");
-        System.exit(1);
-      }
+      host = line.getOptionValue("host", "speech.googleapis.com");
+      port = Integer.parseInt(line.getOptionValue("port", "443"));

       if (line.hasOption("sampling")) {
         sampling = Integer.parseInt(line.getOptionValue("sampling"));
       } else {
-        System.err.println("An Audio sampling rate must be specified.");
+        System.err.println("An Audio sampling rate (--sampling) must be specified. (e.g. 16000)");
         System.exit(1);
       }
     } catch (ParseException exp) {
@@ -259,7 +285,7 @@ public static void main(String[] args) throws Exception {
     }

     ManagedChannel channel = createChannel(host, port);
-    StreamingRecognizeClient client = new StreamingRecognizeClient(channel, audioFile, sampling);
+    StreamingRecognizeClient client = new StreamingRecognizeClient(channel, sampling);
     try {
       client.recognize();
     } finally {
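
For readers who have not used javax.sound.sampled before, here is a minimal standalone sketch (not part of the commit; the class name is mine) of the capture pattern that the new getAudioInputLine() and recognize() code relies on: open a TargetDataLine for 16-bit signed little-endian mono audio and read it in 100 ms buffers. The real client wraps each buffer in a StreamingRecognizeRequest; this sketch only counts the captured bytes.

import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.DataLine;
import javax.sound.sampled.TargetDataLine;

/** Standalone sketch: capture LINEAR16 mono audio from the default microphone in 100 ms buffers. */
public class MicCaptureSketch {
  public static void main(String[] args) throws Exception {
    int samplingRate = 16000;                   // the rate the sample's help text suggests
    int bytesPerBuffer = samplingRate * 2 / 10; // 2 bytes per LINEAR16 sample, 100 ms per buffer

    // 16-bit, mono, signed, little-endian: the same AudioFormat arguments the commit uses.
    AudioFormat format = new AudioFormat(samplingRate, 16, 1, true, false);
    DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
    if (!AudioSystem.isLineSupported(info)) {
      throw new RuntimeException("No LINEAR16 mono input line available at " + samplingRate + "Hz");
    }

    TargetDataLine line = (TargetDataLine) AudioSystem.getLine(info);
    line.open(format, bytesPerBuffer * 5); // extra head-room so the device buffer doesn't overflow
    line.start();

    byte[] buffer = new byte[bytesPerBuffer];
    for (int i = 0; i < 50; i++) {          // roughly five seconds of audio
      int bytesRead = line.read(buffer, 0, buffer.length);
      // The real client wraps each buffer in a StreamingRecognizeRequest; here we just count bytes.
      System.out.println("captured " + bytesRead + " bytes");
    }

    line.stop();
    line.close();
  }
}

Sizing the line's internal buffer at five read-buffers mirrors the commit's line.open(format, bytesPerBuffer * 5) call, which keeps the audio device from overflowing while one buffer is being sent over the stream.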

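
The onNext() handler above also leans on a small console trick: pad each transcript to the previous transcript's length, end interim results with a carriage return so the next print overwrites them in place, and end final results with a newline. Below is a tiny standalone demonstration of just that trick, using hard-coded stand-in transcripts (the class name and strings are illustrative only).

/** Standalone sketch of the overwrite-in-place printing used for interim transcripts. */
public class InterimPrintSketch {
  public static void main(String[] args) throws InterruptedException {
    // Stand-in "interim" transcripts; the last one plays the role of a final result.
    String[] transcripts = {"hel", "hello wor", "hello world"};
    int sentenceLength = 1;
    for (int i = 0; i < transcripts.length; i++) {
      boolean isFinal = (i == transcripts.length - 1);
      // Pad to the previous length so shorter updates fully erase the old text on that line.
      String format = "%-" + sentenceLength + "s" + (isFinal ? "%n" : "\r");
      System.out.print(String.format(format, transcripts[i]));
      sentenceLength = isFinal ? 1 : transcripts[i].length();
      Thread.sleep(300); // slow it down so the in-place updates are visible
    }
  }
}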