Skip to content

Commit c1f92ac

Browse files
committed
added uri read direct
1 parent ca7cb9e commit c1f92ac

File tree

2 files changed

+47
-25
lines changed

2 files changed

+47
-25
lines changed

speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/AudioRequestFactory.java

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,9 @@
1818
package com.google.cloud.speech.grpc.demos;
1919

2020
import com.google.cloud.speech.v1.AudioRequest;
21-
import com.google.cloud.storage.Blob;
22-
import com.google.cloud.storage.BlobId;
23-
import com.google.cloud.storage.Storage;
24-
import com.google.cloud.storage.StorageOptions;
2521
import com.google.protobuf.ByteString;
2622

2723
import java.io.IOException;
28-
2924
import java.net.URI;
3025
import java.nio.file.Files;
3126
import java.nio.file.Path;
@@ -37,8 +32,8 @@
3732
*/
3833
public class AudioRequestFactory {
3934

40-
private static final String FILE = "file";
41-
private static final String GS = "gs";
35+
private static final String FILE_SCHEME = "file";
36+
private static final String GS_SCHEME = "gs";
4237

4338
/**
4439
* Takes an input URI of the form $scheme:// and converts it to an audio request.
@@ -48,15 +43,11 @@ public class AudioRequestFactory {
4843
*/
4944
public static AudioRequest createRequest(URI uri)
5045
throws IOException {
51-
if (uri.getScheme() == null || uri.getScheme().equals(FILE)) {
46+
if (uri.getScheme() == null || uri.getScheme().equals(FILE_SCHEME)) {
5247
Path path = Paths.get(uri);
5348
return audioFromBytes(Files.readAllBytes(path));
54-
} else if (uri.getScheme().equals(GS)) {
55-
Storage storage = StorageOptions.defaultInstance().service();
56-
String path = uri.getPath();
57-
BlobId blobId = BlobId.of(uri.getHost(), path.substring(1, path.length()));
58-
Blob blob = storage.get(blobId);
59-
return audioFromBytes(blob.content());
49+
} else if (uri.getScheme().equals(GS_SCHEME)) {
50+
return AudioRequest.newBuilder().setUri(uri.toString()).build();
6051
}
6152
throw new RuntimeException("scheme not supported " + uri.getScheme());
6253
}

speech/grpc/src/main/proto/google/speech/v1/cloud-speech.proto

Lines changed: 42 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,15 @@ option java_multiple_files = true;
2323
option java_outer_classname = "SpeechProto";
2424
option java_package = "com.google.cloud.speech.v1";
2525

26+
2627
// Service that implements Google Cloud Speech API.
2728
service Speech {
2829
// Perform bidirectional streaming speech recognition on audio using gRPC.
2930
rpc Recognize(stream RecognizeRequest) returns (stream RecognizeResponse);
3031

3132
// Perform non-streaming speech recognition on audio using HTTPS.
3233
rpc NonStreamingRecognize(RecognizeRequest) returns (NonStreamingRecognizeResponse) {
33-
option (.google.api.http) = { post: "/v1/speech:recognize" body: "*" };
34+
option (google.api.http) = { post: "/v1/speech:recognize" body: "*" };
3435
}
3536
}
3637

@@ -54,7 +55,7 @@ message RecognizeRequest {
5455

5556
// The audio data to be recognized. For `NonStreamingRecognize`, all the
5657
// audio data must be contained in the first (and only) `RecognizeRequest`
57-
// message. For streaming `Recognize`, sequential chunks of audio data are
58+
// message. For streaming `Recognize`, sequential chunks of audio data are
5859
// sent in sequential `RecognizeRequest` messages.
5960
AudioRequest audio_request = 2;
6061
}
@@ -64,7 +65,7 @@ message RecognizeRequest {
6465
message InitialRecognizeRequest {
6566
// Audio encoding of the data sent in the audio message.
6667
enum AudioEncoding {
67-
// Not specified. Will return result `INVALID_ARGUMENT`.
68+
// Not specified. Returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
6869
ENCODING_UNSPECIFIED = 0;
6970

7071
// Uncompressed 16-bit signed little-endian samples.
@@ -118,8 +119,6 @@ message InitialRecognizeRequest {
118119
// profanities, replacing all but the initial character in each filtered word
119120
// with asterisks, e.g. "f***". If set to `false` or omitted, profanities
120121
// won't be filtered out.
121-
// Note that profanity filtering is not implemented for all languages.
122-
// If the language is not supported, this setting has no effect.
123122
bool profanity_filter = 5;
124123

125124
// [Optional] If `false` or omitted, the recognizer will detect a single
@@ -146,13 +145,38 @@ message InitialRecognizeRequest {
146145
// as they become available.
147146
// If `false` or omitted, no `EndpointerEvents` are returned.
148147
bool enable_endpointer_events = 8;
148+
149+
// [Optional] URI that points to a file where the recognition result should
150+
// be stored in JSON format. If omitted or empty string, the recognition
151+
// result is returned in the response. Should be specified only for
152+
// `NonStreamingRecognize`. If specified in a `Recognize` request,
153+
// `Recognize` returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
154+
// If specified in a `NonStreamingRecognize` request,
155+
// `NonStreamingRecognize` returns immediately, and the output file
156+
// is created asynchronously once the audio processing completes.
157+
// Currently, only Google Cloud Storage URIs are supported, which must be
158+
// specified in the following format: `gs://bucket_name/object_name`
159+
// (other URI formats return [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For
160+
// more information, see [Request URIs](/storage/docs/reference-uris).
161+
string output_uri = 9;
149162
}
150163

151164
// Contains audio data in the format specified in the `InitialRecognizeRequest`.
165+
// Either `content` or `uri` must be supplied. Supplying both or neither
166+
// returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
152167
message AudioRequest {
153-
// [Required] The audio data bytes encoded as specified in
154-
// `InitialRecognizeRequest`.
168+
// The audio data bytes encoded as specified in
169+
// `InitialRecognizeRequest`. Note: as with all bytes fields, protocol buffers
170+
// use a pure binary representation, whereas JSON representations use base64.
155171
bytes content = 1;
172+
173+
// URI that points to a file that contains audio data bytes as specified in
174+
// `InitialRecognizeRequest`. Currently, only Google Cloud Storage URIs are
175+
// supported, which must be specified in the following format:
176+
// `gs://bucket_name/object_name` (other URI formats return
177+
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more information, see
178+
// [Request URIs](/storage/docs/reference-uris).
179+
string uri = 2;
156180
}
157181

158182
// `NonStreamingRecognizeResponse` is the only message returned to the client by
@@ -191,10 +215,14 @@ message RecognizeResponse {
191215

192216
// [Output-only] If set, returns a [google.rpc.Status][] message that
193217
// specifies the error for the operation.
194-
.google.rpc.Status error = 1;
195-
196-
// [Output-only] May contain zero or one `is_final=true` result (the newly
197-
// settled portion). May also contain zero or more `is_final=false` results.
218+
google.rpc.Status error = 1;
219+
220+
// [Output-only] For `continuous=false`, this repeated list contains zero or
221+
// one result that corresponds to all of the audio processed so far. For
222+
// `continuous=true`, this repeated list contains zero or more results that
223+
// correspond to consecutive portions of the audio being processed.
224+
// In both cases, contains zero or one `is_final=true` result (the newly
225+
// settled portion), followed by zero or more `is_final=false` results.
198226
repeated SpeechRecognitionResult results = 2;
199227

200228
// [Output-only] Indicates the lowest index in the `results` array that has
@@ -206,7 +234,10 @@ message RecognizeResponse {
206234
EndpointerEvent endpoint = 4;
207235
}
208236

237+
// A speech recognition result corresponding to a portion of the audio.
209238
message SpeechRecognitionResult {
239+
// [Output-only] May contain one or more recognition hypotheses (up to the
240+
// maximum specified in `max_alternatives`).
210241
repeated SpeechRecognitionAlternative alternatives = 1;
211242

212243
// [Output-only] Set `true` if this is the final time the speech service will

0 commit comments

Comments
 (0)