import com.google.cloud.videointelligence.v1p1beta1.AnnotateVideoProgress;
import com.google.cloud.videointelligence.v1p1beta1.AnnotateVideoRequest;
import com.google.cloud.videointelligence.v1p1beta1.AnnotateVideoResponse;
- import com.google.cloud.videointelligence.v1p1beta1.EmotionAttribute;
- import com.google.cloud.videointelligence.v1p1beta1.FaceConfig;
- import com.google.cloud.videointelligence.v1p1beta1.FaceDetectionAnnotation;
- import com.google.cloud.videointelligence.v1p1beta1.FaceDetectionFrame;
- import com.google.cloud.videointelligence.v1p1beta1.FaceSegment;
import com.google.cloud.videointelligence.v1p1beta1.Feature;
- import com.google.cloud.videointelligence.v1p1beta1.NormalizedBoundingBox;
import com.google.cloud.videointelligence.v1p1beta1.SpeechRecognitionAlternative;
import com.google.cloud.videointelligence.v1p1beta1.SpeechTranscription;
import com.google.cloud.videointelligence.v1p1beta1.SpeechTranscriptionConfig;

public class Detect {
  /**
-  * Detects face's bounding boxes, emotions, and video transcription using the Video Intelligence
+  * Detects video transcription using the Video Intelligence
   * API
   * @param args specifies features to detect and the path to the video on Google Cloud Storage.
   */
@@ -64,7 +58,7 @@ public static void argsHelper(String[] args) throws Exception {
    System.out.printf(
        "\tjava %s \"<command>\" \"<path-to-video>\"\n"
            + "Commands:\n"
-           + "\tfaces-bounding-boxes | faces-emotions | speech-transcription\n"
+           + "\tspeech-transcription\n"
            + "Path:\n\tA URI for a Cloud Storage resource (gs://...)\n"
            + "Examples: ",
        Detect.class.getCanonicalName());
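With the face commands removed, speech-transcription is the only command left. Based on the usage string above, an invocation would look roughly like the line below (the package name com.example.video and the bucket path are placeholders, not taken from this change):

java com.example.video.Detect "speech-transcription" "gs://my-bucket/my-video.mp4"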
@@ -73,175 +67,11 @@ public static void argsHelper(String[] args) throws Exception {
    String command = args[0];
    String path = args.length > 1 ? args[1] : "";

-   if (command.equals("faces-bounding-boxes")) {
-     analyzeFacesBoundingBoxes(path);
-   }
-   if (command.equals("faces-emotions")) {
-     analyzeFaceEmotions(path);
-   }
    if (command.equals("speech-transcription")) {
      speechTranscription(path);
    }
  }

-
- // [START video_face_bounding_boxes]
- /**
-  * Detects faces' bounding boxes on the video at the provided Cloud Storage path.
-  *
-  * @param gcsUri the path to the video file to analyze.
-  */
- public static void analyzeFacesBoundingBoxes(String gcsUri) throws Exception {
-   // Instantiate a com.google.cloud.videointelligence.v1p1beta1.VideoIntelligenceServiceClient
-   try (VideoIntelligenceServiceClient client = VideoIntelligenceServiceClient.create()) {
-     // Set the configuration to include bounding boxes
-     FaceConfig config = FaceConfig.newBuilder()
-         .setIncludeBoundingBoxes(true)
-         .build();
-
-     // Set the video context with the above configuration
-     VideoContext context = VideoContext.newBuilder()
-         .setFaceDetectionConfig(config)
-         .build();
-
-     // Create the request
-     AnnotateVideoRequest request = AnnotateVideoRequest.newBuilder()
-         .setInputUri(gcsUri)
-         .addFeatures(Feature.FACE_DETECTION)
-         .setVideoContext(context)
-         .build();
-
-     // asynchronously perform facial analysis on videos
-     OperationFuture<AnnotateVideoResponse, AnnotateVideoProgress> response =
-         client.annotateVideoAsync(request);
-
-     System.out.println("Waiting for operation to complete...");
-     boolean faceFound = false;
-     // Display the results
-     for (VideoAnnotationResults results : response.get(900, TimeUnit.SECONDS)
-         .getAnnotationResultsList()) {
-       int faceCount = 0;
-       // Display the results for each face
-       for (FaceDetectionAnnotation faceAnnotation : results.getFaceDetectionAnnotationsList()) {
-         faceFound = true;
-         System.out.println("\nFace: " + ++faceCount);
-         // Each FaceDetectionAnnotation has only one segment.
-         for (FaceSegment segment : faceAnnotation.getSegmentsList()) {
-           double startTime = segment.getSegment().getStartTimeOffset().getSeconds()
-               + segment.getSegment().getStartTimeOffset().getNanos() / 1e9;
-           double endTime = segment.getSegment().getEndTimeOffset().getSeconds()
-               + segment.getSegment().getEndTimeOffset().getNanos() / 1e9;
-           System.out.printf("Segment location: %.3fs to %.3f\n", startTime, endTime);
-         }
-         // There are typically many frames for each face,
-         try {
-           // Here we process only the first frame.
-           if (faceAnnotation.getFramesCount() > 0) {
-             FaceDetectionFrame frame = faceAnnotation.getFrames(0); // get the first frame
-             double timeOffset = frame.getTimeOffset().getSeconds()
-                 + frame.getTimeOffset().getNanos() / 1e9;
-             System.out.printf("First frame time offset: %.3fs\n", timeOffset);
-             // print info on the first normalized bounding box
-             NormalizedBoundingBox box = frame.getAttributes(0).getNormalizedBoundingBox();
-             System.out.printf("\tLeft: %.3f\n", box.getLeft());
-             System.out.printf("\tTop: %.3f\n", box.getTop());
-             System.out.printf("\tBottom: %.3f\n", box.getBottom());
-             System.out.printf("\tRight: %.3f\n", box.getRight());
-           } else {
-             System.out.println("No frames found in annotation");
-           }
-         } catch (IndexOutOfBoundsException ioe) {
-           System.out.println("Could not retrieve frame: " + ioe.getMessage());
-         }
-       }
-     }
-
-     if (!faceFound) {
-       System.out.println("No faces detected in " + gcsUri);
-     }
-   }
- }
- // [END video_face_bounding_boxes]
-
- // [START video_face_emotions]
- /**
-  * Analyze faces' emotions over frames on the video at the provided Cloud Storage path.
-  *
-  * @param gcsUri the path to the video file to analyze.
-  */
- public static void analyzeFaceEmotions(String gcsUri) throws Exception {
-   // Instantiate a com.google.cloud.videointelligence.v1p1beta1.VideoIntelligenceServiceClient
-   try (VideoIntelligenceServiceClient client = VideoIntelligenceServiceClient.create()) {
-     // Set the configuration to include bounding boxes
-     FaceConfig config = FaceConfig.newBuilder()
-         .setIncludeEmotions(true)
-         .build();
-
-     // Set the video context with the above configuration
-     VideoContext context = VideoContext.newBuilder()
-         .setFaceDetectionConfig(config)
-         .build();
-
-     // Create the request
-     AnnotateVideoRequest request = AnnotateVideoRequest.newBuilder()
-         .setInputUri(gcsUri)
-         .addFeatures(Feature.FACE_DETECTION)
-         .setVideoContext(context)
-         .build();
-
-     // asynchronously perform facial analysis on videos
-     OperationFuture<AnnotateVideoResponse, AnnotateVideoProgress> response =
-         client.annotateVideoAsync(request);
-
-     System.out.println("Waiting for operation to complete...");
-     boolean faceFound = false;
-     // Display the results
-     for (VideoAnnotationResults results : response.get(600, TimeUnit.SECONDS)
-         .getAnnotationResultsList()) {
-       int faceCount = 0;
-       // Display the results for each face
-       for (FaceDetectionAnnotation faceAnnotation : results.getFaceDetectionAnnotationsList()) {
-         faceFound = true;
-         System.out.println("\nFace: " + ++faceCount);
-         // Each FaceDetectionAnnotation has only one segment.
-         for (FaceSegment segment : faceAnnotation.getSegmentsList()) {
-           double startTime = segment.getSegment().getStartTimeOffset().getSeconds()
-               + segment.getSegment().getStartTimeOffset().getNanos() / 1e9;
-           double endTime = segment.getSegment().getEndTimeOffset().getSeconds()
-               + segment.getSegment().getEndTimeOffset().getNanos() / 1e9;
-           System.out.printf("Segment location: %.3fs to %.3f\n", startTime, endTime);
-         }
-
-         try {
-           // Print each frame's highest emotion
-           for (FaceDetectionFrame frame : faceAnnotation.getFramesList()) {
-             double timeOffset = frame.getTimeOffset().getSeconds()
-                 + frame.getTimeOffset().getNanos() / 1e9;
-             float highestScore = 0.0f;
-             String emotion = "";
-             // Get the highest scoring emotion for the current frame
-             for (EmotionAttribute emotionAttribute : frame.getAttributes(0).getEmotionsList()) {
-               if (emotionAttribute.getScore() > highestScore) {
-                 highestScore = emotionAttribute.getScore();
-                 emotion = emotionAttribute.getEmotion().name();
-               }
-             }
-             System.out.printf("\t%4.2fs: %14s %4.3f\n", timeOffset, emotion, highestScore);
-           }
-
-         } catch (IndexOutOfBoundsException ioe) {
-           System.out.println("Could not retrieve frame: " + ioe.getMessage());
-         }
-       }
-     }
-
-     if (!faceFound) {
-       System.out.println("No faces detected in " + gcsUri);
-     }
-   }
- }
- // [END video_face_emotions]
-
  // [START video_speech_transcription]
  /**
   * Transcribe speech from a video stored on GCS.
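The body of speechTranscription is untouched between these hunks; only its request-building lines appear in the next hunk. A minimal sketch of how the full request construction presumably reads, assuming the v1p1beta1 builder API (the language code is an assumption, and the final setVideoContext/build calls are the context lines the next hunk picks up at):

// Sketch of the elided setup inside speechTranscription(gcsUri) (assumed, not shown in this diff).
SpeechTranscriptionConfig config = SpeechTranscriptionConfig.newBuilder()
    .setLanguageCode("en-US")  // assumed language; set this for the video being analyzed
    .build();
// Attach the transcription config to the video context and request SPEECH_TRANSCRIPTION.
VideoContext context = VideoContext.newBuilder()
    .setSpeechTranscriptionConfig(config)
    .build();
AnnotateVideoRequest request = AnnotateVideoRequest.newBuilder()
    .setInputUri(gcsUri)
    .addFeatures(Feature.SPEECH_TRANSCRIPTION)
    .setVideoContext(context)
    .build();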
@@ -268,7 +98,7 @@ public static void speechTranscription(String gcsUri) throws Exception {
        .setVideoContext(context)
        .build();

-     // asynchronously perform facial analysis on videos
+     // asynchronously perform speech transcription on videos
      OperationFuture<AnnotateVideoResponse, AnnotateVideoProgress> response =
          client.annotateVideoAsync(request);
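Everything after annotateVideoAsync is likewise unchanged and omitted from the diff. Reading the transcription results presumably follows the same pattern as the removed face methods above, iterating the annotation results and printing the top alternative (a sketch; the timeout value and output formatting are assumptions):

// Sketch of the unchanged result handling (not part of this diff).
for (VideoAnnotationResults results :
    response.get(600, TimeUnit.SECONDS).getAnnotationResultsList()) {
  for (SpeechTranscription transcription : results.getSpeechTranscriptionsList()) {
    // Alternatives are ordered by the service, most likely transcript first.
    if (transcription.getAlternativesCount() > 0) {
      SpeechRecognitionAlternative alternative = transcription.getAlternatives(0);
      System.out.printf("Transcript: %s\n", alternative.getTranscript());
      System.out.printf("Confidence: %.2f\n", alternative.getConfidence());
    }
  }
}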