23
23
python beta_snippets.py object-localization-uri gs://...
24
24
python beta_snippets.py handwritten-ocr INPUT_IMAGE
25
25
python beta_snippets.py handwritten-ocr-uri gs://...
26
- python beta_snippets.py doc-features INPUT_PDF
27
- python beta_snippets.py doc-features_uri gs://...
26
+ python beta_snippets.py batch-annotate-files INPUT_PDF
27
+ python beta_snippets.py batch-annotate-files-uri gs://...
28
+ python beta_snippets.py batch-annotate-images-uri gs://... gs://...
28
29
29
30
30
31
For more information, the documentation at
@@ -176,8 +177,8 @@ def detect_handwritten_ocr_uri(uri):
176
177
# [END vision_handwritten_ocr_gcs_beta]
177
178
178
179
179
- # [START vision_fulltext_detection_pdf_beta ]
180
- def detect_document_features (path ):
180
+ # [START vision_batch_annotate_files_beta ]
181
+ def detect_batch_annotate_files (path ):
181
182
"""Detects document features in a PDF/TIFF/GIF file.
182
183
183
184
While your PDF file may have several pages,
@@ -224,12 +225,12 @@ def detect_document_features(path):
224
225
for symbol in word .symbols :
225
226
print ('\t \t \t Symbol: {} (confidence: {})' .format (
226
227
symbol .text , symbol .confidence ))
227
- # [END vision_fulltext_detection_pdf_beta ]
228
+ # [END vision_batch_annotate_files_beta ]
228
229
229
230
230
- # [START vision_fulltext_detection_pdf_gcs_beta ]
231
- def detect_document_features_uri (gcs_uri ):
232
- """Detects document features in a PDF/TIFF/GIF file.
231
+ # [START vision_batch_annotate_files_gcs_beta ]
232
+ def detect_batch_annotate_files_uri (gcs_uri ):
233
+ """Detects document features in a PDF/TIFF/GIF file.
233
234
234
235
While your PDF file may have several pages,
235
236
this API can process up to 5 pages only.
@@ -272,7 +273,75 @@ def detect_document_features_uri(gcs_uri):
272
273
for symbol in word .symbols :
273
274
print ('\t \t \t Symbol: {} (confidence: {})' .format (
274
275
symbol .text , symbol .confidence ))
275
- # [END vision_fulltext_detection_pdf_gcs_beta]
276
+ # [END vision_batch_annotate_files_gcs_beta]
277
+
278
+
279
+ # [START vision_async_batch_annotate_images_beta]
280
def async_batch_annotate_images_uri(input_image_uri, output_uri):
    """Batch annotation of images on Google Cloud Storage asynchronously.

    Requests label detection, text detection and image properties for a
    single image, waits for the long-running operation to finish, lists the
    result files written under the output location and prints the
    annotation response for the first request.

    Args:
        input_image_uri: The path to the image in Google Cloud Storage
            (gs://...)
        output_uri: The path to the output path in Google Cloud Storage
            (gs://...). Must contain both a bucket and an object prefix.

    Raises:
        ValueError: If output_uri is not of the form gs://bucket/prefix.
        RuntimeError: If no output files are found under output_uri once
            the operation has completed.
    """
    import re

    # Validate the destination up front: the bucket and prefix are needed
    # later to locate the results, and a malformed URI should fail before
    # the (potentially slow, billable) annotation operation is started.
    match = re.match(r'gs://([^/]+)/(.+)', output_uri)
    if match is None:
        raise ValueError(
            'output_uri must look like gs://bucket/prefix, got: '
            '{!r}'.format(output_uri))
    bucket_name, prefix = match.groups()

    from google.cloud import storage
    from google.protobuf import json_format
    from google.cloud import vision_v1p4beta1 as vision
    client = vision.ImageAnnotatorClient()

    # Construct the request for the image(s) to be annotated:
    image_source = vision.types.ImageSource(image_uri=input_image_uri)
    image = vision.types.Image(source=image_source)
    features = [
        vision.types.Feature(type=vision.enums.Feature.Type.LABEL_DETECTION),
        vision.types.Feature(type=vision.enums.Feature.Type.TEXT_DETECTION),
        vision.types.Feature(type=vision.enums.Feature.Type.IMAGE_PROPERTIES),
    ]
    requests = [
        vision.types.AnnotateImageRequest(image=image, features=features),
    ]

    gcs_destination = vision.types.GcsDestination(uri=output_uri)
    output_config = vision.types.OutputConfig(
        gcs_destination=gcs_destination, batch_size=2)

    operation = client.async_batch_annotate_images(
        requests=requests, output_config=output_config)

    print('Waiting for the operation to finish.')
    operation.result(timeout=10000)

    # Once the request has completed and the output has been
    # written to Google Cloud Storage, we can list all the output files.
    storage_client = storage.Client()

    # Pass the bucket name positionally: the name of this parameter differs
    # between google-cloud-storage releases, so a keyword is not portable.
    bucket = storage_client.get_bucket(bucket_name)

    # Lists objects with the given prefix.
    blob_list = list(bucket.list_blobs(prefix=prefix))
    print('Output files:')
    for blob in blob_list:
        print(blob.name)

    if not blob_list:
        raise RuntimeError(
            'No output files found under {}'.format(output_uri))

    # Processes the first output file from Google Cloud Storage.
    # Since we specified batch_size=2, the first response contains
    # annotations for the first two annotate image requests.
    output = blob_list[0]

    json_string = output.download_as_string()
    response = json_format.Parse(json_string,
                                 vision.types.BatchAnnotateImagesResponse())

    # Prints the actual response for the first annotate image request.
    print(u'The annotation response for the first request: {}'.format(
        response.responses[0]))
344
+ # [END vision_async_batch_annotate_images_beta]
276
345
277
346
278
347
if __name__ == '__main__' :
@@ -297,13 +366,20 @@ def detect_document_features_uri(gcs_uri):
297
366
'handwritten-ocr-uri' , help = detect_handwritten_ocr_uri .__doc__ )
298
367
handwritten_uri_parser .add_argument ('uri' )
299
368
300
- doc_features_parser = subparsers .add_parser (
301
- 'doc-features' , help = detect_document_features .__doc__ )
302
- doc_features_parser .add_argument ('path' )
369
+ batch_annotate_parser = subparsers .add_parser (
370
+ 'batch-annotate-files' , help = detect_batch_annotate_files .__doc__ )
371
+ batch_annotate_parser .add_argument ('path' )
372
+
373
+ batch_annotate_uri_parser = subparsers .add_parser (
374
+ 'batch-annotate-files-uri' ,
375
+ help = detect_batch_annotate_files_uri .__doc__ )
376
+ batch_annotate_uri_parser .add_argument ('uri' )
303
377
304
- doc_features_uri_parser = subparsers .add_parser (
305
- 'doc-features-uri' , help = detect_document_features_uri .__doc__ )
306
- doc_features_uri_parser .add_argument ('uri' )
378
+ batch_annotate__image_uri_parser = subparsers .add_parser (
379
+ 'batch-annotate-images-uri' ,
380
+ help = async_batch_annotate_images_uri .__doc__ )
381
+ batch_annotate__image_uri_parser .add_argument ('uri' )
382
+ batch_annotate__image_uri_parser .add_argument ('output' )
307
383
308
384
args = parser .parse_args ()
309
385
@@ -312,12 +388,14 @@ def detect_document_features_uri(gcs_uri):
312
388
localize_objects_uri (args .uri )
313
389
elif 'handwritten-ocr-uri' in args .command :
314
390
detect_handwritten_ocr_uri (args .uri )
315
- elif 'doc-features' in args .command :
316
- detect_handwritten_ocr_uri (args .uri )
391
+ elif 'batch-annotate-files' in args .command :
392
+ detect_batch_annotate_files_uri (args .uri )
393
+ elif 'batch-annotate-images' in args .command :
394
+ async_batch_annotate_images_uri (args .uri , args .output )
317
395
else :
318
396
if 'object-localization' in args .command :
319
397
localize_objects (args .path )
320
398
elif 'handwritten-ocr' in args .command :
321
399
detect_handwritten_ocr (args .path )
322
- elif 'doc-features ' in args .command :
323
- detect_handwritten_ocr (args .path )
400
+ elif 'batch-annotate-files ' in args .command :
401
+ detect_batch_annotate_files (args .path )
0 commit comments