GoogleCloudPlatform · eapl-gemugami · Mar 25, 2025 · Mar 25, 2025 · Mar 25, 2025
@@ -11,72 +11,80 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
-
-# flake8: noqa
-
-# [START documentai_quickstart]
-
-from google.api_core.client_options import ClientOptions
-from google.cloud import documentai  # type: ignore
 
-# TODO(developer): Uncomment these variables before running the sample.
-# project_id = "YOUR_PROJECT_ID"
-# location = "YOUR_PROCESSOR_LOCATION"  # Format is "us" or "eu"
-# file_path = "/path/to/local/pdf"
-# processor_display_name = "YOUR_PROCESSOR_DISPLAY_NAME" # Must be unique per project, e.g.: "My Processor"
+from google.cloud.documentai_v1.types.document import Document
+from google.cloud.documentai_v1.types.processor import Processor
 
 
 def quickstart(
     project_id: str,
     location: str,
     file_path: str,
-    processor_display_name: str = "My Processor",
-):
-    # You must set the `api_endpoint`if you use a location other than "us".
+    processor_display_name: str,
+) -> tuple[Processor, Document]:
+    # [START documentai_quickstart]
+    from google.api_core.client_options import ClientOptions
+    from google.cloud import documentai_v1  # type: ignore
+
+    # TODO(developer): Update and uncomment these variables before running the sample.
+    # project_id = "MY_PROJECT_ID"
+
+    # Processor location. For example: "us" or "eu".
+    # location = "MY_PROCESSOR_LOCATION"
+
+    # Path for file to process.
+    # file_path = "/path/to/local/pdf"
+
+    # Processor display name must be unique per project.
+    # processor_display_name = "MY_PROCESSOR_DISPLAY_NAME"
+
+    # Set `api_endpoint` if you use a location other than "us".
     opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com")
 
-    client = documentai.DocumentProcessorServiceClient(client_options=opts)
+    # Initialize Document AI client.
+    client = documentai_v1.DocumentProcessorServiceClient(client_options=opts)
 
-    # The full resource name of the location, e.g.:
-    # `projects/{project_id}/locations/{location}`
+    # Get the full resource name of the location.
+    # For example: `projects/{project_id}/locations/{location}`
     parent = client.common_location_path(project_id, location)
 
-    # Create a Processor
+    # Create a Processor.
+    # For available types, refer to https://cloud.google.com/document-ai/docs/create-processor
     processor = client.create_processor(
         parent=parent,
-        processor=documentai.Processor(
-            type_="OCR_PROCESSOR",  # Refer to https://cloud.google.com/document-ai/docs/create-processor for how to get available processor types
+        processor=documentai_v1.Processor(
+            type_="OCR_PROCESSOR",
             display_name=processor_display_name,
         ),
     )
 
-    # Print the processor information
+    # Print the processor information.
     print(f"Processor Name: {processor.name}")
 
-    # Read the file into memory
+    # Read the file into memory.
     with open(file_path, "rb") as image:
         image_content = image.read()
 
-    # Load binary data
-    raw_document = documentai.RawDocument(
+    # Load binary data.
+    # For supported MIME types, refer to https://cloud.google.com/document-ai/docs/file-types
+    raw_document = documentai_v1.RawDocument(
         content=image_content,
-        mime_type="application/pdf",  # Refer to https://cloud.google.com/document-ai/docs/file-types for supported file types
+        mime_type="application/pdf",
     )
 
-    # Configure the process request
-    # `processor.name` is the full resource name of the processor, e.g.:
-    # `projects/{project_id}/locations/{location}/processors/{processor_id}`
-    request = documentai.ProcessRequest(name=processor.name, raw_document=raw_document)
+    # Configure the process request.
+    # `processor.name` is the full resource name of the processor.
+    # For example: `projects/{project_id}/locations/{location}/processors/{processor_id}`
+    request = documentai_v1.ProcessRequest(name=processor.name, raw_document=raw_document)
 
     result = client.process_document(request=request)
+    document = result.document
 
+    # Read the text recognition output from the processor.
     # For a full list of `Document` object attributes, reference this page:
     # https://cloud.google.com/document-ai/docs/reference/rest/v1/Document
-    document = result.document
-
-    # Read the text recognition output from the processor
     print("The document contains the following text:")
     print(document.text)
     # [END documentai_quickstart]
-    return processor
+
+    return processor, document
@@ -1,4 +1,4 @@
-# # Copyright 2020 Google LLC
+# Copyright 2020 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,43 +11,39 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
-
-# flake8: noqa
 
 import os
 from uuid import uuid4
 
 from documentai.snippets import quickstart_sample
 
 from google.api_core.client_options import ClientOptions
-from google.cloud import documentai  # type: ignore
+from google.cloud import documentai_v1
 
-location = "us"
-project_id = os.environ["GOOGLE_CLOUD_PROJECT"]
-processor_display_name = f"test-processor-{uuid4()}"
-file_path = "resources/invoice.pdf"
+LOCATION = "us"
+PROJECT_ID = os.environ["GOOGLE_CLOUD_PROJECT"]
+PROCESSOR_DISPLAY_NAME = f"test-processor-{uuid4()}"
+FILE_PATH = "resources/invoice.pdf"
 
 
-def test_quickstart(capsys):
-    processor = quickstart_sample.quickstart(
-        project_id=project_id,
-        location=location,
-        processor_display_name=processor_display_name,
-        file_path=file_path,
+def test_quickstart() -> None:
+    processor, document = quickstart_sample.quickstart(
+        project_id=PROJECT_ID,
+        location=LOCATION,
+        processor_display_name=PROCESSOR_DISPLAY_NAME,
+        file_path=FILE_PATH,
     )
-    out, _ = capsys.readouterr()
+
+    assert processor is not None
+    assert "Invoice" in document.text
 
     # Delete created processor
-    client = documentai.DocumentProcessorServiceClient(
+    client = documentai_v1.DocumentProcessorServiceClient(
         client_options=ClientOptions(
-            api_endpoint=f"{location}-documentai.googleapis.com"
+            api_endpoint=f"{LOCATION}-documentai.googleapis.com"
         )
     )
     operation = client.delete_processor(name=processor.name)
+
     # Wait for operation to complete
     operation.result()
-
-    assert "Processor Name:" in out
-    assert "text:" in out
-    assert "Invoice" in out