Skip to content

Commit 0eb06ae

Browse files
fix(documentai): refactor 'documentai_quickstart' sample for latest style guide (GoogleCloudPlatform#13256)
* fix(documentai): refactor 'documentai_quickstart' sample for latest style guide
  - Move the sample inside the function so it's easier to read and run
  - Rewrite comments
  - Replace call to 'documentai' with 'documentai_v1', to make it easier to identify which version is being used
  - Add type hints
  - Replace checking stderr/out with assertions on the returned objects
* fix(documentai): remove space
1 parent 2c74026 commit 0eb06ae

File tree

2 files changed

+61
-57
lines changed

2 files changed

+61
-57
lines changed

documentai/snippets/quickstart_sample.py

Lines changed: 43 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -11,72 +11,80 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14-
#
15-
16-
# flake8: noqa
17-
18-
# [START documentai_quickstart]
19-
20-
from google.api_core.client_options import ClientOptions
21-
from google.cloud import documentai # type: ignore
2214

23-
# TODO(developer): Uncomment these variables before running the sample.
24-
# project_id = "YOUR_PROJECT_ID"
25-
# location = "YOUR_PROCESSOR_LOCATION" # Format is "us" or "eu"
26-
# file_path = "/path/to/local/pdf"
27-
# processor_display_name = "YOUR_PROCESSOR_DISPLAY_NAME" # Must be unique per project, e.g.: "My Processor"
15+
from google.cloud.documentai_v1.types.document import Document
16+
from google.cloud.documentai_v1.types.processor import Processor
2817

2918

3019
def quickstart(
3120
project_id: str,
3221
location: str,
3322
file_path: str,
34-
processor_display_name: str = "My Processor",
35-
):
36-
# You must set the `api_endpoint`if you use a location other than "us".
23+
processor_display_name: str,
24+
) -> tuple[Processor, Document]:
25+
# [START documentai_quickstart]
26+
from google.api_core.client_options import ClientOptions
27+
from google.cloud import documentai_v1 # type: ignore
28+
29+
# TODO(developer): Update and uncomment these variables before running the sample.
30+
# project_id = "MY_PROJECT_ID"
31+
32+
# Processor location. For example: "us" or "eu".
33+
# location = "MY_PROCESSOR_LOCATION"
34+
35+
# Path for file to process.
36+
# file_path = "/path/to/local/pdf"
37+
38+
# Processor display name must be unique per project.
39+
# processor_display_name = "MY_PROCESSOR_DISPLAY_NAME"
40+
41+
# Set `api_endpoint` if you use a location other than "us".
3742
opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com")
3843

39-
client = documentai.DocumentProcessorServiceClient(client_options=opts)
44+
# Initialize Document AI client.
45+
client = documentai_v1.DocumentProcessorServiceClient(client_options=opts)
4046

41-
# The full resource name of the location, e.g.:
42-
# `projects/{project_id}/locations/{location}`
47+
# Get the full resource name of the location.
48+
# For example: `projects/{project_id}/locations/{location}`
4349
parent = client.common_location_path(project_id, location)
4450

45-
# Create a Processor
51+
# Create a Processor.
52+
# For available types, refer to https://cloud.google.com/document-ai/docs/create-processor
4653
processor = client.create_processor(
4754
parent=parent,
48-
processor=documentai.Processor(
49-
type_="OCR_PROCESSOR", # Refer to https://cloud.google.com/document-ai/docs/create-processor for how to get available processor types
55+
processor=documentai_v1.Processor(
56+
type_="OCR_PROCESSOR",
5057
display_name=processor_display_name,
5158
),
5259
)
5360

54-
# Print the processor information
61+
# Print the processor information.
5562
print(f"Processor Name: {processor.name}")
5663

57-
# Read the file into memory
64+
# Read the file into memory.
5865
with open(file_path, "rb") as image:
5966
image_content = image.read()
6067

61-
# Load binary data
62-
raw_document = documentai.RawDocument(
68+
# Load binary data.
69+
# For supported MIME types, refer to https://cloud.google.com/document-ai/docs/file-types
70+
raw_document = documentai_v1.RawDocument(
6371
content=image_content,
64-
mime_type="application/pdf", # Refer to https://cloud.google.com/document-ai/docs/file-types for supported file types
72+
mime_type="application/pdf",
6573
)
6674

67-
# Configure the process request
68-
# `processor.name` is the full resource name of the processor, e.g.:
69-
# `projects/{project_id}/locations/{location}/processors/{processor_id}`
70-
request = documentai.ProcessRequest(name=processor.name, raw_document=raw_document)
75+
# Configure the process request.
76+
# `processor.name` is the full resource name of the processor.
77+
# For example: `projects/{project_id}/locations/{location}/processors/{processor_id}`
78+
request = documentai_v1.ProcessRequest(name=processor.name, raw_document=raw_document)
7179

7280
result = client.process_document(request=request)
81+
document = result.document
7382

83+
# Read the text recognition output from the processor.
7484
# For a full list of `Document` object attributes, reference this page:
7585
# https://cloud.google.com/document-ai/docs/reference/rest/v1/Document
76-
document = result.document
77-
78-
# Read the text recognition output from the processor
7986
print("The document contains the following text:")
8087
print(document.text)
8188
# [END documentai_quickstart]
82-
return processor
89+
90+
return processor, document
Lines changed: 18 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
# # Copyright 2020 Google LLC
1+
# Copyright 2020 Google LLC
22
#
33
# Licensed under the Apache License, Version 2.0 (the "License");
44
# you may not use this file except in compliance with the License.
@@ -11,43 +11,39 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14-
#
15-
16-
# flake8: noqa
1714

1815
import os
1916
from uuid import uuid4
2017

2118
from documentai.snippets import quickstart_sample
2219

2320
from google.api_core.client_options import ClientOptions
24-
from google.cloud import documentai # type: ignore
21+
from google.cloud import documentai_v1
2522

26-
location = "us"
27-
project_id = os.environ["GOOGLE_CLOUD_PROJECT"]
28-
processor_display_name = f"test-processor-{uuid4()}"
29-
file_path = "resources/invoice.pdf"
23+
LOCATION = "us"
24+
PROJECT_ID = os.environ["GOOGLE_CLOUD_PROJECT"]
25+
PROCESSOR_DISPLAY_NAME = f"test-processor-{uuid4()}"
26+
FILE_PATH = "resources/invoice.pdf"
3027

3128

32-
def test_quickstart(capsys):
33-
processor = quickstart_sample.quickstart(
34-
project_id=project_id,
35-
location=location,
36-
processor_display_name=processor_display_name,
37-
file_path=file_path,
29+
def test_quickstart() -> None:
30+
processor, document = quickstart_sample.quickstart(
31+
project_id=PROJECT_ID,
32+
location=LOCATION,
33+
processor_display_name=PROCESSOR_DISPLAY_NAME,
34+
file_path=FILE_PATH,
3835
)
39-
out, _ = capsys.readouterr()
36+
37+
assert processor is not None
38+
assert "Invoice" in document.text
4039

4140
# Delete created processor
42-
client = documentai.DocumentProcessorServiceClient(
41+
client = documentai_v1.DocumentProcessorServiceClient(
4342
client_options=ClientOptions(
44-
api_endpoint=f"{location}-documentai.googleapis.com"
43+
api_endpoint=f"{LOCATION}-documentai.googleapis.com"
4544
)
4645
)
4746
operation = client.delete_processor(name=processor.name)
47+
4848
# Wait for operation to complete
4949
operation.result()
50-
51-
assert "Processor Name:" in out
52-
assert "text:" in out
53-
assert "Invoice" in out

0 commit comments

Comments
 (0)