|
11 | 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 | 12 | # See the License for the specific language governing permissions and
|
13 | 13 | # limitations under the License.
|
14 |
| -# |
15 |
| - |
16 |
| -# flake8: noqa |
17 |
| - |
18 |
| -# [START documentai_quickstart] |
19 |
| - |
20 |
| -from google.api_core.client_options import ClientOptions |
21 |
| -from google.cloud import documentai # type: ignore |
22 | 14 |
|
23 |
| -# TODO(developer): Uncomment these variables before running the sample. |
24 |
| -# project_id = "YOUR_PROJECT_ID" |
25 |
| -# location = "YOUR_PROCESSOR_LOCATION" # Format is "us" or "eu" |
26 |
| -# file_path = "/path/to/local/pdf" |
27 |
| -# processor_display_name = "YOUR_PROCESSOR_DISPLAY_NAME" # Must be unique per project, e.g.: "My Processor" |
| 15 | +from google.cloud.documentai_v1.types.document import Document |
| 16 | +from google.cloud.documentai_v1.types.processor import Processor |
28 | 17 |
|
29 | 18 |
|
30 | 19 | def quickstart(
|
31 | 20 | project_id: str,
|
32 | 21 | location: str,
|
33 | 22 | file_path: str,
|
34 |
| - processor_display_name: str = "My Processor", |
35 |
| -): |
36 |
| - # You must set the `api_endpoint`if you use a location other than "us". |
| 23 | + processor_display_name: str, |
| 24 | +) -> tuple[Processor, Document]: |
| 25 | + # [START documentai_quickstart] |
| 26 | + from google.api_core.client_options import ClientOptions |
| 27 | + from google.cloud import documentai_v1 # type: ignore |
| 28 | + |
| 29 | + # TODO(developer): Update and uncomment these variables before running the sample. |
| 30 | + # project_id = "MY_PROJECT_ID" |
| 31 | + |
| 32 | + # Processor location. For example: "us" or "eu". |
| 33 | + # location = "MY_PROCESSOR_LOCATION" |
| 34 | + |
| 35 | + # Path for file to process. |
| 36 | + # file_path = "/path/to/local/pdf" |
| 37 | + |
| 38 | + # Processor display name must be unique per project. |
| 39 | + # processor_display_name = "MY_PROCESSOR_DISPLAY_NAME" |
| 40 | + |
| 41 | + # Set `api_endpoint` if you use a location other than "us". |
37 | 42 | opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com")
|
38 | 43 |
|
39 |
| - client = documentai.DocumentProcessorServiceClient(client_options=opts) |
| 44 | + # Initialize Document AI client. |
| 45 | + client = documentai_v1.DocumentProcessorServiceClient(client_options=opts) |
40 | 46 |
|
41 |
| - # The full resource name of the location, e.g.: |
42 |
| - # `projects/{project_id}/locations/{location}` |
| 47 | + # Get the full resource name of the location. |
| 48 | + # For example: `projects/{project_id}/locations/{location}` |
43 | 49 | parent = client.common_location_path(project_id, location)
|
44 | 50 |
|
45 |
| - # Create a Processor |
| 51 | + # Create a Processor. |
| 52 | + # For available types, refer to https://cloud.google.com/document-ai/docs/create-processor |
46 | 53 | processor = client.create_processor(
|
47 | 54 | parent=parent,
|
48 |
| - processor=documentai.Processor( |
49 |
| - type_="OCR_PROCESSOR", # Refer to https://cloud.google.com/document-ai/docs/create-processor for how to get available processor types |
| 55 | + processor=documentai_v1.Processor( |
| 56 | + type_="OCR_PROCESSOR", |
50 | 57 | display_name=processor_display_name,
|
51 | 58 | ),
|
52 | 59 | )
|
53 | 60 |
|
54 |
| - # Print the processor information |
| 61 | + # Print the processor information. |
55 | 62 | print(f"Processor Name: {processor.name}")
|
56 | 63 |
|
57 |
| - # Read the file into memory |
| 64 | + # Read the file into memory. |
58 | 65 | with open(file_path, "rb") as image:
|
59 | 66 | image_content = image.read()
|
60 | 67 |
|
61 |
| - # Load binary data |
62 |
| - raw_document = documentai.RawDocument( |
| 68 | + # Load binary data. |
| 69 | + # For supported MIME types, refer to https://cloud.google.com/document-ai/docs/file-types |
| 70 | + raw_document = documentai_v1.RawDocument( |
63 | 71 | content=image_content,
|
64 |
| - mime_type="application/pdf", # Refer to https://cloud.google.com/document-ai/docs/file-types for supported file types |
| 72 | + mime_type="application/pdf", |
65 | 73 | )
|
66 | 74 |
|
67 |
| - # Configure the process request |
68 |
| - # `processor.name` is the full resource name of the processor, e.g.: |
69 |
| - # `projects/{project_id}/locations/{location}/processors/{processor_id}` |
70 |
| - request = documentai.ProcessRequest(name=processor.name, raw_document=raw_document) |
| 75 | + # Configure the process request. |
| 76 | + # `processor.name` is the full resource name of the processor. |
| 77 | + # For example: `projects/{project_id}/locations/{location}/processors/{processor_id}` |
| 78 | + request = documentai_v1.ProcessRequest(name=processor.name, raw_document=raw_document) |
71 | 79 |
|
72 | 80 | result = client.process_document(request=request)
|
| 81 | + document = result.document |
73 | 82 |
|
| 83 | + # Read the text recognition output from the processor. |
74 | 84 | # For a full list of `Document` object attributes, refer to this page:
|
75 | 85 | # https://cloud.google.com/document-ai/docs/reference/rest/v1/Document
|
76 |
| - document = result.document |
77 |
| - |
78 |
| - # Read the text recognition output from the processor |
79 | 86 | print("The document contains the following text:")
|
80 | 87 | print(document.text)
|
81 | 88 | # [END documentai_quickstart]
|
82 |
| - return processor |
| 89 | + |
| 90 | + return processor, document |
0 commit comments