diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 0abdf44e334..d102ce8b230 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -43,6 +43,7 @@ /dataproc/**/* @GoogleCloudPlatform/python-samples-reviewers /datastore/**/* @GoogleCloudPlatform/cloud-native-db-dpes @GoogleCloudPlatform/python-samples-reviewers /dns/**/* @GoogleCloudPlatform/python-samples-reviewers +/documentai/**/* @GoogleCloudPlatform/dee-data-ai @GoogleCloudPlatform/python-samples-reviewers /endpoints/**/* @GoogleCloudPlatform/python-samples-reviewers /eventarc/**/* @GoogleCloudPlatform/aap-dpes @GoogleCloudPlatform/python-samples-reviewers /error_reporting/**/* @GoogleCloudPlatform/python-samples-reviewers diff --git a/.github/blunderbuss.yml b/.github/blunderbuss.yml index 1b0f09f6f81..2d5ab187a40 100644 --- a/.github/blunderbuss.yml +++ b/.github/blunderbuss.yml @@ -13,6 +13,10 @@ # limitations under the License. assign_issues_by: +- labels: + - 'api: documentai' + to: + - GoogleCloudPlatform/dee-data-ai - labels: - 'api: appengine' - 'api: eventarc' @@ -188,6 +192,7 @@ assign_prs_by: to: - GoogleCloudPlatform/infra-db-dpes - labels: + - 'api: documentai' - 'api: retail' to: - GoogleCloudPlatform/dee-data-ai diff --git a/document/README.rst b/document/README.rst deleted file mode 100644 index 9594ec00a83..00000000000 --- a/document/README.rst +++ /dev/null @@ -1,3 +0,0 @@ -These samples have been moved. 
- -https://github.com/googleapis/python-documentai/tree/main/samples/snippets diff --git a/documentai/AUTHORING_GUIDE.md b/documentai/AUTHORING_GUIDE.md new file mode 100644 index 00000000000..8249522ffc2 --- /dev/null +++ b/documentai/AUTHORING_GUIDE.md @@ -0,0 +1 @@ +See https://github.com/GoogleCloudPlatform/python-docs-samples/blob/main/AUTHORING_GUIDE.md \ No newline at end of file diff --git a/documentai/CONTRIBUTING.md b/documentai/CONTRIBUTING.md new file mode 100644 index 00000000000..f5fe2e6baf1 --- /dev/null +++ b/documentai/CONTRIBUTING.md @@ -0,0 +1 @@ +See https://github.com/GoogleCloudPlatform/python-docs-samples/blob/main/CONTRIBUTING.md \ No newline at end of file diff --git a/documentai/__init__.py b/documentai/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/documentai/snippets/__init__.py b/documentai/snippets/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/documentai/snippets/batch_process_documents_processor_version_sample.py b/documentai/snippets/batch_process_documents_processor_version_sample.py new file mode 100644 index 00000000000..71834c70d4f --- /dev/null +++ b/documentai/snippets/batch_process_documents_processor_version_sample.py @@ -0,0 +1,163 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# [START documentai_batch_process_documents_processor_version]
import re
from typing import Optional

from google.api_core.client_options import ClientOptions
from google.api_core.exceptions import InternalServerError
from google.api_core.exceptions import RetryError
from google.cloud import documentai
from google.cloud import storage

# TODO(developer): Uncomment these variables before running the sample.
# project_id = 'YOUR_PROJECT_ID'
# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu'
# processor_id = 'YOUR_PROCESSOR_ID' # Example: aeb8cea219b7c272
# processor_version_id = "YOUR_PROCESSOR_VERSION_ID" # Example: pretrained-ocr-v1.0-2020-09-23
# gcs_input_uri = "YOUR_INPUT_URI" # Format: gs://bucket/directory/file.pdf
# input_mime_type = "application/pdf"
# gcs_output_bucket = "YOUR_OUTPUT_BUCKET_NAME" # Format: gs://bucket
# gcs_output_uri_prefix = "YOUR_OUTPUT_URI_PREFIX" # Format: directory/subdirectory/
# field_mask = "text,entities,pages.pageNumber"  # Optional. The fields to return in the Document object.


def batch_process_documents_processor_version(
    project_id: str,
    location: str,
    processor_id: str,
    processor_version_id: str,
    gcs_input_uri: str,
    input_mime_type: str,
    gcs_output_bucket: str,
    gcs_output_uri_prefix: str,
    field_mask: Optional[str] = None,
    timeout: int = 400,
) -> None:
    """Batch-process one Cloud Storage document with a specific processor version.

    Sends a BatchProcess request, waits for the long-running operation, then
    downloads every JSON result written to Cloud Storage and prints its text.

    Args:
        project_id: Google Cloud project ID.
        location: Processor location, 'us' or 'eu'.
        processor_id: ID of the processor.
        processor_version_id: ID of the processor version to run.
        gcs_input_uri: Input file, formatted as gs://bucket/directory/file.pdf.
        input_mime_type: MIME type of the input file, e.g. "application/pdf".
        gcs_output_bucket: Output bucket, formatted as gs://bucket.
        gcs_output_uri_prefix: Output prefix, formatted as
            directory/subdirectory/ (must end with a trailing slash).
        field_mask: Optional comma-separated Document fields to return.
        timeout: Seconds to wait for the operation to finish.

    Raises:
        ValueError: If the operation metadata does not report SUCCEEDED.
    """
    # You must set the api_endpoint if you use a location other than 'us'.
    opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com")

    client = documentai.DocumentProcessorServiceClient(client_options=opts)

    gcs_document = documentai.GcsDocument(
        gcs_uri=gcs_input_uri, mime_type=input_mime_type
    )

    # Load GCS Input URI into a List of document files
    gcs_documents = documentai.GcsDocuments(documents=[gcs_document])
    input_config = documentai.BatchDocumentsInputConfig(gcs_documents=gcs_documents)

    # NOTE: Alternatively, specify a GCS URI Prefix to process an entire directory
    #
    # gcs_input_uri = "gs://bucket/directory/"
    # gcs_prefix = documentai.GcsPrefix(gcs_uri_prefix=gcs_input_uri)
    # input_config = documentai.BatchDocumentsInputConfig(gcs_prefix=gcs_prefix)
    #

    # Cloud Storage URI for the Output Directory
    # This must end with a trailing forward slash `/`
    destination_uri = f"{gcs_output_bucket}/{gcs_output_uri_prefix}"

    gcs_output_config = documentai.DocumentOutputConfig.GcsOutputConfig(
        gcs_uri=destination_uri, field_mask=field_mask
    )

    # Where to write results
    output_config = documentai.DocumentOutputConfig(gcs_output_config=gcs_output_config)

    # The full resource name of the processor version
    # e.g. projects/{project_id}/locations/{location}/processors/{processor_id}/processorVersions/{processor_version_id}
    name = client.processor_version_path(
        project_id, location, processor_id, processor_version_id
    )

    request = documentai.BatchProcessRequest(
        name=name,
        input_documents=input_config,
        document_output_config=output_config,
    )

    # BatchProcess returns a Long Running Operation (LRO)
    operation = client.batch_process_documents(request)

    # Continually polls the operation until it is complete.
    # This could take some time for larger files
    # Format: projects/PROJECT_NUMBER/locations/LOCATION/operations/OPERATION_ID
    try:
        print(f"Waiting for operation {operation.operation.name} to complete...")
        operation.result(timeout=timeout)
    # Catch exception when operation doesn't finish before timeout
    except (RetryError, InternalServerError) as e:
        print(e.message)

    # NOTE: Can also use callbacks for asynchronous processing
    #
    # def my_callback(future):
    #   result = future.result()
    #
    # operation.add_done_callback(my_callback)

    # Once the operation is complete,
    # get output document information from operation metadata
    metadata = documentai.BatchProcessMetadata(operation.metadata)

    # Also reached after a timeout above: a still-running operation is not
    # SUCCEEDED, so it is reported as a failure here.
    if metadata.state != documentai.BatchProcessMetadata.State.SUCCEEDED:
        raise ValueError(f"Batch Process Failed: {metadata.state_message}")

    storage_client = storage.Client()

    print("Output files:")
    # One process per Input Document
    for process in metadata.individual_process_statuses:
        # output_gcs_destination format: gs://BUCKET/PREFIX/OPERATION_NUMBER/INPUT_FILE_NUMBER/
        # The Cloud Storage API requires the bucket name and URI prefix separately
        matches = re.match(r"gs://(.*?)/(.*)", process.output_gcs_destination)
        if not matches:
            print(
                "Could not parse output GCS destination:",
                process.output_gcs_destination,
            )
            continue

        output_bucket, output_prefix = matches.groups()

        # Get List of Document Objects from the Output Bucket
        output_blobs = storage_client.list_blobs(output_bucket, prefix=output_prefix)

        # Document AI may output multiple JSON files per source file
        for blob in output_blobs:
            # Document AI should only output JSON files to GCS.
            # Check the MIME type rather than searching the object name for
            # ".json", which would also accept names such as "x.json.bak".
            if blob.content_type != "application/json":
                print(
                    f"Skipping non-supported file: {blob.name} - Mimetype: {blob.content_type}"
                )
                continue

            # Download JSON File as bytes object and convert to Document Object
            print(f"Fetching {blob.name}")
            document = documentai.Document.from_json(
                blob.download_as_bytes(), ignore_unknown_fields=True
            )

            # For a full list of Document object attributes, please reference this page:
            # https://cloud.google.com/python/docs/reference/documentai/latest/google.cloud.documentai_v1.types.Document

            # Read the text recognition output from the processor
            print("The document contains the following text:")
            print(document.text)


# [END documentai_batch_process_documents_processor_version]
import os
from uuid import uuid4

from documentai.snippets import (
    batch_process_documents_processor_version_sample,
)

location = "us"
project_id = os.environ["GOOGLE_CLOUD_PROJECT"]
processor_id = "90484cfdedb024f6"
processor_version_id = "pretrained-form-parser-v1.0-2020-09-23"
gcs_input_uri = "gs://cloud-samples-data/documentai/invoice.pdf"
input_mime_type = "application/pdf"
gcs_output_bucket = "gs://document-ai-python"
gcs_output_uri_prefix = f"{uuid4()}/"
field_mask = "text,pages.pageNumber"


def test_batch_process_documents_processor_version(capsys):
    """Run the processor-version batch sample and check its printed progress."""
    sample_kwargs = dict(
        project_id=project_id,
        location=location,
        processor_id=processor_id,
        processor_version_id=processor_version_id,
        gcs_input_uri=gcs_input_uri,
        input_mime_type=input_mime_type,
        gcs_output_bucket=gcs_output_bucket,
        gcs_output_uri_prefix=gcs_output_uri_prefix,
        field_mask=field_mask,
    )
    batch_process_documents_processor_version_sample.batch_process_documents_processor_version(
        **sample_kwargs
    )

    captured = capsys.readouterr()

    # The sample prints the operation name, each fetched blob, and the text.
    for expected in ("operation", "Fetching", "text:"):
        assert expected in captured.out
# [START documentai_batch_process_document]
import re
from typing import Optional

from google.api_core.client_options import ClientOptions
from google.api_core.exceptions import InternalServerError
from google.api_core.exceptions import RetryError
from google.cloud import documentai
from google.cloud import storage

# TODO(developer): Uncomment these variables before running the sample.
# project_id = 'YOUR_PROJECT_ID'
# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu'
# processor_id = 'YOUR_PROCESSOR_ID' # Create processor before running sample
# gcs_input_uri = "YOUR_INPUT_URI" # Format: gs://bucket/directory/file.pdf
# input_mime_type = "application/pdf"
# gcs_output_bucket = "YOUR_OUTPUT_BUCKET_NAME" # Format: gs://bucket
# gcs_output_uri_prefix = "YOUR_OUTPUT_URI_PREFIX" # Format: directory/subdirectory/
# field_mask = "text,entities,pages.pageNumber"  # Optional. The fields to return in the Document object.


def batch_process_documents(
    project_id: str,
    location: str,
    processor_id: str,
    gcs_input_uri: str,
    input_mime_type: str,
    gcs_output_bucket: str,
    gcs_output_uri_prefix: str,
    field_mask: Optional[str] = None,
    timeout: int = 400,
) -> None:
    """Batch-process one Cloud Storage document with a Document AI processor.

    Sends a BatchProcess request, waits for the long-running operation, then
    downloads every JSON result written to Cloud Storage and prints its text.

    Args:
        project_id: Google Cloud project ID.
        location: Processor location, 'us' or 'eu'.
        processor_id: ID of an existing processor.
        gcs_input_uri: Input file, formatted as gs://bucket/directory/file.pdf.
        input_mime_type: MIME type of the input file, e.g. "application/pdf".
        gcs_output_bucket: Output bucket, formatted as gs://bucket.
        gcs_output_uri_prefix: Output prefix, formatted as
            directory/subdirectory/ (must end with a trailing slash).
        field_mask: Optional comma-separated Document fields to return.
        timeout: Seconds to wait for the operation to finish.

    Raises:
        ValueError: If the operation metadata does not report SUCCEEDED.
    """
    # You must set the api_endpoint if you use a location other than 'us'.
    opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com")

    client = documentai.DocumentProcessorServiceClient(client_options=opts)

    gcs_document = documentai.GcsDocument(
        gcs_uri=gcs_input_uri, mime_type=input_mime_type
    )

    # Load GCS Input URI into a List of document files
    gcs_documents = documentai.GcsDocuments(documents=[gcs_document])
    input_config = documentai.BatchDocumentsInputConfig(gcs_documents=gcs_documents)

    # NOTE: Alternatively, specify a GCS URI Prefix to process an entire directory
    #
    # gcs_input_uri = "gs://bucket/directory/"
    # gcs_prefix = documentai.GcsPrefix(gcs_uri_prefix=gcs_input_uri)
    # input_config = documentai.BatchDocumentsInputConfig(gcs_prefix=gcs_prefix)
    #

    # Cloud Storage URI for the Output Directory
    # This must end with a trailing forward slash `/`
    destination_uri = f"{gcs_output_bucket}/{gcs_output_uri_prefix}"

    gcs_output_config = documentai.DocumentOutputConfig.GcsOutputConfig(
        gcs_uri=destination_uri, field_mask=field_mask
    )

    # Where to write results
    output_config = documentai.DocumentOutputConfig(gcs_output_config=gcs_output_config)

    # The full resource name of the processor, e.g.:
    # projects/project_id/locations/location/processors/processor_id
    name = client.processor_path(project_id, location, processor_id)

    request = documentai.BatchProcessRequest(
        name=name,
        input_documents=input_config,
        document_output_config=output_config,
    )

    # BatchProcess returns a Long Running Operation (LRO)
    operation = client.batch_process_documents(request)

    # Continually polls the operation until it is complete.
    # This could take some time for larger files
    # Format: projects/PROJECT_NUMBER/locations/LOCATION/operations/OPERATION_ID
    try:
        print(f"Waiting for operation {operation.operation.name} to complete...")
        operation.result(timeout=timeout)
    # Catch exception when operation doesn't finish before timeout
    except (RetryError, InternalServerError) as e:
        print(e.message)

    # NOTE: Can also use callbacks for asynchronous processing
    #
    # def my_callback(future):
    #   result = future.result()
    #
    # operation.add_done_callback(my_callback)

    # Once the operation is complete,
    # get output document information from operation metadata
    metadata = documentai.BatchProcessMetadata(operation.metadata)

    # Also reached after a timeout above: a still-running operation is not
    # SUCCEEDED, so it is reported as a failure here.
    if metadata.state != documentai.BatchProcessMetadata.State.SUCCEEDED:
        raise ValueError(f"Batch Process Failed: {metadata.state_message}")

    storage_client = storage.Client()

    print("Output files:")
    # One process per Input Document
    for process in metadata.individual_process_statuses:
        # output_gcs_destination format: gs://BUCKET/PREFIX/OPERATION_NUMBER/INPUT_FILE_NUMBER/
        # The Cloud Storage API requires the bucket name and URI prefix separately
        matches = re.match(r"gs://(.*?)/(.*)", process.output_gcs_destination)
        if not matches:
            print(
                "Could not parse output GCS destination:",
                process.output_gcs_destination,
            )
            continue

        output_bucket, output_prefix = matches.groups()

        # Get List of Document Objects from the Output Bucket
        output_blobs = storage_client.list_blobs(output_bucket, prefix=output_prefix)

        # Document AI may output multiple JSON files per source file
        for blob in output_blobs:
            # Document AI should only output JSON files to GCS.
            # Check the MIME type rather than searching the object name for
            # ".json", which would also accept names such as "x.json.bak".
            if blob.content_type != "application/json":
                print(
                    f"Skipping non-supported file: {blob.name} - Mimetype: {blob.content_type}"
                )
                continue

            # Download JSON File as bytes object and convert to Document Object
            print(f"Fetching {blob.name}")
            document = documentai.Document.from_json(
                blob.download_as_bytes(), ignore_unknown_fields=True
            )

            # For a full list of Document object attributes, please reference this page:
            # https://cloud.google.com/python/docs/reference/documentai/latest/google.cloud.documentai_v1.types.Document

            # Read the text recognition output from the processor
            print("The document contains the following text:")
            print(document.text)


# [END documentai_batch_process_document]
import os
from uuid import uuid4

from documentai.snippets import batch_process_documents_sample

location = "us"
project_id = os.environ["GOOGLE_CLOUD_PROJECT"]
processor_id = "90484cfdedb024f6"
gcs_input_uri = "gs://cloud-samples-data/documentai/invoice.pdf"
input_mime_type = "application/pdf"
gcs_output_bucket = "gs://document-ai-python"
gcs_output_uri_prefix = f"{uuid4()}/"
field_mask = "text,pages.pageNumber"


def test_batch_process_documents(capsys):
    """Run the batch sample against the shared processor and check its output."""
    sample_kwargs = dict(
        project_id=project_id,
        location=location,
        processor_id=processor_id,
        gcs_input_uri=gcs_input_uri,
        input_mime_type=input_mime_type,
        gcs_output_bucket=gcs_output_bucket,
        gcs_output_uri_prefix=gcs_output_uri_prefix,
        field_mask=field_mask,
    )
    batch_process_documents_sample.batch_process_documents(**sample_kwargs)

    captured = capsys.readouterr()

    # The sample prints the operation name, each fetched blob, and the text.
    for expected in ("operation", "Fetching", "text:"):
        assert expected in captured.out
# [START documentai_cancel_operation]

from google.api_core.client_options import ClientOptions
from google.api_core.exceptions import FailedPrecondition
from google.api_core.exceptions import NotFound
from google.cloud import documentai
from google.longrunning.operations_pb2 import CancelOperationRequest

# TODO(developer): Uncomment these variables before running the sample.
# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu'
# operation_name = 'YOUR_OPERATION_NAME'  # Format is 'projects/project_id/locations/location/operations/operation_id'


def cancel_operation_sample(location: str, operation_name: str):
    """Request cancellation of a Document AI long-running operation.

    Prints a confirmation on success; if the service answers with
    FailedPrecondition or NotFound, prints that error's message instead.

    Args:
        location: Processor location, 'us' or 'eu'.
        operation_name: Full operation resource name, formatted as
            projects/project_id/locations/location/operations/operation_id.
    """
    # The regional endpoint is required for any location other than 'us'.
    endpoint = f"{location}-documentai.googleapis.com"
    client = documentai.DocumentProcessorServiceClient(
        client_options=ClientOptions(api_endpoint=endpoint)
    )

    cancel_request = CancelOperationRequest(name=operation_name)

    # Make CancelOperation request
    try:
        client.cancel_operation(request=cancel_request)
    except (FailedPrecondition, NotFound) as err:
        print(err.message)
    else:
        print(f"Operation {operation_name} cancelled")


# [END documentai_cancel_operation]
import os

from documentai.snippets import cancel_operation_sample

location = "us"
project_id = os.environ["GOOGLE_CLOUD_PROJECT"]
operation_id = "4311241022337572151"
operation_name = f"projects/{project_id}/locations/{location}/operations/{operation_id}"


def test_cancel_operation(capsys):
    """Run the cancel sample for a fixed operation ID and check what it prints."""
    cancel_operation_sample.cancel_operation_sample(
        operation_name=operation_name,
        location=location,
    )

    captured = capsys.readouterr()

    assert "Operation" in captured.out
# [START documentai_create_processor]

from google.api_core.client_options import ClientOptions
from google.cloud import documentai

# TODO(developer): Uncomment these variables before running the sample.
# project_id = 'YOUR_PROJECT_ID'
# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu'
# processor_display_name = 'YOUR_PROCESSOR_DISPLAY_NAME' # Must be unique per project, e.g.: 'My Processor'
# processor_type = 'YOUR_PROCESSOR_TYPE' # Use fetch_processor_types to get available processor types


def create_processor_sample(
    project_id: str, location: str, processor_display_name: str, processor_type: str
):
    """Create a Document AI processor and print its name, display name and type.

    Args:
        project_id: Google Cloud project ID.
        location: Processor location, 'us' or 'eu'.
        processor_display_name: Display name for the new processor; must be
            unique per project.
        processor_type: Processor type, e.g. one reported by
            fetch_processor_types.
    """
    # The regional endpoint is required for any location other than 'us'.
    endpoint = f"{location}-documentai.googleapis.com"
    client = documentai.DocumentProcessorServiceClient(
        client_options=ClientOptions(api_endpoint=endpoint)
    )

    # Full resource name of the location,
    # e.g.: projects/project_id/locations/location
    location_path = client.common_location_path(project_id, location)

    # Describe the processor to create, then send the request.
    new_processor = documentai.Processor(
        display_name=processor_display_name, type_=processor_type
    )
    processor = client.create_processor(parent=location_path, processor=new_processor)

    # Print the processor information
    print(f"Processor Name: {processor.name}")
    print(f"Processor Display Name: {processor.display_name}")
    print(f"Processor Type: {processor.type_}")


# [END documentai_create_processor]
import os
from uuid import uuid4

from documentai.snippets import create_processor_sample
import mock

location = "us"
project_id = os.environ["GOOGLE_CLOUD_PROJECT"]
processor_display_name = f"test-processor-{uuid4()}"
processor_type = "OCR_PROCESSOR"


@mock.patch("google.cloud.documentai.DocumentProcessorServiceClient.create_processor")
@mock.patch("google.cloud.documentai.Processor")
def test_create_processor(processor_mock, create_processor_mock, capsys):
    """Check that the sample calls create_processor once and prints the result.

    Stacked ``mock.patch`` decorators inject their mocks bottom-up, so the
    first parameter is the ``Processor`` mock and the second is the
    ``create_processor`` mock. The parameter names follow that order so the
    ``return_value`` setup and ``assert_called_once`` act on the API call.
    """
    # The mocked API call returns the mocked Processor for the sample to print.
    create_processor_mock.return_value = processor_mock

    create_processor_sample.create_processor_sample(
        project_id=project_id,
        location=location,
        processor_display_name=processor_display_name,
        processor_type=processor_type,
    )

    create_processor_mock.assert_called_once()

    out, _ = capsys.readouterr()

    assert "Processor Name:" in out
    assert "Processor Display Name:" in out
    assert "Processor Type:" in out
# [START documentai_delete_processor]

from google.api_core.client_options import ClientOptions
from google.cloud import documentai

# TODO(developer): Uncomment these variables before running the sample.
# project_id = 'YOUR_PROJECT_ID'
# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu'
# processor_id = 'YOUR_PROCESSOR_ID'


def delete_processor_sample(project_id: str, location: str, processor_id: str):
    """Delete a Document AI processor and wait for the deletion to finish.

    Prints the name of the long-running delete operation before blocking on
    its result.

    Args:
        project_id: Google Cloud project ID.
        location: Processor location, 'us' or 'eu'.
        processor_id: ID of the processor to delete.
    """
    # The regional endpoint is required for any location other than 'us'.
    endpoint = f"{location}-documentai.googleapis.com"
    client = documentai.DocumentProcessorServiceClient(
        client_options=ClientOptions(api_endpoint=endpoint)
    )

    # Full resource name of the processor,
    # e.g.: projects/project_id/locations/location/processors/processor_id
    target = client.processor_path(project_id, location, processor_id)

    # Start the deletion, report the operation name, then block until done.
    delete_operation = client.delete_processor(name=target)
    print(delete_operation.operation.name)
    delete_operation.result()


# [END documentai_delete_processor]
import os

from documentai.snippets import delete_processor_sample
import mock

location = "us"
project_id = os.environ["GOOGLE_CLOUD_PROJECT"]
processor_id = "aaaaaaaaa"
parent = f"projects/{project_id}/locations/{location}/processors/{processor_id}"


@mock.patch("google.cloud.documentai.DocumentProcessorServiceClient.delete_processor")
@mock.patch("google.api_core.operation.Operation")
def test_delete_processor(operation_mock, delete_processor_mock, capsys):
    """Check that the sample calls delete_processor once and prints the operation."""
    # The mocked API call returns the mocked long-running operation.
    delete_processor_mock.return_value = operation_mock

    sample_kwargs = {
        "project_id": project_id,
        "location": location,
        "processor_id": processor_id,
    }
    delete_processor_sample.delete_processor_sample(**sample_kwargs)

    delete_processor_mock.assert_called_once()

    captured = capsys.readouterr()

    assert "operation" in captured.out
# [START documentai_delete_processor_version]

from google.api_core.client_options import ClientOptions
from google.api_core.exceptions import FailedPrecondition
from google.api_core.exceptions import InvalidArgument
from google.cloud import documentai

# TODO(developer): Uncomment these variables before running the sample.
# project_id = 'YOUR_PROJECT_ID'
# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu'
# processor_id = 'YOUR_PROCESSOR_ID' # Create processor before running sample
# processor_version_id = 'YOUR_PROCESSOR_VERSION_ID'


def delete_processor_version_sample(
    project_id: str, location: str, processor_id: str, processor_version_id: str
):
    """Delete one version of a Document AI processor and wait for completion.

    Prints the name of the long-running delete operation. If the request
    raises FailedPrecondition or InvalidArgument, prints that error's message
    instead of propagating it.

    Args:
        project_id: Google Cloud project ID.
        location: Processor location, 'us' or 'eu'.
        processor_id: ID of an existing processor.
        processor_version_id: ID of the processor version to delete.
    """
    # The regional endpoint is required for any location other than 'us'.
    endpoint = f"{location}-documentai.googleapis.com"
    client = documentai.DocumentProcessorServiceClient(
        client_options=ClientOptions(api_endpoint=endpoint)
    )

    # Full resource name of the processor version, e.g.:
    # projects/project_id/locations/location/processors/processor_id/processorVersions/processor_version_id
    version_path = client.processor_version_path(
        project_id, location, processor_id, processor_version_id
    )

    # Make DeleteProcessorVersion request
    try:
        delete_operation = client.delete_processor_version(name=version_path)
        # Report the operation, then block until the deletion finishes.
        print(delete_operation.operation.name)
        delete_operation.result()
    # Delete request will fail if the processor version doesn't exist,
    # or if a request is made on a pretrained processor version
    # or the default processor version
    except (FailedPrecondition, InvalidArgument) as err:
        print(err.message)


# [END documentai_delete_processor_version]
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os + +from documentai.snippets import delete_processor_version_sample +import mock + +location = "us" +project_id = os.environ["GOOGLE_CLOUD_PROJECT"] +processor_id = "aaaaaaaaa" +processor_version_id = "xxxxxxxxxx" + + +@mock.patch( + "google.cloud.documentai.DocumentProcessorServiceClient.delete_processor_version" +) +@mock.patch("google.api_core.operation.Operation") +def test_delete_processor_version( + operation_mock, delete_processor_version_mock, capsys +): + delete_processor_version_mock.return_value = operation_mock + + delete_processor_version_sample.delete_processor_version_sample( + project_id=project_id, + location=location, + processor_id=processor_id, + processor_version_id=processor_version_id, + ) + + delete_processor_version_mock.assert_called_once() + + out, _ = capsys.readouterr() + + assert "operation" in out diff --git a/documentai/snippets/deploy_processor_version_sample.py b/documentai/snippets/deploy_processor_version_sample.py new file mode 100644 index 00000000000..45c81a552da --- /dev/null +++ b/documentai/snippets/deploy_processor_version_sample.py @@ -0,0 +1,56 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# [START documentai_deploy_processor_version] + +from google.api_core.client_options import ClientOptions +from google.api_core.exceptions import FailedPrecondition +from google.cloud import documentai + +# TODO(developer): Uncomment these variables before running the sample. +# project_id = 'YOUR_PROJECT_ID' +# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu' +# processor_id = 'YOUR_PROCESSOR_ID' +# processor_version_id = 'YOUR_PROCESSOR_VERSION_ID' + + +def deploy_processor_version_sample( + project_id: str, location: str, processor_id: str, processor_version_id: str +): + # You must set the api_endpoint if you use a location other than 'us'. 
+ opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com") + + client = documentai.DocumentProcessorServiceClient(client_options=opts) + + # The full resource name of the processor version + # e.g.: projects/project_id/locations/location/processors/processor_id/processorVersions/processor_version_id + name = client.processor_version_path( + project_id, location, processor_id, processor_version_id + ) + + # Make DeployProcessorVersion request + try: + operation = client.deploy_processor_version(name=name) + # Print operation details + print(operation.operation.name) + # Wait for operation to complete + operation.result() + # Deploy request will fail if the + # processor version is already deployed + except FailedPrecondition as e: + print(e.message) + + +# [END documentai_deploy_processor_version] diff --git a/documentai/snippets/deploy_processor_version_sample_test.py b/documentai/snippets/deploy_processor_version_sample_test.py new file mode 100644 index 00000000000..e657108e965 --- /dev/null +++ b/documentai/snippets/deploy_processor_version_sample_test.py @@ -0,0 +1,48 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os + +from documentai.snippets import deploy_processor_version_sample +import mock + +location = "us" +project_id = os.environ["GOOGLE_CLOUD_PROJECT"] +processor_id = "aaaaaaaaa" +processor_version_id = "xxxxxxxxxx" + + +# TODO: Switch to Real Endpoint when Deployable Versions are Available +@mock.patch( + "google.cloud.documentai.DocumentProcessorServiceClient.deploy_processor_version" +) +@mock.patch("google.api_core.operation.Operation") +def test_deploy_processor_version( + operation_mock, deploy_processor_version_mock, capsys +): + deploy_processor_version_mock.return_value = operation_mock + + deploy_processor_version_sample.deploy_processor_version_sample( + project_id=project_id, + location=location, + processor_id=processor_id, + processor_version_id=processor_version_id, + ) + + deploy_processor_version_mock.assert_called_once() + + out, _ = capsys.readouterr() + + assert "operation" in out diff --git a/documentai/snippets/disable_processor_sample.py b/documentai/snippets/disable_processor_sample.py new file mode 100644 index 00000000000..40155a4d20c --- /dev/null +++ b/documentai/snippets/disable_processor_sample.py @@ -0,0 +1,52 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# [START documentai_disable_processor] + +from google.api_core.client_options import ClientOptions +from google.api_core.exceptions import FailedPrecondition +from google.cloud import documentai + +# TODO(developer): Uncomment these variables before running the sample. +# project_id = 'YOUR_PROJECT_ID' +# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu' +# processor_id = 'YOUR_PROCESSOR_ID' + + +def disable_processor_sample(project_id: str, location: str, processor_id: str): + # You must set the api_endpoint if you use a location other than 'us'. + opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com") + + client = documentai.DocumentProcessorServiceClient(client_options=opts) + + # The full resource name of the processor + # e.g.: projects/project_id/locations/location/processors/processor_id + processor_name = client.processor_path(project_id, location, processor_id) + request = documentai.DisableProcessorRequest(name=processor_name) + + # Make DisableProcessor request + try: + operation = client.disable_processor(request=request) + + # Print operation name + print(operation.operation.name) + # Wait for operation to complete + operation.result() + # Cannot disable a processor that is already disabled + except FailedPrecondition as e: + print(e.message) + + +# [END documentai_disable_processor] diff --git a/documentai/snippets/disable_processor_sample_test.py b/documentai/snippets/disable_processor_sample_test.py new file mode 100644 index 00000000000..6cd98479d16 --- /dev/null +++ b/documentai/snippets/disable_processor_sample_test.py @@ -0,0 +1,37 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os + +from documentai.snippets import disable_processor_sample +from documentai.snippets import enable_processor_sample + +location = "us" +project_id = os.environ["GOOGLE_CLOUD_PROJECT"] +processor_id = "5e32eb3e1d0237c0" + + +def test_disable_processor(capsys): + disable_processor_sample.disable_processor_sample( + project_id=project_id, location=location, processor_id=processor_id + ) + out, _ = capsys.readouterr() + + assert "projects" in out or "DISABLED" in out + + # Re-Enable Processor + enable_processor_sample.enable_processor_sample( + project_id=project_id, location=location, processor_id=processor_id + ) diff --git a/documentai/snippets/enable_processor_sample.py b/documentai/snippets/enable_processor_sample.py new file mode 100644 index 00000000000..d4e43461ed2 --- /dev/null +++ b/documentai/snippets/enable_processor_sample.py @@ -0,0 +1,52 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# [START documentai_enable_processor] + +from google.api_core.client_options import ClientOptions +from google.api_core.exceptions import FailedPrecondition +from google.cloud import documentai + +# TODO(developer): Uncomment these variables before running the sample. +# project_id = 'YOUR_PROJECT_ID' +# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu' +# processor_id = 'YOUR_PROCESSOR_ID' + + +def enable_processor_sample(project_id: str, location: str, processor_id: str): + # You must set the api_endpoint if you use a location other than 'us'. + opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com") + + client = documentai.DocumentProcessorServiceClient(client_options=opts) + + # The full resource name of the processor + # e.g.: projects/project_id/locations/location/processors/processor_id + processor_name = client.processor_path(project_id, location, processor_id) + request = documentai.EnableProcessorRequest(name=processor_name) + + # Make EnableProcessor request + try: + operation = client.enable_processor(request=request) + + # Print operation name + print(operation.operation.name) + # Wait for operation to complete + operation.result() + # Cannot enable a processor that is already enabled + except FailedPrecondition as e: + print(e.message) + + +# [END documentai_enable_processor] diff --git a/documentai/snippets/enable_processor_sample_test.py b/documentai/snippets/enable_processor_sample_test.py new file mode 100644 index 00000000000..0374a642356 --- /dev/null +++ b/documentai/snippets/enable_processor_sample_test.py @@ -0,0 +1,37 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os + +from documentai.snippets import disable_processor_sample +from documentai.snippets import enable_processor_sample + +location = "us" +project_id = os.environ["GOOGLE_CLOUD_PROJECT"] +processor_id = "351535be16606fe3" + + +def test_enable_processor(capsys): + enable_processor_sample.enable_processor_sample( + project_id=project_id, location=location, processor_id=processor_id + ) + out, _ = capsys.readouterr() + + assert "projects" in out or "ENABLED" in out + + # Re-Disable Processor + disable_processor_sample.disable_processor_sample( + project_id=project_id, location=location, processor_id=processor_id + ) diff --git a/documentai/snippets/fetch_processor_types_sample.py b/documentai/snippets/fetch_processor_types_sample.py new file mode 100644 index 00000000000..19670e09484 --- /dev/null +++ b/documentai/snippets/fetch_processor_types_sample.py @@ -0,0 +1,46 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# [START documentai_fetch_processor_types] + +from google.api_core.client_options import ClientOptions +from google.cloud import documentai + +# TODO(developer): Uncomment these variables before running the sample. +# project_id = 'YOUR_PROJECT_ID' +# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu' + + +def fetch_processor_types_sample(project_id: str, location: str): + # You must set the api_endpoint if you use a location other than 'us'. + opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com") + + client = documentai.DocumentProcessorServiceClient(client_options=opts) + + # The full resource name of the location + # e.g.: projects/project_id/locations/location + parent = client.common_location_path(project_id, location) + + # Fetch all processor types + response = client.fetch_processor_types(parent=parent) + + print("Processor types:") + # Print the available processor types + for processor_type in response.processor_types: + if processor_type.allow_creation: + print(processor_type.type_) + + +# [END documentai_fetch_processor_types] diff --git a/documentai/snippets/fetch_processor_types_sample_test.py b/documentai/snippets/fetch_processor_types_sample_test.py new file mode 100644 index 00000000000..dcd27ff9e6c --- /dev/null +++ b/documentai/snippets/fetch_processor_types_sample_test.py @@ -0,0 +1,31 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os + +from documentai.snippets import fetch_processor_types_sample + +location = "us" +project_id = os.environ["GOOGLE_CLOUD_PROJECT"] + + +def test_fetch_processor_types(capsys): + fetch_processor_types_sample.fetch_processor_types_sample( + project_id=project_id, location=location + ) + out, _ = capsys.readouterr() + + assert "OCR_PROCESSOR" in out + assert "FORM_PARSER_PROCESSOR" in out diff --git a/documentai/snippets/get_operation_sample.py b/documentai/snippets/get_operation_sample.py new file mode 100644 index 00000000000..386ee1201e1 --- /dev/null +++ b/documentai/snippets/get_operation_sample.py @@ -0,0 +1,45 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# [START documentai_get_operation] + +from google.api_core.client_options import ClientOptions +from google.api_core.exceptions import NotFound +from google.cloud import documentai +from google.longrunning.operations_pb2 import GetOperationRequest + +# TODO(developer): Uncomment these variables before running the sample. +# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu' +# operation_name = 'YOUR_OPERATION_NAME' # Format is 'projects/project_id/locations/location/operations/operation_id' + + +def get_operation_sample(location: str, operation_name: str): + # You must set the api_endpoint if you use a location other than 'us'. 
+ opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com") + + client = documentai.DocumentProcessorServiceClient(client_options=opts) + + request = GetOperationRequest(name=operation_name) + + # Make GetOperation request + try: + operation = client.get_operation(request=request) + # Print the Operation Information + print(operation) + except (NotFound) as e: + print(e.message) + + +# [END documentai_get_operation] diff --git a/documentai/snippets/get_operation_sample_test.py b/documentai/snippets/get_operation_sample_test.py new file mode 100644 index 00000000000..b7ac48d699b --- /dev/null +++ b/documentai/snippets/get_operation_sample_test.py @@ -0,0 +1,32 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os + +from documentai.snippets import get_operation_sample + +location = "us" +project_id = os.environ["GOOGLE_CLOUD_PROJECT"] +operation_id = "10828996427112056798" +operation_name = f"projects/{project_id}/locations/{location}/operations/{operation_id}" + + +def test_get_operation(capsys): + get_operation_sample.get_operation_sample( + location=location, operation_name=operation_name + ) + out, _ = capsys.readouterr() + + assert "operation" in out diff --git a/documentai/snippets/get_processor_sample.py b/documentai/snippets/get_processor_sample.py new file mode 100644 index 00000000000..ebe63d9a15d --- /dev/null +++ b/documentai/snippets/get_processor_sample.py @@ -0,0 +1,46 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# [START documentai_get_processor] + +from google.api_core.client_options import ClientOptions +from google.cloud import documentai + +# TODO(developer): Uncomment these variables before running the sample. +# project_id = 'YOUR_PROJECT_ID' +# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu' +# processor_id = 'YOUR_PROCESSOR_ID' + + +def get_processor_sample(project_id: str, location: str, processor_id: str): + # You must set the api_endpoint if you use a location other than 'us'. 
+ opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com") + + client = documentai.DocumentProcessorServiceClient(client_options=opts) + + # The full resource name of the processor, e.g.: + # projects/{project_id}/locations/{location}/processors/{processor_id} + name = client.processor_path(project_id, location, processor_id) + + # Make GetProcessor request + processor = client.get_processor(name=name) + + # Print the processor information + print(f"Processor Name: {processor.name}") + print(f"Processor Display Name: {processor.display_name}") + print(f"Processor Type: {processor.type_}") + + +# [END documentai_get_processor] diff --git a/documentai/snippets/get_processor_sample_test.py b/documentai/snippets/get_processor_sample_test.py new file mode 100644 index 00000000000..e3873ddeb5f --- /dev/null +++ b/documentai/snippets/get_processor_sample_test.py @@ -0,0 +1,34 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os + +from documentai.snippets import get_processor_sample + +location = "us" +project_id = os.environ["GOOGLE_CLOUD_PROJECT"] +processor_id = "52a38e080c1a7296" + + +def test_get_processor(capsys): + get_processor_sample.get_processor_sample( + project_id=project_id, location=location, processor_id=processor_id + ) + out, _ = capsys.readouterr() + + assert "Processor Name:" in out + assert "Processor Display Name:" in out + assert "OCR_PROCESSOR" in out + assert processor_id in out diff --git a/documentai/snippets/get_processor_version_sample.py b/documentai/snippets/get_processor_version_sample.py new file mode 100644 index 00000000000..6c460bc29a6 --- /dev/null +++ b/documentai/snippets/get_processor_version_sample.py @@ -0,0 +1,51 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# [START documentai_get_processor_version] + +from google.api_core.client_options import ClientOptions +from google.cloud import documentai + +# TODO(developer): Uncomment these variables before running the sample. +# project_id = 'YOUR_PROJECT_ID' +# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu' +# processor_id = 'YOUR_PROCESSOR_ID' # Create processor before running sample +# processor_version_id = 'YOUR_PROCESSOR_VERSION_ID' + + +def get_processor_version_sample( + project_id: str, location: str, processor_id: str, processor_version_id: str +): + # You must set the api_endpoint if you use a location other than 'us'. 
+ opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com") + + client = documentai.DocumentProcessorServiceClient(client_options=opts) + + # The full resource name of the processor version + # e.g.: projects/project_id/locations/location/processors/processor_id/processorVersions/processor_version_id + name = client.processor_version_path( + project_id, location, processor_id, processor_version_id + ) + + # Make GetProcessorVersion request + processor_version = client.get_processor_version(name=name) + + # Print the processor version information + print(f"Processor Version: {processor_version_id}") + print(f"Display Name: {processor_version.display_name}") + print(processor_version.state) + + +# [END documentai_get_processor_version] diff --git a/documentai/snippets/get_processor_version_sample_test.py b/documentai/snippets/get_processor_version_sample_test.py new file mode 100644 index 00000000000..831140f40df --- /dev/null +++ b/documentai/snippets/get_processor_version_sample_test.py @@ -0,0 +1,37 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os + +from documentai.snippets import get_processor_version_sample + +location = "us" +project_id = os.environ["GOOGLE_CLOUD_PROJECT"] +processor_id = "52a38e080c1a7296" +processor_version_id = "pretrained-ocr-v1.0-2020-09-23" + + +def test_get_processor_version(capsys): + get_processor_version_sample.get_processor_version_sample( + project_id=project_id, + location=location, + processor_id=processor_id, + processor_version_id=processor_version_id, + ) + out, _ = capsys.readouterr() + + assert "Processor Version: pretrained-ocr" in out + assert "Display Name: Google Stable" in out + assert "DEPLOYED" in out diff --git a/documentai/snippets/list_operations_sample.py b/documentai/snippets/list_operations_sample.py new file mode 100644 index 00000000000..2a3573be77a --- /dev/null +++ b/documentai/snippets/list_operations_sample.py @@ -0,0 +1,55 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# [START documentai_list_operations] + +from google.api_core.client_options import ClientOptions +from google.cloud import documentai +from google.longrunning.operations_pb2 import ListOperationsRequest + +# TODO(developer): Uncomment these variables before running the sample. 
+# project_id = 'YOUR_PROJECT_ID' +# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu' + +# Create filter in https://google.aip.dev/160 syntax +# For full options, refer to: +# https://cloud.google.com/document-ai/docs/long-running-operations#listing_long-running_operations +# operations_filter = 'YOUR_FILTER' + +# Example: +# operations_filter = "TYPE=BATCH_PROCESS_DOCUMENTS AND STATE=RUNNING" + + +def list_operations_sample(project_id: str, location: str, operations_filter: str): + # You must set the api_endpoint if you use a location other than 'us'. + opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com") + + client = documentai.DocumentProcessorServiceClient(client_options=opts) + + # Format: projects/project_id/locations/location + name = client.common_location_path(project=project_id, location=location) + request = ListOperationsRequest( + name=f"{name}/operations", + filter=operations_filter, + ) + + # Make ListOperations request + operations = client.list_operations(request=request) + + # Print the Operation Information + print(operations) + + +# [END documentai_list_operations] diff --git a/documentai/snippets/list_operations_sample_test.py b/documentai/snippets/list_operations_sample_test.py new file mode 100644 index 00000000000..b433a62db91 --- /dev/null +++ b/documentai/snippets/list_operations_sample_test.py @@ -0,0 +1,31 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os + +from documentai.snippets import list_operations_sample + +location = "us" +project_id = os.environ["GOOGLE_CLOUD_PROJECT"] +operations_filter = "TYPE=BATCH_PROCESS_DOCUMENTS AND STATE=DONE" + + +def test_list_operations(capsys): + list_operations_sample.list_operations_sample( + project_id=project_id, location=location, operations_filter=operations_filter + ) + out, _ = capsys.readouterr() + + assert "operations" in out diff --git a/documentai/snippets/list_processor_versions_sample.py b/documentai/snippets/list_processor_versions_sample.py new file mode 100644 index 00000000000..9affed92586 --- /dev/null +++ b/documentai/snippets/list_processor_versions_sample.py @@ -0,0 +1,52 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# [START documentai_list_processor_versions] + +from google.api_core.client_options import ClientOptions +from google.cloud import documentai + +# TODO(developer): Uncomment these variables before running the sample. +# project_id = 'YOUR_PROJECT_ID' +# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu' +# processor_id = 'YOUR_PROCESSOR_ID' # Create processor before running sample + + +def list_processor_versions_sample(project_id: str, location: str, processor_id: str): + # You must set the api_endpoint if you use a location other than 'us'. 
+ opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com") + + client = documentai.DocumentProcessorServiceClient(client_options=opts) + + # The full resource name of the processor + # e.g.: projects/project_id/locations/location/processors/processor_id + parent = client.processor_path(project_id, location, processor_id) + + # Make ListProcessorVersions request + processor_versions = client.list_processor_versions(parent=parent) + + # Print the processor version information + for processor_version in processor_versions: + processor_version_id = client.parse_processor_version_path( + processor_version.name + )["processor_version"] + + print(f"Processor Version: {processor_version_id}") + print(f"Display Name: {processor_version.display_name}") + print(processor_version.state) + print("") + + +# [END documentai_list_processor_versions] diff --git a/documentai/snippets/list_processor_versions_sample_test.py b/documentai/snippets/list_processor_versions_sample_test.py new file mode 100644 index 00000000000..d7bb44e3b1f --- /dev/null +++ b/documentai/snippets/list_processor_versions_sample_test.py @@ -0,0 +1,34 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os + +from documentai.snippets import list_processor_versions_sample + +location = "us" +project_id = os.environ["GOOGLE_CLOUD_PROJECT"] +processor_id = "52a38e080c1a7296" + + +def test_list_processor_versions(capsys): + list_processor_versions_sample.list_processor_versions_sample( + project_id=project_id, location=location, processor_id=processor_id + ) + out, _ = capsys.readouterr() + + assert "Processor Version: pretrained-ocr" in out + assert "Display Name: Google Stable" in out + assert "Display Name: Google Release Candidate" in out + assert "DEPLOYED" in out diff --git a/documentai/snippets/list_processors_sample.py b/documentai/snippets/list_processors_sample.py new file mode 100644 index 00000000000..d4a654eb969 --- /dev/null +++ b/documentai/snippets/list_processors_sample.py @@ -0,0 +1,47 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# [START documentai_list_processors] + +from google.api_core.client_options import ClientOptions +from google.cloud import documentai + +# TODO(developer): Uncomment these variables before running the sample. +# project_id = 'YOUR_PROJECT_ID' +# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu' + + +def list_processors_sample(project_id: str, location: str): + # You must set the api_endpoint if you use a location other than 'us'. 
+ opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com") + + client = documentai.DocumentProcessorServiceClient(client_options=opts) + + # The full resource name of the location + # e.g.: projects/project_id/locations/location + parent = client.common_location_path(project_id, location) + + # Make ListProcessors request + processor_list = client.list_processors(parent=parent) + + # Print the processor information + for processor in processor_list: + print(f"Processor Name: {processor.name}") + print(f"Processor Display Name: {processor.display_name}") + print(f"Processor Type: {processor.type_}") + print("") + + +# [END documentai_list_processors] diff --git a/documentai/snippets/list_processors_sample_test.py b/documentai/snippets/list_processors_sample_test.py new file mode 100644 index 00000000000..72520677e14 --- /dev/null +++ b/documentai/snippets/list_processors_sample_test.py @@ -0,0 +1,33 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os + +from documentai.snippets import list_processors_sample + +location = "us" +project_id = os.environ["GOOGLE_CLOUD_PROJECT"] + + +def test_list_processors(capsys): + list_processors_sample.list_processors_sample( + project_id=project_id, location=location + ) + out, _ = capsys.readouterr() + + assert "Processor Name:" in out + assert "Processor Display Name:" in out + assert "OCR_PROCESSOR" in out + assert "FORM_PARSER_PROCESSOR" in out diff --git a/documentai/snippets/poll_operation_sample.py b/documentai/snippets/poll_operation_sample.py new file mode 100644 index 00000000000..17a055a8267 --- /dev/null +++ b/documentai/snippets/poll_operation_sample.py @@ -0,0 +1,57 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# [START documentai_poll_operation] + +from time import sleep + +from google.api_core.client_options import ClientOptions +from google.api_core.exceptions import NotFound +from google.cloud import documentai +from google.longrunning.operations_pb2 import GetOperationRequest + +# TODO(developer): Uncomment these variables before running the sample. +# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu' +# operation_name = 'YOUR_OPERATION_NAME' # Format is 'projects/project_id/locations/location/operations/operation_id' + + +def poll_operation_sample(location: str, operation_name: str): + # You must set the api_endpoint if you use a location other than 'us'. 
+ opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com") + + client = documentai.DocumentProcessorServiceClient(client_options=opts) + + request = GetOperationRequest(name=operation_name) + + while True: + # Make GetOperation request + try: + operation = client.get_operation(request=request) + except (NotFound) as e: + print(e.message) + break + + # Print the Operation Information + print(operation) + + # Stop Polling when Operation is no longer running + if operation.done: + break + + # Wait 10 seconds before polling again + sleep(10) + + +# [END documentai_poll_operation] diff --git a/documentai/snippets/poll_operation_sample_test.py b/documentai/snippets/poll_operation_sample_test.py new file mode 100644 index 00000000000..e1aa762ca06 --- /dev/null +++ b/documentai/snippets/poll_operation_sample_test.py @@ -0,0 +1,32 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os + +from documentai.snippets import poll_operation_sample + +location = "us" +project_id = os.environ["GOOGLE_CLOUD_PROJECT"] +operation_id = "10828996427112056798" +operation_name = f"projects/{project_id}/locations/{location}/operations/{operation_id}" + + +def test_poll_operation(capsys): + poll_operation_sample.poll_operation_sample( + location=location, operation_name=operation_name + ) + out, _ = capsys.readouterr() + + assert "operation" in out diff --git a/documentai/snippets/process_document_form_sample.py b/documentai/snippets/process_document_form_sample.py new file mode 100644 index 00000000000..f3b323bdd29 --- /dev/null +++ b/documentai/snippets/process_document_form_sample.py @@ -0,0 +1,128 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# [START documentai_process_form_document] + +from typing import Sequence + +from google.api_core.client_options import ClientOptions +from google.cloud import documentai + +# TODO(developer): Uncomment these variables before running the sample. 
+# project_id = 'YOUR_PROJECT_ID' +# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu' +# processor_id = 'YOUR_PROCESSOR_ID' # Create processor before running sample +# file_path = '/path/to/local/pdf' +# mime_type = 'application/pdf' # Refer to https://cloud.google.com/document-ai/docs/file-types for supported file types + + +def process_document_form_sample( + project_id: str, location: str, processor_id: str, file_path: str, mime_type: str +): + # Online processing request to Document AI + document = process_document( + project_id, location, processor_id, file_path, mime_type + ) + + # Read the table and form fields output from the processor + # The form processor also contains OCR data. For more information + # on how to parse OCR data please see the OCR sample. + + # For a full list of Document object attributes, please reference this page: + # https://cloud.google.com/python/docs/reference/documentai/latest/google.cloud.documentai_v1.types.Document + + text = document.text + print(f"Full document text: {repr(text)}\n") + print(f"There are {len(document.pages)} page(s) in this document.") + + # Read the form fields and tables output from the processor + for page in document.pages: + print(f"\n\n**** Page {page.page_number} ****") + + print(f"\nFound {len(page.tables)} table(s):") + for table in page.tables: + num_collumns = len(table.header_rows[0].cells) + num_rows = len(table.body_rows) + print(f"Table with {num_collumns} columns and {num_rows} rows:") + + # Print header rows + print("Columns:") + print_table_rows(table.header_rows, text) + # Print body rows + print("Table body data:") + print_table_rows(table.body_rows, text) + + print(f"\nFound {len(page.form_fields)} form field(s):") + for field in page.form_fields: + name = layout_to_text(field.field_name, text) + value = layout_to_text(field.field_value, text) + print(f" * {repr(name.strip())}: {repr(value.strip())}") + + +def process_document( + project_id: str, location: str, processor_id: 
str, file_path: str, mime_type: str +) -> documentai.Document: + # You must set the api_endpoint if you use a location other than 'us'. + opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com") + + client = documentai.DocumentProcessorServiceClient(client_options=opts) + + # The full resource name of the processor, e.g.: + # projects/project_id/locations/location/processors/processor_id + name = client.processor_path(project_id, location, processor_id) + + # Read the file into memory + with open(file_path, "rb") as image: + image_content = image.read() + + # Load Binary Data into Document AI RawDocument Object + raw_document = documentai.RawDocument(content=image_content, mime_type=mime_type) + + # Configure the process request + request = documentai.ProcessRequest(name=name, raw_document=raw_document) + + result = client.process_document(request=request) + + return result.document + + +def print_table_rows( + table_rows: Sequence[documentai.Document.Page.Table.TableRow], text: str +) -> None: + for table_row in table_rows: + row_text = "" + for cell in table_row.cells: + cell_text = layout_to_text(cell.layout, text) + row_text += f"{repr(cell_text.strip())} | " + print(row_text) + + +def layout_to_text(layout: documentai.Document.Page.Layout, text: str) -> str: + """ + Document AI identifies text in different parts of the document by their + offsets in the entirety of the document's text. This function converts + offsets to a string. + """ + response = "" + # If a text segment spans several lines, it will + # be stored in different text segments. 
+ for segment in layout.text_anchor.text_segments: + start_index = int(segment.start_index) + end_index = int(segment.end_index) + response += text[start_index:end_index] + return response + + +# [END documentai_process_form_document] diff --git a/documentai/snippets/process_document_form_sample_test.py b/documentai/snippets/process_document_form_sample_test.py new file mode 100644 index 00000000000..cea2e2a3483 --- /dev/null +++ b/documentai/snippets/process_document_form_sample_test.py @@ -0,0 +1,44 @@ +# # Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os + +from documentai.snippets import process_document_form_sample + +location = "us" +project_id = os.environ["GOOGLE_CLOUD_PROJECT"] +processor_id = "90484cfdedb024f6" +file_path = "resources/invoice.pdf" +mime_type = "application/pdf" + + +def test_process_documents(capsys): + process_document_form_sample.process_document_form_sample( + project_id=project_id, + location=location, + processor_id=processor_id, + file_path=file_path, + mime_type=mime_type, + ) + out, _ = capsys.readouterr() + + expected_strings = [ + "There are 1 page(s) in this document.", + "Table with 4 columns and 6 rows", + "Found 13 form field(s)", + "'BALANCE DUE': '$2140.00'", + ] + for expected_string in expected_strings: + assert expected_string in out diff --git a/documentai/snippets/process_document_ocr_sample.py b/documentai/snippets/process_document_ocr_sample.py new file mode 100644 index 00000000000..a8fb6605e09 --- /dev/null +++ b/documentai/snippets/process_document_ocr_sample.py @@ -0,0 +1,179 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# [START documentai_process_ocr_document] + +from typing import Sequence + +from google.api_core.client_options import ClientOptions +from google.cloud import documentai + +# TODO(developer): Uncomment these variables before running the sample. 
+# project_id = 'YOUR_PROJECT_ID' +# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu' +# processor_id = 'YOUR_PROCESSOR_ID' # Create processor before running sample +# processor_version = 'rc' # Refer to https://cloud.google.com/document-ai/docs/manage-processor-versions for more information +# file_path = '/path/to/local/pdf' +# mime_type = 'application/pdf' # Refer to https://cloud.google.com/document-ai/docs/file-types for supported file types + + +def process_document_ocr_sample( + project_id: str, + location: str, + processor_id: str, + processor_version: str, + file_path: str, + mime_type: str, +) -> None: + # Online processing request to Document AI + document = process_document( + project_id, location, processor_id, processor_version, file_path, mime_type + ) + + # For a full list of Document object attributes, please reference this page: + # https://cloud.google.com/python/docs/reference/documentai/latest/google.cloud.documentai_v1.types.Document + + text = document.text + print(f"Full document text: {text}\n") + print(f"There are {len(document.pages)} page(s) in this document.\n") + + for page in document.pages: + print(f"Page {page.page_number}:") + print_page_dimensions(page.dimension) + print_detected_langauges(page.detected_languages) + print_paragraphs(page.paragraphs, text) + print_blocks(page.blocks, text) + print_lines(page.lines, text) + print_tokens(page.tokens, text) + + # Currently supported in version pretrained-ocr-v1.1-2022-09-12 + if page.image_quality_scores: + print_image_quality_scores(page.image_quality_scores) + + +def process_document( + project_id: str, + location: str, + processor_id: str, + processor_version: str, + file_path: str, + mime_type: str, +) -> documentai.Document: + # You must set the api_endpoint if you use a location other than 'us'. 
+ opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com") + + client = documentai.DocumentProcessorServiceClient(client_options=opts) + + # The full resource name of the processor version + # e.g. projects/{project_id}/locations/{location}/processors/{processor_id}/processorVersions/{processor_version_id} + # You must create processors before running sample code. + name = client.processor_version_path( + project_id, location, processor_id, processor_version + ) + + # Read the file into memory + with open(file_path, "rb") as image: + image_content = image.read() + + # Load Binary Data into Document AI RawDocument Object + raw_document = documentai.RawDocument(content=image_content, mime_type=mime_type) + + # Configure the process request + request = documentai.ProcessRequest(name=name, raw_document=raw_document) + + result = client.process_document(request=request) + + return result.document + + +def print_page_dimensions(dimension: documentai.Document.Page.Dimension) -> None: + print(f" Width: {str(dimension.width)}") + print(f" Height: {str(dimension.height)}") + + +def print_detected_langauges( + detected_languages: Sequence[documentai.Document.Page.DetectedLanguage], +) -> None: + print(" Detected languages:") + for lang in detected_languages: + code = lang.language_code + print(f" {code} ({lang.confidence:.1%} confidence)") + + +def print_paragraphs( + paragraphs: Sequence[documentai.Document.Page.Paragraph], text: str +) -> None: + print(f" {len(paragraphs)} paragraphs detected:") + first_paragraph_text = layout_to_text(paragraphs[0].layout, text) + print(f" First paragraph text: {repr(first_paragraph_text)}") + last_paragraph_text = layout_to_text(paragraphs[-1].layout, text) + print(f" Last paragraph text: {repr(last_paragraph_text)}") + + +def print_blocks(blocks: Sequence[documentai.Document.Page.Block], text: str) -> None: + print(f" {len(blocks)} blocks detected:") + first_block_text = layout_to_text(blocks[0].layout, text) + print(f" 
First text block: {repr(first_block_text)}") + last_block_text = layout_to_text(blocks[-1].layout, text) + print(f" Last text block: {repr(last_block_text)}") + + +def print_lines(lines: Sequence[documentai.Document.Page.Line], text: str) -> None: + print(f" {len(lines)} lines detected:") + first_line_text = layout_to_text(lines[0].layout, text) + print(f" First line text: {repr(first_line_text)}") + last_line_text = layout_to_text(lines[-1].layout, text) + print(f" Last line text: {repr(last_line_text)}") + + +def print_tokens(tokens: Sequence[documentai.Document.Page.Token], text: str) -> None: + print(f" {len(tokens)} tokens detected:") + first_token_text = layout_to_text(tokens[0].layout, text) + first_token_break_type = tokens[0].detected_break.type_.name + print(f" First token text: {repr(first_token_text)}") + print(f" First token break type: {repr(first_token_break_type)}") + last_token_text = layout_to_text(tokens[-1].layout, text) + last_token_break_type = tokens[-1].detected_break.type_.name + print(f" Last token text: {repr(last_token_text)}") + print(f" Last token break type: {repr(last_token_break_type)}") + + +def print_image_quality_scores( + image_quality_scores: documentai.Document.Page.ImageQualityScores, +) -> None: + print(f" Quality score: {image_quality_scores.quality_score:.1%}") + print(" Detected defects:") + + for detected_defect in image_quality_scores.detected_defects: + print(f" {detected_defect.type_}: {detected_defect.confidence:.1%}") + + +def layout_to_text(layout: documentai.Document.Page.Layout, text: str) -> str: + """ + Document AI identifies text in different parts of the document by their + offsets in the entirety of the document's text. This function converts + offsets to a string. + """ + response = "" + # If a text segment spans several lines, it will + # be stored in different text segments. 
+ for segment in layout.text_anchor.text_segments: + start_index = int(segment.start_index) + end_index = int(segment.end_index) + response += text[start_index:end_index] + return response + + +# [END documentai_process_ocr_document] diff --git a/documentai/snippets/process_document_ocr_sample_test.py b/documentai/snippets/process_document_ocr_sample_test.py new file mode 100644 index 00000000000..b2ead614714 --- /dev/null +++ b/documentai/snippets/process_document_ocr_sample_test.py @@ -0,0 +1,41 @@ +# # Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os + +from documentai.snippets import process_document_ocr_sample + +location = "us" +project_id = os.environ["GOOGLE_CLOUD_PROJECT"] +processor_id = "52a38e080c1a7296" +processor_version = "pretrained-ocr-v1.1-2022-09-12" +file_path = "resources/handwritten_form.pdf" +mime_type = "application/pdf" + + +def test_process_documents(capsys): + process_document_ocr_sample.process_document_ocr_sample( + project_id=project_id, + location=location, + processor_id=processor_id, + processor_version=processor_version, + file_path=file_path, + mime_type=mime_type, + ) + out, _ = capsys.readouterr() + + assert "Page 1" in out + assert "en" in out + assert "FakeDoc" in out diff --git a/documentai/snippets/process_document_processor_version_sample.py b/documentai/snippets/process_document_processor_version_sample.py new file mode 100644 index 00000000000..ecc1b2ed4eb --- /dev/null +++ b/documentai/snippets/process_document_processor_version_sample.py @@ -0,0 +1,74 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# [START documentai_process_document_processor_version] + +from google.api_core.client_options import ClientOptions +from google.cloud import documentai + +# TODO(developer): Uncomment these variables before running the sample. 
+# project_id = 'YOUR_PROJECT_ID' +# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu' +# processor_id = 'YOUR_PROCESSOR_ID' # Create processor before running sample +# processor_version_id = 'YOUR_PROCESSOR_VERSION_ID' # Processor version to use +# file_path = '/path/to/local/pdf' +# mime_type = 'application/pdf' # Refer to https://cloud.google.com/document-ai/docs/file-types for supported file types +# field_mask = "text,entities,pages.pageNumber" # Optional. The fields to return in the Document object. + + +def process_document_processor_version_sample( + project_id: str, + location: str, + processor_id: str, + processor_version_id: str, + file_path: str, + mime_type: str, + field_mask: str = None, +): + # You must set the api_endpoint if you use a location other than 'us'. + opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com") + + client = documentai.DocumentProcessorServiceClient(client_options=opts) + + # The full resource name of the processor version + # e.g. 
projects/{project_id}/locations/{location}/processors/{processor_id}/processorVersions/{processor_version_id} + name = client.processor_version_path( + project_id, location, processor_id, processor_version_id + ) + + # Read the file into memory + with open(file_path, "rb") as image: + image_content = image.read() + + # Load Binary Data into Document AI RawDocument Object + raw_document = documentai.RawDocument(content=image_content, mime_type=mime_type) + + # Configure the process request + request = documentai.ProcessRequest( + name=name, raw_document=raw_document, field_mask=field_mask + ) + + result = client.process_document(request=request) + + # For a full list of Document object attributes, please reference this page: + # https://cloud.google.com/python/docs/reference/documentai/latest/google.cloud.documentai_v1.types.Document + document = result.document + + # Read the text recognition output from the processor + print("The document contains the following text:") + print(document.text) + + +# [END documentai_process_document_processor_version] diff --git a/documentai/snippets/process_document_processor_version_sample_test.py b/documentai/snippets/process_document_processor_version_sample_test.py new file mode 100644 index 00000000000..4094a76ba1b --- /dev/null +++ b/documentai/snippets/process_document_processor_version_sample_test.py @@ -0,0 +1,42 @@ +# # Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os + +from documentai.snippets import process_document_processor_version_sample + +location = "us" +project_id = os.environ["GOOGLE_CLOUD_PROJECT"] +processor_id = "90484cfdedb024f6" +processor_version_id = "stable" +file_path = "resources/invoice.pdf" +mime_type = "application/pdf" +field_mask = "text,pages.pageNumber" + + +def test_process_document_processor_versions(capsys): + process_document_processor_version_sample.process_document_processor_version_sample( + project_id=project_id, + location=location, + processor_id=processor_id, + processor_version_id=processor_version_id, + file_path=file_path, + mime_type=mime_type, + field_mask=field_mask, + ) + out, _ = capsys.readouterr() + + assert "text:" in out + assert "Invoice" in out diff --git a/documentai/snippets/process_document_quality_sample.py b/documentai/snippets/process_document_quality_sample.py new file mode 100644 index 00000000000..74b86400cff --- /dev/null +++ b/documentai/snippets/process_document_quality_sample.py @@ -0,0 +1,80 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# [START documentai_process_quality_document] + +from google.api_core.client_options import ClientOptions +from google.cloud import documentai + +# TODO(developer): Uncomment these variables before running the sample. 
+# project_id = 'YOUR_PROJECT_ID' +# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu' +# processor_id = 'YOUR_PROCESSOR_ID' # Create processor before running sample +# file_path = '/path/to/local/pdf' +# mime_type = 'application/pdf' # Refer to https://cloud.google.com/document-ai/docs/file-types for supported file types + + +def process_document_quality_sample( + project_id: str, location: str, processor_id: str, file_path: str, mime_type: str +): + # Online processing request to Document AI + document = process_document( + project_id, location, processor_id, file_path, mime_type + ) + + # Read the quality-specific information from the output from the + # Intelligent Document Quality Processor: + # https://cloud.google.com/document-ai/docs/processors-list#processor_doc-quality-processor + # OCR and other data is also present in the quality processor's response. + # Please see the OCR and other samples for how to parse other data in the + # response. + for entity in document.entities: + conf_percent = f"{entity.confidence:.1%}" + page_num = str(int(entity.page_anchor.page_refs[0].page) + 1) + print(f"\nPage {page_num} has a quality score of {conf_percent}") + + for prop in entity.properties: + conf_percent = f"{prop.confidence:.1%}" + print(f" * {prop.type_} score of {conf_percent}") + + +def process_document( + project_id: str, location: str, processor_id: str, file_path: str, mime_type: str +) -> documentai.Document: + # You must set the api_endpoint if you use a location other than 'us'. 
+ opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com") + + client = documentai.DocumentProcessorServiceClient(client_options=opts) + + # The full resource name of the processor, e.g.: + # projects/project_id/locations/location/processors/processor_id + name = client.processor_path(project_id, location, processor_id) + + # Read the file into memory + with open(file_path, "rb") as image: + image_content = image.read() + + # Load Binary Data into Document AI RawDocument Object + raw_document = documentai.RawDocument(content=image_content, mime_type=mime_type) + + # Configure the process request + request = documentai.ProcessRequest(name=name, raw_document=raw_document) + + result = client.process_document(request=request) + + return result.document + + +# [END documentai_process_quality_document] diff --git a/documentai/snippets/process_document_quality_sample_test.py b/documentai/snippets/process_document_quality_sample_test.py new file mode 100644 index 00000000000..64169be5c90 --- /dev/null +++ b/documentai/snippets/process_document_quality_sample_test.py @@ -0,0 +1,43 @@ +# # Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os + +from documentai.snippets import process_document_quality_sample + +location = "us" +project_id = os.environ["GOOGLE_CLOUD_PROJECT"] +processor_id = "7fcb597c523721b3" +poor_quality_file_path = "resources/document_quality_poor.pdf" +mime_type = "application/pdf" + + +def test_process_documents(capsys): + process_document_quality_sample.process_document_quality_sample( + project_id=project_id, + location=location, + processor_id=processor_id, + file_path=poor_quality_file_path, + mime_type=mime_type, + ) + out, _ = capsys.readouterr() + + expected_strings = [ + "Page 1 has a quality score of", + "defect_blurry score of 9", + "defect_noisy", + ] + for expected_string in expected_strings: + assert expected_string in out diff --git a/documentai/snippets/process_document_sample.py b/documentai/snippets/process_document_sample.py new file mode 100644 index 00000000000..c0d5ee9f976 --- /dev/null +++ b/documentai/snippets/process_document_sample.py @@ -0,0 +1,70 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# [START documentai_process_document] + +from google.api_core.client_options import ClientOptions +from google.cloud import documentai + +# TODO(developer): Uncomment these variables before running the sample. 
+# project_id = 'YOUR_PROJECT_ID' +# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu' +# processor_id = 'YOUR_PROCESSOR_ID' # Create processor before running sample +# file_path = '/path/to/local/pdf' +# mime_type = 'application/pdf' # Refer to https://cloud.google.com/document-ai/docs/file-types for supported file types +# field_mask = "text,entities,pages.pageNumber" # Optional. The fields to return in the Document object. + + +def process_document_sample( + project_id: str, + location: str, + processor_id: str, + file_path: str, + mime_type: str, + field_mask: str = None, +): + # You must set the api_endpoint if you use a location other than 'us'. + opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com") + + client = documentai.DocumentProcessorServiceClient(client_options=opts) + + # The full resource name of the processor, e.g.: + # projects/{project_id}/locations/{location}/processors/{processor_id} + name = client.processor_path(project_id, location, processor_id) + + # Read the file into memory + with open(file_path, "rb") as image: + image_content = image.read() + + # Load Binary Data into Document AI RawDocument Object + raw_document = documentai.RawDocument(content=image_content, mime_type=mime_type) + + # Configure the process request + request = documentai.ProcessRequest( + name=name, raw_document=raw_document, field_mask=field_mask + ) + + result = client.process_document(request=request) + + # For a full list of Document object attributes, please reference this page: + # https://cloud.google.com/python/docs/reference/documentai/latest/google.cloud.documentai_v1.types.Document + document = result.document + + # Read the text recognition output from the processor + print("The document contains the following text:") + print(document.text) + + +# [END documentai_process_document] diff --git a/documentai/snippets/process_document_sample_test.py b/documentai/snippets/process_document_sample_test.py new file mode 100644 index 
00000000000..b4f0e092950 --- /dev/null +++ b/documentai/snippets/process_document_sample_test.py @@ -0,0 +1,40 @@ +# # Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os + +from documentai.snippets import process_document_sample + +location = "us" +project_id = os.environ["GOOGLE_CLOUD_PROJECT"] +processor_id = "90484cfdedb024f6" +file_path = "resources/invoice.pdf" +mime_type = "application/pdf" +field_mask = "text,pages.pageNumber" + + +def test_process_documents(capsys): + process_document_sample.process_document_sample( + project_id=project_id, + location=location, + processor_id=processor_id, + file_path=file_path, + mime_type=mime_type, + field_mask=field_mask, + ) + out, _ = capsys.readouterr() + + assert "text:" in out + assert "Invoice" in out diff --git a/documentai/snippets/process_document_specialized_sample.py b/documentai/snippets/process_document_specialized_sample.py new file mode 100644 index 00000000000..2b45f8803ae --- /dev/null +++ b/documentai/snippets/process_document_specialized_sample.py @@ -0,0 +1,98 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# [START documentai_process_specialized_document] + + +from google.api_core.client_options import ClientOptions +from google.cloud import documentai + +# TODO(developer): Uncomment these variables before running the sample. +# project_id = 'YOUR_PROJECT_ID' +# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu' +# processor_id = 'YOUR_PROCESSOR_ID' # Create processor before running sample +# file_path = '/path/to/local/pdf' +# mime_type = 'application/pdf' # Refer to https://cloud.google.com/document-ai/docs/file-types for supported file types + + +def process_document_specialized_sample( + project_id: str, location: str, processor_id: str, file_path: str, mime_type: str +): + # Online processing request to Document AI + document = process_document( + project_id, location, processor_id, file_path, mime_type + ) + + # Extract entities from a specialized document + # Most specialized processors follow a similar pattern. + # For a complete list of processors see: + # https://cloud.google.com/document-ai/docs/processors-list + # + # OCR and other data is also present in the quality processor's response. + # Please see the OCR and other samples for how to parse other data in the + # response. + + print(f"Found {len(document.entities)} entities:") + for entity in document.entities: + print_entity(entity) + # Print Nested Entities (if any) + for prop in entity.properties: + print_entity(prop) + + +def print_entity(entity: documentai.Document.Entity) -> None: + # Fields detected.
For a full list of fields for each processor see + # the processor documentation: + # https://cloud.google.com/document-ai/docs/processors-list + key = entity.type_ + # some other value formats in addition to text are available + # e.g. dates: `entity.normalized_value.date_value.year` + text_value = entity.text_anchor.content + confidence = entity.confidence + normalized_value = entity.normalized_value.text + print(f" * {repr(key)}: {repr(text_value)}({confidence:.1%} confident)") + + if normalized_value: + print(f" * Normalized Value: {repr(normalized_value)}") + + +def process_document( + project_id: str, location: str, processor_id: str, file_path: str, mime_type: str +) -> documentai.Document: + # You must set the api_endpoint if you use a location other than 'us'. + opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com") + + client = documentai.DocumentProcessorServiceClient(client_options=opts) + + # The full resource name of the processor, e.g.: + # projects/project_id/locations/location/processors/processor_id + name = client.processor_path(project_id, location, processor_id) + + # Read the file into memory + with open(file_path, "rb") as image: + image_content = image.read() + + # Load Binary Data into Document AI RawDocument Object + raw_document = documentai.RawDocument(content=image_content, mime_type=mime_type) + + # Configure the process request + request = documentai.ProcessRequest(name=name, raw_document=raw_document) + + result = client.process_document(request=request) + + return result.document + + +# [END documentai_process_specialized_document] diff --git a/documentai/snippets/process_document_specialized_sample_test.py b/documentai/snippets/process_document_specialized_sample_test.py new file mode 100644 index 00000000000..00a0f82f502 --- /dev/null +++ b/documentai/snippets/process_document_specialized_sample_test.py @@ -0,0 +1,42 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the
"License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os + +from documentai.snippets import process_document_specialized_sample + +location = "us" +project_id = os.environ["GOOGLE_CLOUD_PROJECT"] +processor_id = "feacd98c28866ede" +file_path = "resources/us_driver_license.pdf" +mime_type = "application/pdf" + + +def test_process_documents(capsys): + process_document_specialized_sample.process_document_specialized_sample( + project_id=project_id, + location=location, + processor_id=processor_id, + file_path=file_path, + mime_type=mime_type, + ) + out, _ = capsys.readouterr() + + expected_strings = [ + "Document Id", + "97551579", + ] + for expected_string in expected_strings: + assert expected_string in out diff --git a/documentai/snippets/process_document_splitter_sample.py b/documentai/snippets/process_document_splitter_sample.py new file mode 100644 index 00000000000..99a7e54e3ca --- /dev/null +++ b/documentai/snippets/process_document_splitter_sample.py @@ -0,0 +1,101 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +# [START documentai_process_splitter_document] + +from typing import Sequence + +from google.api_core.client_options import ClientOptions +from google.cloud import documentai + +# TODO(developer): Uncomment these variables before running the sample. +# project_id = 'YOUR_PROJECT_ID' +# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu' +# processor_id = 'YOUR_PROCESSOR_ID' # Create processor before running sample +# file_path = '/path/to/local/pdf' +# mime_type = 'application/pdf' # Refer to https://cloud.google.com/document-ai/docs/file-types for supported file types + + +def process_document_splitter_sample( + project_id: str, location: str, processor_id: str, file_path: str, mime_type: str +): + # Online processing request to Document AI + document = process_document( + project_id, location, processor_id, file_path, mime_type + ) + + # Read the splitter output from a document splitter/classifier processor: + # e.g. https://cloud.google.com/document-ai/docs/processors-list#processor_procurement-document-splitter + # This processor only provides text for the document and information on how + # to split the document on logical boundaries. To identify and extract text, + # form elements, and entities please see other processors like the OCR, form, + # and specialized processors. + + print(f"Found {len(document.entities)} subdocuments:") + for entity in document.entities: + conf_percent = f"{entity.confidence:.1%}" + pages_range = page_refs_to_string(entity.page_anchor.page_refs) + + # Print subdocument type information, if available + if entity.type_: + print( + f"{conf_percent} confident that {pages_range} a '{entity.type_}' subdocument."
+ ) + else: + print(f"{conf_percent} confident that {pages_range} a subdocument.") + + +def process_document( + project_id: str, location: str, processor_id: str, file_path: str, mime_type: str +) -> documentai.Document: + # You must set the api_endpoint if you use a location other than 'us'. + opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com") + + client = documentai.DocumentProcessorServiceClient(client_options=opts) + + # The full resource name of the processor, e.g.: + # projects/project_id/locations/location/processors/processor_id + name = client.processor_path(project_id, location, processor_id) + + # Read the file into memory + with open(file_path, "rb") as image: + image_content = image.read() + + # Load Binary Data into Document AI RawDocument Object + raw_document = documentai.RawDocument(content=image_content, mime_type=mime_type) + + # Configure the process request + request = documentai.ProcessRequest(name=name, raw_document=raw_document) + + result = client.process_document(request=request) + + return result.document + + +def page_refs_to_string( + page_refs: Sequence[documentai.Document.PageAnchor.PageRef], +) -> str: + """Converts a page ref to a string describing the page or page range.""" + if len(page_refs) == 1: + num = str(int(page_refs[0].page) + 1) + return f"page {num} is" + + nums = "" + for page_ref in page_refs: + nums += f"{int(page_ref.page) + 1}, " + return f"pages {nums[:-2]} are" + + +# [END documentai_process_splitter_document] diff --git a/documentai/snippets/process_document_splitter_sample_test.py b/documentai/snippets/process_document_splitter_sample_test.py new file mode 100644 index 00000000000..115c3ebbde7 --- /dev/null +++ b/documentai/snippets/process_document_splitter_sample_test.py @@ -0,0 +1,46 @@ +# # Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os + +from documentai.snippets import process_document_splitter_sample + +location = "us" +project_id = os.environ["GOOGLE_CLOUD_PROJECT"] +processor_id = "ed55eeb2b276066f" +file_path = "resources/multi_document.pdf" +mime_type = "application/pdf" + + +def test_process_documents(capsys): + process_document_splitter_sample.process_document_splitter_sample( + project_id=project_id, + location=location, + processor_id=processor_id, + file_path=file_path, + mime_type=mime_type, + ) + out, _ = capsys.readouterr() + + # Remove newlines and quotes from output for easier comparison + out = out.replace(' "" ', " ").replace("\n", "") + + expected_strings = [ + "Found 8 subdocuments", + "confident that pages 1, 2 are a subdocument", + "confident that page 10 is a subdocument", + ] + for expected_string in expected_strings: + assert expected_string in out diff --git a/documentai/snippets/quickstart_sample.py b/documentai/snippets/quickstart_sample.py new file mode 100644 index 00000000000..cb15ccf35ef --- /dev/null +++ b/documentai/snippets/quickstart_sample.py @@ -0,0 +1,64 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + + +# [START documentai_quickstart] + + +from google.api_core.client_options import ClientOptions +from google.cloud import documentai + +# TODO(developer): Uncomment these variables before running the sample. +# project_id = 'YOUR_PROJECT_ID' +# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu' +# processor_id = 'YOUR_PROCESSOR_ID' # Create processor before running sample +# file_path = '/path/to/local/pdf' +# mime_type = 'application/pdf' # Refer to https://cloud.google.com/document-ai/docs/file-types for supported file types + + +def quickstart( + project_id: str, location: str, processor_id: str, file_path: str, mime_type: str +): + # You must set the api_endpoint if you use a location other than 'us'. + opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com") + + client = documentai.DocumentProcessorServiceClient(client_options=opts) + + # The full resource name of the processor, e.g.: + # projects/project_id/locations/location/processors/processor_id + name = client.processor_path(project_id, location, processor_id) + + # Read the file into memory + with open(file_path, "rb") as image: + image_content = image.read() + + # Load Binary Data into Document AI RawDocument Object + raw_document = documentai.RawDocument(content=image_content, mime_type=mime_type) + + # Configure the process request + request = documentai.ProcessRequest(name=name, raw_document=raw_document) + + result = client.process_document(request=request) + + # For a full list of Document object attributes, please reference this page: + # https://cloud.google.com/python/docs/reference/documentai/latest/google.cloud.documentai_v1.types.Document + document = result.document + + # Read the text recognition output from the processor + print("The document contains the following text:") + print(document.text) + + +# [END documentai_quickstart] diff --git
a/documentai/snippets/quickstart_sample_test.py b/documentai/snippets/quickstart_sample_test.py new file mode 100644 index 00000000000..f074368c674 --- /dev/null +++ b/documentai/snippets/quickstart_sample_test.py @@ -0,0 +1,38 @@ +# # Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os + +from documentai.snippets import quickstart_sample + +location = "us" +project_id = os.environ["GOOGLE_CLOUD_PROJECT"] +processor_id = "90484cfdedb024f6" +file_path = "resources/invoice.pdf" +mime_type = "application/pdf" + + +def test_quickstart(capsys): + quickstart_sample.quickstart( + project_id=project_id, + location=location, + processor_id=processor_id, + file_path=file_path, + mime_type=mime_type, + ) + out, _ = capsys.readouterr() + + assert "text:" in out + assert "Invoice" in out diff --git a/documentai/snippets/requirements-test.txt b/documentai/snippets/requirements-test.txt new file mode 100644 index 00000000000..980c425b939 --- /dev/null +++ b/documentai/snippets/requirements-test.txt @@ -0,0 +1,2 @@ +pytest==7.2.0 +mock==4.0.3 diff --git a/documentai/snippets/requirements.txt b/documentai/snippets/requirements.txt new file mode 100644 index 00000000000..21e4fc4a828 --- /dev/null +++ b/documentai/snippets/requirements.txt @@ -0,0 +1,2 @@ +google-cloud-documentai==2.6.0 +google-cloud-storage==2.7.0 diff --git a/documentai/snippets/resources/document_quality_poor.pdf b/documentai/snippets/resources/document_quality_poor.pdf new file 
mode 100644 index 00000000000..3a34a925c04 Binary files /dev/null and b/documentai/snippets/resources/document_quality_poor.pdf differ diff --git a/documentai/snippets/resources/handwritten_form.pdf b/documentai/snippets/resources/handwritten_form.pdf new file mode 100644 index 00000000000..2189ffffd00 Binary files /dev/null and b/documentai/snippets/resources/handwritten_form.pdf differ diff --git a/documentai/snippets/resources/invoice.pdf b/documentai/snippets/resources/invoice.pdf new file mode 100644 index 00000000000..7722734a430 Binary files /dev/null and b/documentai/snippets/resources/invoice.pdf differ diff --git a/documentai/snippets/resources/multi_document.pdf b/documentai/snippets/resources/multi_document.pdf new file mode 100644 index 00000000000..7ea62eb8f78 Binary files /dev/null and b/documentai/snippets/resources/multi_document.pdf differ diff --git a/documentai/snippets/resources/us_driver_license.pdf b/documentai/snippets/resources/us_driver_license.pdf new file mode 100644 index 00000000000..f8f62d902ee Binary files /dev/null and b/documentai/snippets/resources/us_driver_license.pdf differ diff --git a/documentai/snippets/review_document_sample.py b/documentai/snippets/review_document_sample.py new file mode 100644 index 00000000000..a7194651880 --- /dev/null +++ b/documentai/snippets/review_document_sample.py @@ -0,0 +1,94 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# [START documentai_review_document] + +from google.api_core.client_options import ClientOptions +from google.cloud import documentai + +# TODO(developer): Uncomment these variables before running the sample. +# project_id = 'YOUR_PROJECT_ID' +# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu' +# processor_id = 'YOUR_PROCESSOR_ID' # Create processor before running sample +# file_path = '/path/to/local/pdf' +# mime_type = 'application/pdf' # https://cloud.google.com/document-ai/docs/file-types + + +def review_document_sample( + project_id: str, location: str, processor_id: str, file_path: str, mime_type: str +): + # You must set the api_endpoint if you use a location other than 'us'. + opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com") + + # Create a client + client = documentai.DocumentProcessorServiceClient(client_options=opts) + + # Make Processing Request + inline_document = process_document( + project_id, location, processor_id, file_path, mime_type + ) + + # Get the full resource name of the human review config, e.g.: + # projects/project_id/locations/location/processors/processor_id/humanReviewConfig + human_review_config = client.human_review_config_path( + project_id, location, processor_id + ) + + # Options are DEFAULT, URGENT + priority = documentai.ReviewDocumentRequest.Priority.DEFAULT + + # Configure the human review request + request = documentai.ReviewDocumentRequest( + inline_document=inline_document, + human_review_config=human_review_config, + enable_schema_validation=False, + priority=priority, + ) + + # Make a request for human review of the processed document + operation = client.review_document(request=request) + + # Print operation name, can be used to check status of the request + print(operation.operation.name) + + +def process_document( + project_id: str, location: str, processor_id: str, file_path: str, mime_type: str +) -> documentai.Document: + # You must set the api_endpoint if you use a
location other than 'us'. + opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com") + + client = documentai.DocumentProcessorServiceClient(client_options=opts) + + # The full resource name of the processor, e.g.: + # projects/project_id/locations/location/processors/processor_id + name = client.processor_path(project_id, location, processor_id) + + # Read the file into memory + with open(file_path, "rb") as image: + image_content = image.read() + + # Load Binary Data into Document AI RawDocument Object + raw_document = documentai.RawDocument(content=image_content, mime_type=mime_type) + + # Configure the process request + request = documentai.ProcessRequest(name=name, raw_document=raw_document) + + result = client.process_document(request=request) + + return result.document + + +# [END documentai_review_document] diff --git a/documentai/snippets/review_document_sample_test.py b/documentai/snippets/review_document_sample_test.py new file mode 100644 index 00000000000..f52351b326d --- /dev/null +++ b/documentai/snippets/review_document_sample_test.py @@ -0,0 +1,39 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+# + +import os + +from documentai.snippets import review_document_sample + +location = "us" +project_id = os.environ["GOOGLE_CLOUD_PROJECT"] +processor_id = "b7054d67d76c39f1" +file_path = "resources/invoice.pdf" +mime_type = "application/pdf" + + +def test_review_document(capsys): + review_document_sample.review_document_sample( + project_id=project_id, + location=location, + processor_id=processor_id, + file_path=file_path, + mime_type=mime_type, + ) + out, _ = capsys.readouterr() + + assert "projects/" in out + assert "locations/" in out + assert "operations/" in out diff --git a/documentai/snippets/set_default_processor_version_sample.py b/documentai/snippets/set_default_processor_version_sample.py new file mode 100644 index 00000000000..2f96674cb7e --- /dev/null +++ b/documentai/snippets/set_default_processor_version_sample.py @@ -0,0 +1,62 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# [START documentai_set_default_processor_version] + +from google.api_core.client_options import ClientOptions +from google.api_core.exceptions import NotFound +from google.cloud import documentai + +# TODO(developer): Uncomment these variables before running the sample. 
+# project_id = 'YOUR_PROJECT_ID' +# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu' +# processor_id = 'YOUR_PROCESSOR_ID' # Create processor before running sample +# processor_version_id = 'YOUR_PROCESSOR_VERSION_ID' + + +def set_default_processor_version_sample( + project_id: str, location: str, processor_id: str, processor_version_id: str +): + # You must set the api_endpoint if you use a location other than 'us'. + opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com") + + client = documentai.DocumentProcessorServiceClient(client_options=opts) + + # The full resource name of the processor + # e.g.: projects/project_id/locations/location/processors/processor_id + processor = client.processor_path(project_id, location, processor_id) + + # The full resource name of the processor version + # e.g.: projects/project_id/locations/location/processors/processor_id/processorVersions/processor_version_id + processor_version = client.processor_version_path( + project_id, location, processor_id, processor_version_id + ) + + request = documentai.SetDefaultProcessorVersionRequest( + processor=processor, default_processor_version=processor_version + ) + + # Make SetDefaultProcessorVersion request + try: + operation = client.set_default_processor_version(request) + # Print operation details + print(operation.operation.name) + # Wait for operation to complete + operation.result() + except NotFound as e: + print(e.message) + + +# [END documentai_set_default_processor_version] diff --git a/documentai/snippets/set_default_processor_version_sample_test.py b/documentai/snippets/set_default_processor_version_sample_test.py new file mode 100644 index 00000000000..0ce5c9a0d85 --- /dev/null +++ b/documentai/snippets/set_default_processor_version_sample_test.py @@ -0,0 +1,44 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os + +from documentai.snippets import set_default_processor_version_sample + +location = "us" +project_id = os.environ["GOOGLE_CLOUD_PROJECT"] +processor_id = "aeb8cea219b7c272" +current_default_processor_version = "pretrained-ocr-v1.0-2020-09-23" +new_default_processor_version = "pretrained-ocr-v1.1-2022-09-12" + + +def test_set_default_processor_version(capsys): + set_default_processor_version_sample.set_default_processor_version_sample( + project_id=project_id, + location=location, + processor_id=processor_id, + processor_version_id=new_default_processor_version, + ) + out, _ = capsys.readouterr() + + assert "operation" in out + + # Set back to previous default + set_default_processor_version_sample.set_default_processor_version_sample( + project_id=project_id, + location=location, + processor_id=processor_id, + processor_version_id=current_default_processor_version, + ) diff --git a/documentai/snippets/undeploy_processor_version_sample.py b/documentai/snippets/undeploy_processor_version_sample.py new file mode 100644 index 00000000000..a3ac91ce1dd --- /dev/null +++ b/documentai/snippets/undeploy_processor_version_sample.py @@ -0,0 +1,58 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# [START documentai_undeploy_processor_version] + +from google.api_core.client_options import ClientOptions +from google.api_core.exceptions import FailedPrecondition +from google.api_core.exceptions import InvalidArgument +from google.cloud import documentai + +# TODO(developer): Uncomment these variables before running the sample. +# project_id = 'YOUR_PROJECT_ID' +# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu' +# processor_id = 'YOUR_PROCESSOR_ID' # Create processor before running sample +# processor_version_id = 'YOUR_PROCESSOR_VERSION_ID' + + +def undeploy_processor_version_sample( + project_id: str, location: str, processor_id: str, processor_version_id: str +): + # You must set the api_endpoint if you use a location other than 'us'. 
+ opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com") + + client = documentai.DocumentProcessorServiceClient(client_options=opts) + + # The full resource name of the processor version + # e.g.: projects/project_id/locations/location/processors/processor_id/processorVersions/processor_version_id + name = client.processor_version_path( + project_id, location, processor_id, processor_version_id + ) + + # Make UndeployProcessorVersion request + try: + operation = client.undeploy_processor_version(name=name) + # Print operation details + print(operation.operation.name) + # Wait for operation to complete + operation.result() + # Undeploy request will fail if the + # processor version is already undeployed + # or if a request is made on a pretrained processor version + except (FailedPrecondition, InvalidArgument) as e: + print(e.message) + + +# [END documentai_undeploy_processor_version] diff --git a/documentai/snippets/undeploy_processor_version_sample_test.py b/documentai/snippets/undeploy_processor_version_sample_test.py new file mode 100644 index 00000000000..7a9528dac13 --- /dev/null +++ b/documentai/snippets/undeploy_processor_version_sample_test.py @@ -0,0 +1,48 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# TODO: Switch to Real Endpoint when Deployable Versions are Available
@mock.patch(
    "google.cloud.documentai.DocumentProcessorServiceClient.undeploy_processor_version"
)
@mock.patch("google.api_core.operation.Operation")
def test_undeploy_processor_version(
    operation_mock, undeploy_processor_version_mock, capsys
):
    """Run the undeploy sample with the client call mocked and check stdout."""
    # The patched client method hands back the mocked operation, so the
    # sample does not go through the real undeploy_processor_version call.
    undeploy_processor_version_mock.return_value = operation_mock

    sample_kwargs = {
        "project_id": project_id,
        "location": location,
        "processor_id": processor_id,
        "processor_version_id": processor_version_id,
    }
    undeploy_processor_version_sample.undeploy_processor_version_sample(
        **sample_kwargs
    )

    # The sample must issue exactly one undeploy request.
    undeploy_processor_version_mock.assert_called_once()

    # The sample prints the operation's name to stdout.
    captured = capsys.readouterr()
    assert "operation" in captured.out
b/resources/multi_document.pdf new file mode 100644 index 00000000000..7ea62eb8f78 Binary files /dev/null and b/resources/multi_document.pdf differ diff --git a/resources/us_driver_license.pdf b/resources/us_driver_license.pdf new file mode 100644 index 00000000000..f8f62d902ee Binary files /dev/null and b/resources/us_driver_license.pdf differ