From 71e6c518e8da1b88f1d2b6ebedb8a20f4104b836 Mon Sep 17 00:00:00 2001 From: Holt Skinner <13262395+holtskinner@users.noreply.github.com> Date: Fri, 8 Mar 2024 14:54:19 -0600 Subject: [PATCH 1/6] fix: Drop Python 3.7 Support (#262) BEGIN_COMMIT_OVERRIDE fix: Drop Python 3.7 Support fix: Require google-api-core >= 2.17.1 fix: Require pandas >= 2.0.0 fix: Require proto-plus >= 1.22.3 fix: Require numpy >= 1.23.5 fix: Require pikepdf >= 8.0.0 fix: Require Pillow >= 10.0.0 END_COMMIT_OVERRIDE --------- Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou --- .github/sync-repo-settings.yaml | 35 +++++++++++++++++++++++++++ .github/workflows/unittest.yml | 2 +- CONTRIBUTING.rst | 10 ++++---- README.rst | 6 ++--- noxfile.py | 4 ++-- owlbot.py | 3 ++- samples/snippets/noxfile.py | 9 ++++++- samples/snippets/requirements.txt | 2 +- setup.py | 39 ++++++++++++++----------------- testing/constraints-3.10.txt | 1 - testing/constraints-3.11.txt | 1 - testing/constraints-3.12.txt | 1 - testing/constraints-3.7.txt | 16 ------------- testing/constraints-3.8.txt | 24 +++++++++++-------- testing/constraints-3.9.txt | 3 +-- 15 files changed, 88 insertions(+), 68 deletions(-) create mode 100644 .github/sync-repo-settings.yaml delete mode 100644 testing/constraints-3.7.txt diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml new file mode 100644 index 00000000..e4b41982 --- /dev/null +++ b/.github/sync-repo-settings.yaml @@ -0,0 +1,35 @@ +# https://github.com/googleapis/repo-automation-bots/tree/main/packages/sync-repo-settings +# Rules for main branch protection +branchProtectionRules: +# Identifies the protection rule pattern. Name of the branch to be protected. +# Defaults to `main` +- pattern: main + requiresStrictStatusChecks: true + requiredStatusCheckContexts: + - 'cla/google' + - 'OwlBot Post Processor' + - 'docs' + - 'lint' + - 'unit (3.8)' + - 'unit (3.9)' + - 'unit (3.10)' + - 'unit (3.11)' + - 'unit (3.12)' + - 'cover' + - 'Kokoro' + - 'Samples - Lint' + - 'Samples - Python 3.8' + - 'Samples - Python 3.12' +permissionRules: + - team: actools-python + permission: admin + - team: actools + permission: admin + - team: cdpe-cloudai + permission: admin + - team: yoshi-python + permission: push + - team: python-samples-owners + permission: push + - team: python-samples-reviewers + permission: push diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml index 87d08602..8e730b76 100644 --- a/.github/workflows/unittest.yml +++ b/.github/workflows/unittest.yml @@ -8,7 +8,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python: ['3.7', '3.8', '3.9', '3.10', '3.11', '3.12'] + python: ['3.8', '3.9', '3.10', '3.11', '3.12'] steps: - name: Checkout uses: actions/checkout@v4 diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index b0f7e49d..90998b26 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -22,7 +22,7 @@ In order to add a feature: documentation. - The feature must work fully on the following CPython versions: - 3.7, 3.8, 3.9, 3.10, 3.11 and 3.12 on both UNIX and Windows. + 3.8, 3.9, 3.10, 3.11 and 3.12 on both UNIX and Windows. - The feature must not add unnecessary dependencies (where "unnecessary" is of course subjective, but new dependencies should @@ -143,12 +143,12 @@ Running System Tests $ nox -s system # Run a single system test - $ nox -s system-3.8 -- -k + $ nox -s system-3.12 -- -k .. note:: - System tests are only configured to run under Python 3.8. + System tests are only configured to run under Python 3.8, 3.9, 3.10, 3.11 and 3.12. For expediency, we do not run them in older versions of Python 3. This alone will not run the tests. You'll need to change some local @@ -221,14 +221,12 @@ Supported Python Versions We support: -- `Python 3.7`_ - `Python 3.8`_ - `Python 3.9`_ - `Python 3.10`_ - `Python 3.11`_ - `Python 3.12`_ -.. _Python 3.7: https://docs.python.org/3.7/ .. _Python 3.8: https://docs.python.org/3.8/ .. _Python 3.9: https://docs.python.org/3.9/ .. _Python 3.10: https://docs.python.org/3.10/ @@ -241,7 +239,7 @@ Supported versions can be found in our ``noxfile.py`` `config`_. .. _config: https://github.com/googleapis/python-documentai-toolbox/blob/main/noxfile.py -We also explicitly decided to support Python 3 beginning with version 3.7. +We also explicitly decided to support Python 3 beginning with version 3.8. Reasons for this include: - Encouraging use of newest versions of Python 3 diff --git a/README.rst b/README.rst index 8f375a88..fcd6a019 100644 --- a/README.rst +++ b/README.rst @@ -11,8 +11,8 @@ The Document AI Toolbox is in an experimental state. This library is a work-in-p .. |experimental| image:: https://img.shields.io/badge/support-experimental-red.svg :target: https://github.com/googleapis/google-cloud-python/blob/main/README.rst#stability-levels -.. |versions| image:: https://img.shields.io/pypi/pyversions/google-analytics-admin.svg - :target: https://pypi.org/project/google-analytics-admin/ +.. |versions| image:: https://img.shields.io/pypi/pyversions/google-cloud-documentai-toolbox.svg + :target: https://pypi.org/project/google-cloud-documentai-toolbox/ Quick Start @@ -63,7 +63,7 @@ Supported Python Versions Our client libraries are compatible with all current `active`_ and `maintenance`_ versions of Python. -Python >= 3.7 +Python >= 3.8 .. _active: https://devguide.python.org/devcycle/#in-development-main-branch .. _maintenance: https://devguide.python.org/devcycle/#maintenance-branches diff --git a/noxfile.py b/noxfile.py index a48bb8ca..9a9b1ba2 100644 --- a/noxfile.py +++ b/noxfile.py @@ -34,7 +34,7 @@ DEFAULT_PYTHON_VERSION = "3.8" -UNIT_TEST_PYTHON_VERSIONS: List[str] = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] +UNIT_TEST_PYTHON_VERSIONS: List[str] = ["3.8", "3.9", "3.10", "3.11", "3.12"] UNIT_TEST_STANDARD_DEPENDENCIES = [ "mock", "asyncmock", @@ -48,7 +48,7 @@ UNIT_TEST_EXTRAS: List[str] = [] UNIT_TEST_EXTRAS_BY_PYTHON: Dict[str, List[str]] = {} -SYSTEM_TEST_PYTHON_VERSIONS: List[str] = ["3.8"] +SYSTEM_TEST_PYTHON_VERSIONS: List[str] = ["3.8", "3.9", "3.10", "3.11", "3.12"] SYSTEM_TEST_STANDARD_DEPENDENCIES: List[str] = [ "mock", "pytest", diff --git a/owlbot.py b/owlbot.py index fd06658a..4c6e8171 100644 --- a/owlbot.py +++ b/owlbot.py @@ -28,7 +28,8 @@ # Add templated files # ---------------------------------------------------------------------------- templated_files = common.py_library( - system_test_python_versions=["3.8"], + unit_test_python_versions=["3.8", "3.9", "3.10", "3.11", "3.12"], + system_test_python_versions=["3.8", "3.9", "3.10", "3.11", "3.12"], cov_level=99, intersphinx_dependencies={ "pandas": "https://pandas.pydata.org/pandas-docs/stable/" diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py index 33e8951f..8050fe14 100644 --- a/samples/snippets/noxfile.py +++ b/samples/snippets/noxfile.py @@ -40,7 +40,14 @@ TEST_CONFIG = { # You can opt out from the test for specific Python versions. - "ignored_versions": ["2.7", "3.6"], + "ignored_versions": [ + "2.7", + "3.6", + "3.7", + "3.9", + "3.10", + "3.11", + ], # Old samples are opted out of enforcing Python type hints # All new samples should feature them "enforce_type_hints": True, diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 4c0a7afd..c30d96b7 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ google-cloud-bigquery==3.17.2 google-cloud-documentai==2.24.0 google-cloud-storage==2.14.0 -google-cloud-documentai-toolbox==0.12.2a0 +google-cloud-documentai-toolbox==0.13.1a0 diff --git a/setup.py b/setup.py index fb5d5fea..02ca97bd 100644 --- a/setup.py +++ b/setup.py @@ -52,33 +52,28 @@ "google.cloud.documentai_toolbox": ["templates/*.xml.j2"], }, install_requires=( - "google-api-core >= 1.31.5, <3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0", - "pandas >= 1.0.0, <3.0.0", - "pandas >= 1.0.0, <2.0.0; python_version<'3.8'", - "pyarrow >= 15.0.0, <16.0.0; python_version>='3.8'", # Required by Pandas #237 - "tabulate >= 0.9.0, <1.0.0", - "proto-plus >= 1.22.0, <2.0.0dev", - "proto-plus >= 1.22.2, <2.0.0dev; python_version>='3.11'", - "grpc-google-iam-v1 >= 0.12.6, < 0.13dev", - "google-cloud-bigquery >= 3.5.0, < 4.0.0dev", - "google-cloud-documentai >= 2.20.0, < 3.0.0dev", - "google-cloud-storage >= 1.31.0, < 3.0.0dev", - "google-cloud-vision >= 2.7.0, < 4.0.0dev", - "numpy >= 1.18.1, < 2.0.0", - "intervaltree >= 3.0.0", - "pikepdf >= 6.2.9, < 9.0.0", - "pikepdf >= 6.2.9, < 7.0.0; python_version<'3.8'", - "immutabledict >= 2.0.0, < 4.0.0", - "immutabledict >= 2.0.0, < 3.0.0dev; python_version<'3.8'", - "Pillow >= 9.5.0, < 11.0.0", - "Jinja2 >= 3.1.0, <= 4.0.0", + "google-api-core>=2.17.1, <3.0.0dev", + "pandas[performance,gcp]>=2.0.0, <3.0.0", + "pyarrow>=15.0.0, <16.0.0", + "tabulate>=0.9.0, <1.0.0", + "proto-plus>=1.22.3, <2.0.0dev", + "grpc-google-iam-v1>=0.12.6, <1.0.0dev", + "google-cloud-bigquery>=3.5.0, <4.0.0dev", + "google-cloud-documentai>=2.20.0, <3.0.0dev", + "google-cloud-storage>=1.31.0, <3.0.0dev", + "google-cloud-vision>=2.7.0, <4.0.0dev", + "numpy>=1.23.5, <2.0.0", + "intervaltree>=3.0.0", + "pikepdf>=8.0.0, <9.0.0", + "immutabledict>=2.0.0, <5.0.0", + "Pillow>=10.0.0, <11.0.0", + "Jinja2>=3.1.0, <4.0.0", ), - python_requires=">=3.7", + python_requires=">=3.8", classifiers=[ "Development Status :: 3 - Alpha", "Intended Audience :: Developers", "Operating System :: OS Independent", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", diff --git a/testing/constraints-3.10.txt b/testing/constraints-3.10.txt index c9f0e4bb..e830766f 100644 --- a/testing/constraints-3.10.txt +++ b/testing/constraints-3.10.txt @@ -2,7 +2,6 @@ # This constraints file is required for unit tests. # List all library dependencies and extras in this file. google-api-core -libcst pandas proto-plus grpc-google-iam-v1 diff --git a/testing/constraints-3.11.txt b/testing/constraints-3.11.txt index c9f0e4bb..e830766f 100644 --- a/testing/constraints-3.11.txt +++ b/testing/constraints-3.11.txt @@ -2,7 +2,6 @@ # This constraints file is required for unit tests. # List all library dependencies and extras in this file. google-api-core -libcst pandas proto-plus grpc-google-iam-v1 diff --git a/testing/constraints-3.12.txt b/testing/constraints-3.12.txt index c9f0e4bb..e830766f 100644 --- a/testing/constraints-3.12.txt +++ b/testing/constraints-3.12.txt @@ -2,7 +2,6 @@ # This constraints file is required for unit tests. # List all library dependencies and extras in this file. google-api-core -libcst pandas proto-plus grpc-google-iam-v1 diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt deleted file mode 100644 index 3c64ab2e..00000000 --- a/testing/constraints-3.7.txt +++ /dev/null @@ -1,16 +0,0 @@ -# This constraints file is used to check that lower bounds -# are correct in setup.py -# List all library dependencies and extras in this file. -# Pin the version to the lower bound. -# e.g., if setup.py has "google-cloud-foo >= 1.14.0, < 2.0.0dev", -# Then this file should have google-cloud-foo==1.14.0 -google-api-core==1.34.0 -libcst==0.2.5 -pandas==1.0.0 -proto-plus==1.22.0 -grpc-google-iam-v1==0.12.6 -google-cloud-bigquery==3.5.0 -google-cloud-documentai==2.20.0 -google-cloud-storage==2.7.0 -numpy==1.19.5 -pikepdf==6.2.9 diff --git a/testing/constraints-3.8.txt b/testing/constraints-3.8.txt index ed1905e2..b47fd160 100644 --- a/testing/constraints-3.8.txt +++ b/testing/constraints-3.8.txt @@ -1,13 +1,17 @@ # -*- coding: utf-8 -*- -# This constraints file is required for unit tests. +# This constraints file is used to check that lower bounds +# are correct in setup.py # List all library dependencies and extras in this file. -google-api-core -libcst -pandas -proto-plus -grpc-google-iam-v1 -google-cloud-bigquery -google-cloud-documentai -google-cloud-storage -numpy==1.21.6 +# Pin the version to the lower bound. +# e.g., if setup.py has "google-cloud-foo >= 1.14.0, < 2.0.0dev", +# Then this file should have google-cloud-foo==1.14.0 +google-api-core==2.17.1 +pandas==2.0.0 +proto-plus==1.22.3 +grpc-google-iam-v1==0.12.6 +google-cloud-bigquery==3.5.0 +google-cloud-documentai==2.20.0 +google-cloud-storage==2.7.0 +pandas-gbq==0.21.0 +numpy==1.23.5 pikepdf==8.2.3 diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt index 837480d0..f02a2e2c 100644 --- a/testing/constraints-3.9.txt +++ b/testing/constraints-3.9.txt @@ -2,9 +2,8 @@ # This constraints file is required for unit tests. # List all library dependencies and extras in this file. google-api-core -libcst pandas -pyarrow # Required by Pandas #237 +pyarrow proto-plus grpc-google-iam-v1 google-cloud-bigquery From 71191abc0d0336ea68f98d93f2a185d2c0619259 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 11 Mar 2024 17:15:20 +0100 Subject: [PATCH 2/6] chore(deps): update all dependencies (#260) --- samples/snippets/requirements-test.txt | 4 ++-- samples/snippets/requirements.txt | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index a989fc83..7d3c8830 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,3 +1,3 @@ -pytest==7.4.4 +pytest==8.1.1 mock==5.1.0 -google-cloud-bigquery==3.17.2 +google-cloud-bigquery==3.18.0 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index c30d96b7..3be32e3f 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==3.17.2 -google-cloud-documentai==2.24.0 -google-cloud-storage==2.14.0 +google-cloud-bigquery==3.18.0 +google-cloud-documentai==2.24.1 +google-cloud-storage==2.15.0 google-cloud-documentai-toolbox==0.13.1a0 From 2d9f05bfc28efb5fc6f8829921b45a046b768944 Mon Sep 17 00:00:00 2001 From: Holt Skinner <13262395+holtskinner@users.noreply.github.com> Date: Mon, 11 Mar 2024 11:39:21 -0500 Subject: [PATCH 3/6] fix: Escape html special characters in `hocr_document_template.xml.j2` (#279) * fix: Escape html special characters in hocr_document_template.xml.j2 * test: Add Unit test for hOCR XML validity. --- .../templates/hocr_document_template.xml.j2 | 4 +- .../toolbox_invoice_test-0-hocr-escape.json | 284 ++++++++++++++++++ .../toolbox_invoice_test-0-hocr-escape.xml | 16 + tests/unit/test_document.py | 28 ++ 4 files changed, 330 insertions(+), 2 deletions(-) create mode 100644 tests/unit/resources/toolbox_invoice_test-0-hocr-escape.json create mode 100644 tests/unit/resources/toolbox_invoice_test-0-hocr-escape.xml diff --git a/google/cloud/documentai_toolbox/templates/hocr_document_template.xml.j2 b/google/cloud/documentai_toolbox/templates/hocr_document_template.xml.j2 index dad071e1..bc4c0053 100644 --- a/google/cloud/documentai_toolbox/templates/hocr_document_template.xml.j2 +++ b/google/cloud/documentai_toolbox/templates/hocr_document_template.xml.j2 @@ -19,9 +19,9 @@ {% set paridx = loop.index0 -%}

{% for line in paragraph.lines -%} {% set lidx = loop.index0 -%} - {{ line.text }}{% for token in line.tokens -%} + {{ line.text|escape }}{% for token in line.tokens -%} {% set tidx = loop.index0 -%} - {{ token.text }}{% endfor -%} + {{ token.text|escape }}{% endfor -%} {% endfor -%}

{% endfor -%} {% endfor -%} diff --git a/tests/unit/resources/toolbox_invoice_test-0-hocr-escape.json b/tests/unit/resources/toolbox_invoice_test-0-hocr-escape.json new file mode 100644 index 00000000..222f3afc --- /dev/null +++ b/tests/unit/resources/toolbox_invoice_test-0-hocr-escape.json @@ -0,0 +1,284 @@ +{ + "text": "", + "pages": [ + { + "pageNumber": 1, + "dimension": { + "width": 1758.0, + "height": 2275.0, + "unit": "pixels" + }, + "layout": { + "textAnchor": { + "textSegments": [ + { + "endIndex": "435" + } + ] + }, + "boundingPoly": { + "vertices": [ + {}, + { + "x": 1758 + }, + { + "x": 1758, + "y": 2275 + }, + { + "y": 2275 + } + ], + "normalizedVertices": [ + {}, + { + "x": 1.0 + }, + { + "x": 1.0, + "y": 1.0 + }, + { + "y": 1.0 + } + ] + }, + "orientation": 1 + }, + "detectedLanguages": [ + { + "languageCode": "en" + }, + { + "languageCode": "und" + } + ], + "blocks": [ + { + "layout": { + "textAnchor": { + "textSegments": [ + { + "endIndex": "8" + } + ] + }, + "confidence": 0.99258333, + "boundingPoly": { + "vertices": [ + { + "x": 1310, + "y": 220 + }, + { + "x": 1534, + "y": 220 + }, + { + "x": 1534, + "y": 282 + }, + { + "x": 1310, + "y": 282 + } + ], + "normalizedVertices": [ + { + "x": 0.74516493, + "y": 0.0967033 + }, + { + "x": 0.8725825, + "y": 0.0967033 + }, + { + "x": 0.8725825, + "y": 0.12395605 + }, + { + "x": 0.74516493, + "y": 0.12395605 + } + ] + }, + "orientation": 1 + } + } + ], + "paragraphs": [ + { + "layout": { + "textAnchor": { + "textSegments": [ + { + "endIndex": "8" + } + ] + }, + "confidence": 0.99258333, + "boundingPoly": { + "vertices": [ + { + "x": 1310, + "y": 220 + }, + { + "x": 1534, + "y": 220 + }, + { + "x": 1534, + "y": 282 + }, + { + "x": 1310, + "y": 282 + } + ], + "normalizedVertices": [ + { + "x": 0.74516493, + "y": 0.0967033 + }, + { + "x": 0.8725825, + "y": 0.0967033 + }, + { + "x": 0.8725825, + "y": 0.12395605 + }, + { + "x": 0.74516493, + "y": 0.12395605 + } + ] + }, + "orientation": 1 + } + } + ], + "lines": [ + { + "layout": { + "textAnchor": { + "textSegments": [ + { + "endIndex": "8" + } + ] + }, + "confidence": 0.99258333, + "boundingPoly": { + "vertices": [ + { + "x": 1310, + "y": 220 + }, + { + "x": 1534, + "y": 220 + }, + { + "x": 1534, + "y": 282 + }, + { + "x": 1310, + "y": 282 + } + ], + "normalizedVertices": [ + { + "x": 0.74516493, + "y": 0.0967033 + }, + { + "x": 0.8725825, + "y": 0.0967033 + }, + { + "x": 0.8725825, + "y": 0.12395605 + }, + { + "x": 0.74516493, + "y": 0.12395605 + } + ] + }, + "orientation": 1 + }, + "detectedLanguages": [ + { + "languageCode": "en" + } + ] + } + ], + "tokens": [ + { + "layout": { + "textAnchor": { + "textSegments": [ + { + "endIndex": "8" + } + ] + }, + "confidence": 0.99258333, + "boundingPoly": { + "vertices": [ + { + "x": 1310, + "y": 220 + }, + { + "x": 1534, + "y": 220 + }, + { + "x": 1534, + "y": 282 + }, + { + "x": 1310, + "y": 282 + } + ], + "normalizedVertices": [ + { + "x": 0.74516493, + "y": 0.0967033 + }, + { + "x": 0.8725825, + "y": 0.0967033 + }, + { + "x": 0.8725825, + "y": 0.12395605 + }, + { + "x": 0.74516493, + "y": 0.12395605 + } + ] + }, + "orientation": 1 + }, + "detectedLanguages": [ + { + "languageCode": "en" + } + ] + } + ] + } + ], + "shardInfo": { + "shardCount": "1" + } +} \ No newline at end of file diff --git a/tests/unit/resources/toolbox_invoice_test-0-hocr-escape.xml b/tests/unit/resources/toolbox_invoice_test-0-hocr-escape.xml new file mode 100644 index 00000000..362b82a4 --- /dev/null +++ b/tests/unit/resources/toolbox_invoice_test-0-hocr-escape.xml @@ -0,0 +1,16 @@ + + + + +hocr-escape + + + + + + + + +

<Invoice<Invoice

+ + \ No newline at end of file diff --git a/tests/unit/test_document.py b/tests/unit/test_document.py index bcf71ae5..286a482d 100644 --- a/tests/unit/test_document.py +++ b/tests/unit/test_document.py @@ -17,6 +17,7 @@ import json import os import shutil +from xml.etree import ElementTree # try/except added for compatibility with python < 3.8 try: @@ -791,6 +792,9 @@ def test_export_hocr_str(): actual_hocr = wrapped_document.export_hocr_str(title="toolbox_invoice_test-0") assert actual_hocr + element = ElementTree.fromstring(actual_hocr) + assert element is not None + with open( "tests/unit/resources/toolbox_invoice_test_0_hocr.xml", "r", encoding="utf-8" ) as f: @@ -808,6 +812,30 @@ def test_export_hocr_str_with_blank_document(): assert actual_hocr + element = ElementTree.fromstring(actual_hocr) + assert element is not None + + +def test_export_hocr_str_with_escape_characters(): + wrapped_document = document.Document.from_document_path( + document_path="tests/unit/resources/toolbox_invoice_test-0-hocr-escape.json" + ) + + actual_hocr = wrapped_document.export_hocr_str(title="hocr-escape") + assert actual_hocr + + element = ElementTree.fromstring(actual_hocr) + assert element is not None + + with open( + "tests/unit/resources/toolbox_invoice_test-0-hocr-escape.xml", + "r", + encoding="utf-8", + ) as f: + expected = f.read() + + assert actual_hocr == expected + def test_document_to_merged_documentai_document(get_bytes_multiple_files_mock): wrapped_document = document.Document.from_gcs( From e12f576179eb99f87050708ae70af313decbcd51 Mon Sep 17 00:00:00 2001 From: Holt Skinner <13262395+holtskinner@users.noreply.github.com> Date: Mon, 11 Mar 2024 12:02:24 -0500 Subject: [PATCH 4/6] chore(deps): Fix possible dependency conflict with `google-api-core` `2.17.1` (#280) --- setup.py | 2 +- testing/constraints-3.8.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 02ca97bd..23265516 100644 --- a/setup.py +++ b/setup.py @@ -52,7 +52,7 @@ "google.cloud.documentai_toolbox": ["templates/*.xml.j2"], }, install_requires=( - "google-api-core>=2.17.1, <3.0.0dev", + "google-api-core>=2.15.0, <3.0.0dev", "pandas[performance,gcp]>=2.0.0, <3.0.0", "pyarrow>=15.0.0, <16.0.0", "tabulate>=0.9.0, <1.0.0", diff --git a/testing/constraints-3.8.txt b/testing/constraints-3.8.txt index b47fd160..9214782a 100644 --- a/testing/constraints-3.8.txt +++ b/testing/constraints-3.8.txt @@ -5,7 +5,7 @@ # Pin the version to the lower bound. # e.g., if setup.py has "google-cloud-foo >= 1.14.0, < 2.0.0dev", # Then this file should have google-cloud-foo==1.14.0 -google-api-core==2.17.1 +google-api-core==2.15.0 pandas==2.0.0 proto-plus==1.22.3 grpc-google-iam-v1==0.12.6 From f5c9bc9db643920116e49da2ab48fdb542e2ebbb Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 11 Mar 2024 18:30:56 +0100 Subject: [PATCH 5/6] chore(deps): update dependency google-cloud-bigquery to v3.19.0 (#282) --- samples/snippets/requirements-test.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 7d3c8830..4abbc9d1 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,3 +1,3 @@ pytest==8.1.1 mock==5.1.0 -google-cloud-bigquery==3.18.0 +google-cloud-bigquery==3.19.0 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 3be32e3f..a249eb5f 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==3.18.0 +google-cloud-bigquery==3.19.0 google-cloud-documentai==2.24.1 google-cloud-storage==2.15.0 google-cloud-documentai-toolbox==0.13.1a0 From 6e4cc3570fb468473ac9e60e2a65116534b0cae1 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 11 Mar 2024 10:49:37 -0700 Subject: [PATCH 6/6] chore(main): release 0.13.3-alpha (#276) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: Holt Skinner <13262395+holtskinner@users.noreply.github.com> --- CHANGELOG.md | 14 ++++++++++++++ google/cloud/documentai_toolbox/version.py | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 87b0072a..84089243 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,19 @@ # Changelog +## [0.13.3-alpha](https://github.com/googleapis/python-documentai-toolbox/compare/v0.13.2-alpha...v0.13.3-alpha) (2024-03-11) + + +### Bug Fixes + +* Drop Python 3.7 Support ([71e6c51](https://github.com/googleapis/python-documentai-toolbox/commit/71e6c518e8da1b88f1d2b6ebedb8a20f4104b836)) +* Escape html special characters in `hocr_document_template.xml.j2` ([#279](https://github.com/googleapis/python-documentai-toolbox/issues/279)) ([2d9f05b](https://github.com/googleapis/python-documentai-toolbox/commit/2d9f05bfc28efb5fc6f8829921b45a046b768944)) +* Require google-api-core >= 2.17.1 ([71e6c51](https://github.com/googleapis/python-documentai-toolbox/commit/71e6c518e8da1b88f1d2b6ebedb8a20f4104b836)) +* Require numpy >= 1.23.5 ([71e6c51](https://github.com/googleapis/python-documentai-toolbox/commit/71e6c518e8da1b88f1d2b6ebedb8a20f4104b836)) +* Require pandas >= 2.0.0 ([71e6c51](https://github.com/googleapis/python-documentai-toolbox/commit/71e6c518e8da1b88f1d2b6ebedb8a20f4104b836)) +* Require pikepdf >= 8.0.0 ([71e6c51](https://github.com/googleapis/python-documentai-toolbox/commit/71e6c518e8da1b88f1d2b6ebedb8a20f4104b836)) +* Require Pillow >= 10.0.0 ([71e6c51](https://github.com/googleapis/python-documentai-toolbox/commit/71e6c518e8da1b88f1d2b6ebedb8a20f4104b836)) +* Require proto-plus >= 1.22.3 ([71e6c51](https://github.com/googleapis/python-documentai-toolbox/commit/71e6c518e8da1b88f1d2b6ebedb8a20f4104b836)) + ## [0.13.2-alpha](https://github.com/googleapis/python-documentai-toolbox/compare/v0.13.1-alpha...v0.13.2-alpha) (2024-03-08) diff --git a/google/cloud/documentai_toolbox/version.py b/google/cloud/documentai_toolbox/version.py index 61e745bb..b2dd4331 100644 --- a/google/cloud/documentai_toolbox/version.py +++ b/google/cloud/documentai_toolbox/version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "0.13.2-alpha" +__version__ = "0.13.3-alpha"