diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml new file mode 100644 index 00000000..e4b41982 --- /dev/null +++ b/.github/sync-repo-settings.yaml @@ -0,0 +1,35 @@ +# https://github.com/googleapis/repo-automation-bots/tree/main/packages/sync-repo-settings +# Rules for main branch protection +branchProtectionRules: +# Identifies the protection rule pattern. Name of the branch to be protected. +# Defaults to `main` +- pattern: main + requiresStrictStatusChecks: true + requiredStatusCheckContexts: + - 'cla/google' + - 'OwlBot Post Processor' + - 'docs' + - 'lint' + - 'unit (3.8)' + - 'unit (3.9)' + - 'unit (3.10)' + - 'unit (3.11)' + - 'unit (3.12)' + - 'cover' + - 'Kokoro' + - 'Samples - Lint' + - 'Samples - Python 3.8' + - 'Samples - Python 3.12' +permissionRules: + - team: actools-python + permission: admin + - team: actools + permission: admin + - team: cdpe-cloudai + permission: admin + - team: yoshi-python + permission: push + - team: python-samples-owners + permission: push + - team: python-samples-reviewers + permission: push diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml index 87d08602..8e730b76 100644 --- a/.github/workflows/unittest.yml +++ b/.github/workflows/unittest.yml @@ -8,7 +8,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python: ['3.7', '3.8', '3.9', '3.10', '3.11', '3.12'] + python: ['3.8', '3.9', '3.10', '3.11', '3.12'] steps: - name: Checkout uses: actions/checkout@v4 diff --git a/CHANGELOG.md b/CHANGELOG.md index 87b0072a..84089243 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,19 @@ # Changelog +## [0.13.3-alpha](https://github.com/googleapis/python-documentai-toolbox/compare/v0.13.2-alpha...v0.13.3-alpha) (2024-03-11) + + +### Bug Fixes + +* Drop Python 3.7 Support ([71e6c51](https://github.com/googleapis/python-documentai-toolbox/commit/71e6c518e8da1b88f1d2b6ebedb8a20f4104b836)) +* Escape html special characters in `hocr_document_template.xml.j2` 
([#279](https://github.com/googleapis/python-documentai-toolbox/issues/279)) ([2d9f05b](https://github.com/googleapis/python-documentai-toolbox/commit/2d9f05bfc28efb5fc6f8829921b45a046b768944)) +* Require google-api-core >= 2.15.0 ([71e6c51](https://github.com/googleapis/python-documentai-toolbox/commit/71e6c518e8da1b88f1d2b6ebedb8a20f4104b836)) +* Require numpy >= 1.23.5 ([71e6c51](https://github.com/googleapis/python-documentai-toolbox/commit/71e6c518e8da1b88f1d2b6ebedb8a20f4104b836)) +* Require pandas >= 2.0.0 ([71e6c51](https://github.com/googleapis/python-documentai-toolbox/commit/71e6c518e8da1b88f1d2b6ebedb8a20f4104b836)) +* Require pikepdf >= 8.0.0 ([71e6c51](https://github.com/googleapis/python-documentai-toolbox/commit/71e6c518e8da1b88f1d2b6ebedb8a20f4104b836)) +* Require Pillow >= 10.0.0 ([71e6c51](https://github.com/googleapis/python-documentai-toolbox/commit/71e6c518e8da1b88f1d2b6ebedb8a20f4104b836)) +* Require proto-plus >= 1.22.3 ([71e6c51](https://github.com/googleapis/python-documentai-toolbox/commit/71e6c518e8da1b88f1d2b6ebedb8a20f4104b836)) + ## [0.13.2-alpha](https://github.com/googleapis/python-documentai-toolbox/compare/v0.13.1-alpha...v0.13.2-alpha) (2024-03-08) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index b0f7e49d..90998b26 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -22,7 +22,7 @@ In order to add a feature: documentation. - The feature must work fully on the following CPython versions: - 3.7, 3.8, 3.9, 3.10, 3.11 and 3.12 on both UNIX and Windows. + 3.8, 3.9, 3.10, 3.11 and 3.12 on both UNIX and Windows. - The feature must not add unnecessary dependencies (where "unnecessary" is of course subjective, but new dependencies should @@ -143,12 +143,12 @@ Running System Tests $ nox -s system # Run a single system test - $ nox -s system-3.8 -- -k + $ nox -s system-3.12 -- -k .. note:: - System tests are only configured to run under Python 3.8. 
+ System tests are only configured to run under Python 3.8, 3.9, 3.10, 3.11 and 3.12. For expediency, we do not run them in older versions of Python 3. This alone will not run the tests. You'll need to change some local @@ -221,14 +221,12 @@ Supported Python Versions We support: -- `Python 3.7`_ - `Python 3.8`_ - `Python 3.9`_ - `Python 3.10`_ - `Python 3.11`_ - `Python 3.12`_ -.. _Python 3.7: https://docs.python.org/3.7/ .. _Python 3.8: https://docs.python.org/3.8/ .. _Python 3.9: https://docs.python.org/3.9/ .. _Python 3.10: https://docs.python.org/3.10/ @@ -241,7 +239,7 @@ Supported versions can be found in our ``noxfile.py`` `config`_. .. _config: https://github.com/googleapis/python-documentai-toolbox/blob/main/noxfile.py -We also explicitly decided to support Python 3 beginning with version 3.7. +We also explicitly decided to support Python 3 beginning with version 3.8. Reasons for this include: - Encouraging use of newest versions of Python 3 diff --git a/README.rst b/README.rst index 8f375a88..fcd6a019 100644 --- a/README.rst +++ b/README.rst @@ -11,8 +11,8 @@ The Document AI Toolbox is in an experimental state. This library is a work-in-p .. |experimental| image:: https://img.shields.io/badge/support-experimental-red.svg :target: https://github.com/googleapis/google-cloud-python/blob/main/README.rst#stability-levels -.. |versions| image:: https://img.shields.io/pypi/pyversions/google-analytics-admin.svg - :target: https://pypi.org/project/google-analytics-admin/ +.. |versions| image:: https://img.shields.io/pypi/pyversions/google-cloud-documentai-toolbox.svg + :target: https://pypi.org/project/google-cloud-documentai-toolbox/ Quick Start @@ -63,7 +63,7 @@ Supported Python Versions Our client libraries are compatible with all current `active`_ and `maintenance`_ versions of Python. -Python >= 3.7 +Python >= 3.8 .. _active: https://devguide.python.org/devcycle/#in-development-main-branch .. 
_maintenance: https://devguide.python.org/devcycle/#maintenance-branches diff --git a/google/cloud/documentai_toolbox/templates/hocr_document_template.xml.j2 b/google/cloud/documentai_toolbox/templates/hocr_document_template.xml.j2 index dad071e1..bc4c0053 100644 --- a/google/cloud/documentai_toolbox/templates/hocr_document_template.xml.j2 +++ b/google/cloud/documentai_toolbox/templates/hocr_document_template.xml.j2 @@ -19,9 +19,9 @@ {% set paridx = loop.index0 -%}

{% for line in paragraph.lines -%} {% set lidx = loop.index0 -%} - {{ line.text }}{% for token in line.tokens -%} + {{ line.text|escape }}{% for token in line.tokens -%} {% set tidx = loop.index0 -%} - {{ token.text }}{% endfor -%} + {{ token.text|escape }}{% endfor -%} {% endfor -%}

{% endfor -%} {% endfor -%} diff --git a/google/cloud/documentai_toolbox/version.py b/google/cloud/documentai_toolbox/version.py index 61e745bb..b2dd4331 100644 --- a/google/cloud/documentai_toolbox/version.py +++ b/google/cloud/documentai_toolbox/version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "0.13.2-alpha" +__version__ = "0.13.3-alpha" diff --git a/noxfile.py b/noxfile.py index a48bb8ca..9a9b1ba2 100644 --- a/noxfile.py +++ b/noxfile.py @@ -34,7 +34,7 @@ DEFAULT_PYTHON_VERSION = "3.8" -UNIT_TEST_PYTHON_VERSIONS: List[str] = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] +UNIT_TEST_PYTHON_VERSIONS: List[str] = ["3.8", "3.9", "3.10", "3.11", "3.12"] UNIT_TEST_STANDARD_DEPENDENCIES = [ "mock", "asyncmock", @@ -48,7 +48,7 @@ UNIT_TEST_EXTRAS: List[str] = [] UNIT_TEST_EXTRAS_BY_PYTHON: Dict[str, List[str]] = {} -SYSTEM_TEST_PYTHON_VERSIONS: List[str] = ["3.8"] +SYSTEM_TEST_PYTHON_VERSIONS: List[str] = ["3.8", "3.9", "3.10", "3.11", "3.12"] SYSTEM_TEST_STANDARD_DEPENDENCIES: List[str] = [ "mock", "pytest", diff --git a/owlbot.py b/owlbot.py index fd06658a..4c6e8171 100644 --- a/owlbot.py +++ b/owlbot.py @@ -28,7 +28,8 @@ # Add templated files # ---------------------------------------------------------------------------- templated_files = common.py_library( - system_test_python_versions=["3.8"], + unit_test_python_versions=["3.8", "3.9", "3.10", "3.11", "3.12"], + system_test_python_versions=["3.8", "3.9", "3.10", "3.11", "3.12"], cov_level=99, intersphinx_dependencies={ "pandas": "https://pandas.pydata.org/pandas-docs/stable/" diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py index 33e8951f..8050fe14 100644 --- a/samples/snippets/noxfile.py +++ b/samples/snippets/noxfile.py @@ -40,7 +40,14 @@ TEST_CONFIG = { # You can opt out from the test for specific Python versions. 
- "ignored_versions": ["2.7", "3.6"], + "ignored_versions": [ + "2.7", + "3.6", + "3.7", + "3.9", + "3.10", + "3.11", + ], # Old samples are opted out of enforcing Python type hints # All new samples should feature them "enforce_type_hints": True, diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index a989fc83..4abbc9d1 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,3 +1,3 @@ -pytest==7.4.4 +pytest==8.1.1 mock==5.1.0 -google-cloud-bigquery==3.17.2 +google-cloud-bigquery==3.19.0 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 4c0a7afd..a249eb5f 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==3.17.2 -google-cloud-documentai==2.24.0 -google-cloud-storage==2.14.0 -google-cloud-documentai-toolbox==0.12.2a0 +google-cloud-bigquery==3.19.0 +google-cloud-documentai==2.24.1 +google-cloud-storage==2.15.0 +google-cloud-documentai-toolbox==0.13.1a0 diff --git a/setup.py b/setup.py index fb5d5fea..23265516 100644 --- a/setup.py +++ b/setup.py @@ -52,33 +52,28 @@ "google.cloud.documentai_toolbox": ["templates/*.xml.j2"], }, install_requires=( - "google-api-core >= 1.31.5, <3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0", - "pandas >= 1.0.0, <3.0.0", - "pandas >= 1.0.0, <2.0.0; python_version<'3.8'", - "pyarrow >= 15.0.0, <16.0.0; python_version>='3.8'", # Required by Pandas #237 - "tabulate >= 0.9.0, <1.0.0", - "proto-plus >= 1.22.0, <2.0.0dev", - "proto-plus >= 1.22.2, <2.0.0dev; python_version>='3.11'", - "grpc-google-iam-v1 >= 0.12.6, < 0.13dev", - "google-cloud-bigquery >= 3.5.0, < 4.0.0dev", - "google-cloud-documentai >= 2.20.0, < 3.0.0dev", - "google-cloud-storage >= 1.31.0, < 3.0.0dev", - "google-cloud-vision >= 2.7.0, < 4.0.0dev", - "numpy >= 1.18.1, < 2.0.0", - "intervaltree >= 3.0.0", - "pikepdf >= 6.2.9, < 9.0.0", - "pikepdf >= 6.2.9, < 7.0.0; 
python_version<'3.8'", - "immutabledict >= 2.0.0, < 4.0.0", - "immutabledict >= 2.0.0, < 3.0.0dev; python_version<'3.8'", - "Pillow >= 9.5.0, < 11.0.0", - "Jinja2 >= 3.1.0, <= 4.0.0", + "google-api-core>=2.15.0, <3.0.0dev", + "pandas[performance,gcp]>=2.0.0, <3.0.0", + "pyarrow>=15.0.0, <16.0.0", + "tabulate>=0.9.0, <1.0.0", + "proto-plus>=1.22.3, <2.0.0dev", + "grpc-google-iam-v1>=0.12.6, <1.0.0dev", + "google-cloud-bigquery>=3.5.0, <4.0.0dev", + "google-cloud-documentai>=2.20.0, <3.0.0dev", + "google-cloud-storage>=1.31.0, <3.0.0dev", + "google-cloud-vision>=2.7.0, <4.0.0dev", + "numpy>=1.23.5, <2.0.0", + "intervaltree>=3.0.0", + "pikepdf>=8.0.0, <9.0.0", + "immutabledict>=2.0.0, <5.0.0", + "Pillow>=10.0.0, <11.0.0", + "Jinja2>=3.1.0, <4.0.0", ), - python_requires=">=3.7", + python_requires=">=3.8", classifiers=[ "Development Status :: 3 - Alpha", "Intended Audience :: Developers", "Operating System :: OS Independent", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", diff --git a/testing/constraints-3.10.txt b/testing/constraints-3.10.txt index c9f0e4bb..e830766f 100644 --- a/testing/constraints-3.10.txt +++ b/testing/constraints-3.10.txt @@ -2,7 +2,6 @@ # This constraints file is required for unit tests. # List all library dependencies and extras in this file. google-api-core -libcst pandas proto-plus grpc-google-iam-v1 diff --git a/testing/constraints-3.11.txt b/testing/constraints-3.11.txt index c9f0e4bb..e830766f 100644 --- a/testing/constraints-3.11.txt +++ b/testing/constraints-3.11.txt @@ -2,7 +2,6 @@ # This constraints file is required for unit tests. # List all library dependencies and extras in this file. 
google-api-core -libcst pandas proto-plus grpc-google-iam-v1 diff --git a/testing/constraints-3.12.txt b/testing/constraints-3.12.txt index c9f0e4bb..e830766f 100644 --- a/testing/constraints-3.12.txt +++ b/testing/constraints-3.12.txt @@ -2,7 +2,6 @@ # This constraints file is required for unit tests. # List all library dependencies and extras in this file. google-api-core -libcst pandas proto-plus grpc-google-iam-v1 diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt deleted file mode 100644 index 3c64ab2e..00000000 --- a/testing/constraints-3.7.txt +++ /dev/null @@ -1,16 +0,0 @@ -# This constraints file is used to check that lower bounds -# are correct in setup.py -# List all library dependencies and extras in this file. -# Pin the version to the lower bound. -# e.g., if setup.py has "google-cloud-foo >= 1.14.0, < 2.0.0dev", -# Then this file should have google-cloud-foo==1.14.0 -google-api-core==1.34.0 -libcst==0.2.5 -pandas==1.0.0 -proto-plus==1.22.0 -grpc-google-iam-v1==0.12.6 -google-cloud-bigquery==3.5.0 -google-cloud-documentai==2.20.0 -google-cloud-storage==2.7.0 -numpy==1.19.5 -pikepdf==6.2.9 diff --git a/testing/constraints-3.8.txt b/testing/constraints-3.8.txt index ed1905e2..9214782a 100644 --- a/testing/constraints-3.8.txt +++ b/testing/constraints-3.8.txt @@ -1,13 +1,17 @@ # -*- coding: utf-8 -*- -# This constraints file is required for unit tests. +# This constraints file is used to check that lower bounds +# are correct in setup.py # List all library dependencies and extras in this file. -google-api-core -libcst -pandas -proto-plus -grpc-google-iam-v1 -google-cloud-bigquery -google-cloud-documentai -google-cloud-storage -numpy==1.21.6 +# Pin the version to the lower bound. 
+# e.g., if setup.py has "google-cloud-foo >= 1.14.0, < 2.0.0dev", +# Then this file should have google-cloud-foo==1.14.0 +google-api-core==2.15.0 +pandas==2.0.0 +proto-plus==1.22.3 +grpc-google-iam-v1==0.12.6 +google-cloud-bigquery==3.5.0 +google-cloud-documentai==2.20.0 +google-cloud-storage==2.7.0 +pandas-gbq==0.21.0 +numpy==1.23.5 pikepdf==8.2.3 diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt index 837480d0..f02a2e2c 100644 --- a/testing/constraints-3.9.txt +++ b/testing/constraints-3.9.txt @@ -2,9 +2,8 @@ # This constraints file is required for unit tests. # List all library dependencies and extras in this file. google-api-core -libcst pandas -pyarrow # Required by Pandas #237 +pyarrow proto-plus grpc-google-iam-v1 google-cloud-bigquery diff --git a/tests/unit/resources/toolbox_invoice_test-0-hocr-escape.json b/tests/unit/resources/toolbox_invoice_test-0-hocr-escape.json new file mode 100644 index 00000000..222f3afc --- /dev/null +++ b/tests/unit/resources/toolbox_invoice_test-0-hocr-escape.json @@ -0,0 +1,284 @@ +{ + "text": "", + "pages": [ + { + "pageNumber": 1, + "dimension": { + "width": 1758.0, + "height": 2275.0, + "unit": "pixels" + }, + "layout": { + "textAnchor": { + "textSegments": [ + { + "endIndex": "435" + } + ] + }, + "boundingPoly": { + "vertices": [ + {}, + { + "x": 1758 + }, + { + "x": 1758, + "y": 2275 + }, + { + "y": 2275 + } + ], + "normalizedVertices": [ + {}, + { + "x": 1.0 + }, + { + "x": 1.0, + "y": 1.0 + }, + { + "y": 1.0 + } + ] + }, + "orientation": 1 + }, + "detectedLanguages": [ + { + "languageCode": "en" + }, + { + "languageCode": "und" + } + ], + "blocks": [ + { + "layout": { + "textAnchor": { + "textSegments": [ + { + "endIndex": "8" + } + ] + }, + "confidence": 0.99258333, + "boundingPoly": { + "vertices": [ + { + "x": 1310, + "y": 220 + }, + { + "x": 1534, + "y": 220 + }, + { + "x": 1534, + "y": 282 + }, + { + "x": 1310, + "y": 282 + } + ], + "normalizedVertices": [ + { + "x": 0.74516493, + "y": 
0.0967033 + }, + { + "x": 0.8725825, + "y": 0.0967033 + }, + { + "x": 0.8725825, + "y": 0.12395605 + }, + { + "x": 0.74516493, + "y": 0.12395605 + } + ] + }, + "orientation": 1 + } + } + ], + "paragraphs": [ + { + "layout": { + "textAnchor": { + "textSegments": [ + { + "endIndex": "8" + } + ] + }, + "confidence": 0.99258333, + "boundingPoly": { + "vertices": [ + { + "x": 1310, + "y": 220 + }, + { + "x": 1534, + "y": 220 + }, + { + "x": 1534, + "y": 282 + }, + { + "x": 1310, + "y": 282 + } + ], + "normalizedVertices": [ + { + "x": 0.74516493, + "y": 0.0967033 + }, + { + "x": 0.8725825, + "y": 0.0967033 + }, + { + "x": 0.8725825, + "y": 0.12395605 + }, + { + "x": 0.74516493, + "y": 0.12395605 + } + ] + }, + "orientation": 1 + } + } + ], + "lines": [ + { + "layout": { + "textAnchor": { + "textSegments": [ + { + "endIndex": "8" + } + ] + }, + "confidence": 0.99258333, + "boundingPoly": { + "vertices": [ + { + "x": 1310, + "y": 220 + }, + { + "x": 1534, + "y": 220 + }, + { + "x": 1534, + "y": 282 + }, + { + "x": 1310, + "y": 282 + } + ], + "normalizedVertices": [ + { + "x": 0.74516493, + "y": 0.0967033 + }, + { + "x": 0.8725825, + "y": 0.0967033 + }, + { + "x": 0.8725825, + "y": 0.12395605 + }, + { + "x": 0.74516493, + "y": 0.12395605 + } + ] + }, + "orientation": 1 + }, + "detectedLanguages": [ + { + "languageCode": "en" + } + ] + } + ], + "tokens": [ + { + "layout": { + "textAnchor": { + "textSegments": [ + { + "endIndex": "8" + } + ] + }, + "confidence": 0.99258333, + "boundingPoly": { + "vertices": [ + { + "x": 1310, + "y": 220 + }, + { + "x": 1534, + "y": 220 + }, + { + "x": 1534, + "y": 282 + }, + { + "x": 1310, + "y": 282 + } + ], + "normalizedVertices": [ + { + "x": 0.74516493, + "y": 0.0967033 + }, + { + "x": 0.8725825, + "y": 0.0967033 + }, + { + "x": 0.8725825, + "y": 0.12395605 + }, + { + "x": 0.74516493, + "y": 0.12395605 + } + ] + }, + "orientation": 1 + }, + "detectedLanguages": [ + { + "languageCode": "en" + } + ] + } + ] + } + ], + "shardInfo": { + 
"shardCount": "1" + } +} \ No newline at end of file diff --git a/tests/unit/resources/toolbox_invoice_test-0-hocr-escape.xml b/tests/unit/resources/toolbox_invoice_test-0-hocr-escape.xml new file mode 100644 index 00000000..362b82a4 --- /dev/null +++ b/tests/unit/resources/toolbox_invoice_test-0-hocr-escape.xml @@ -0,0 +1,16 @@ + + + + +hocr-escape + + + + + + + + +

&lt;Invoice&lt;Invoice

+ + \ No newline at end of file diff --git a/tests/unit/test_document.py b/tests/unit/test_document.py index bcf71ae5..286a482d 100644 --- a/tests/unit/test_document.py +++ b/tests/unit/test_document.py @@ -17,6 +17,7 @@ import json import os import shutil +from xml.etree import ElementTree # try/except added for compatibility with python < 3.8 try: @@ -791,6 +792,9 @@ def test_export_hocr_str(): actual_hocr = wrapped_document.export_hocr_str(title="toolbox_invoice_test-0") assert actual_hocr + element = ElementTree.fromstring(actual_hocr) + assert element is not None + with open( "tests/unit/resources/toolbox_invoice_test_0_hocr.xml", "r", encoding="utf-8" ) as f: @@ -808,6 +812,30 @@ def test_export_hocr_str_with_blank_document(): assert actual_hocr + element = ElementTree.fromstring(actual_hocr) + assert element is not None + + +def test_export_hocr_str_with_escape_characters(): + wrapped_document = document.Document.from_document_path( + document_path="tests/unit/resources/toolbox_invoice_test-0-hocr-escape.json" + ) + + actual_hocr = wrapped_document.export_hocr_str(title="hocr-escape") + assert actual_hocr + + element = ElementTree.fromstring(actual_hocr) + assert element is not None + + with open( + "tests/unit/resources/toolbox_invoice_test-0-hocr-escape.xml", + "r", + encoding="utf-8", + ) as f: + expected = f.read() + + assert actual_hocr == expected + def test_document_to_merged_documentai_document(get_bytes_multiple_files_mock): wrapped_document = document.Document.from_gcs(