diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 7f291dbd..ec696b55 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:4f9b3b106ad0beafc2c8a415e3f62c1a0cc23cabea115dbe841b848f581cfe99 -# created: 2023-10-18T20:26:37.410353675Z + digest: sha256:30470597773378105e239b59fce8eb27cc97375580d592699206d17d117143d0 +# created: 2023-11-03T00:57:07.335914631Z diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index e97d89e4..221806ce 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -28,7 +28,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v4 with: - python-version: "3.9" + python-version: "3.10" - name: Install nox run: | python -m pip install --upgrade setuptools pip wheel diff --git a/CHANGELOG.md b/CHANGELOG.md index 72166310..1940f6c2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +## [0.11.2-alpha](https://github.com/googleapis/python-documentai-toolbox/compare/v0.11.1-alpha...v0.11.2-alpha) (2023-11-07) + + +### Bug Fixes + +* Updates to hOCR Template to follow hOCR Spec ([#195](https://github.com/googleapis/python-documentai-toolbox/issues/195)) ([3f52e82](https://github.com/googleapis/python-documentai-toolbox/commit/3f52e82eaa741cd2c8a08e8398ed6f4b3f65c419)) + ## [0.11.1-alpha](https://github.com/googleapis/python-documentai-toolbox/compare/v0.11.0-alpha...v0.11.1-alpha) (2023-10-23) diff --git a/google/cloud/documentai_toolbox/templates/hocr_document_template.xml.j2 b/google/cloud/documentai_toolbox/templates/hocr_document_template.xml.j2 index 63db0ada..dad071e1 100644 --- a/google/cloud/documentai_toolbox/templates/hocr_document_template.xml.j2 +++ b/google/cloud/documentai_toolbox/templates/hocr_document_template.xml.j2 @@ -6,8 +6,9 @@ + - +
{% for page in pages -%} @@ -16,13 +17,13 @@ {% set bidx = loop.index0 -%} {% for paragraph in docai_block.paragraphs -%} {% set paridx = loop.index0 -%} - {% for line in paragraph.lines -%} +{% for line in paragraph.lines -%} {% set lidx = loop.index0 -%} {{ line.text }}{% for token in line.tokens -%} {% set tidx = loop.index0 -%} {{ token.text }}{% endfor -%} {% endfor -%} -
{% endfor -%} + {% endfor -%} {% endfor -%} {% endfor -%} diff --git a/google/cloud/documentai_toolbox/version.py b/google/cloud/documentai_toolbox/version.py index b0ddaabf..301f0b58 100644 --- a/google/cloud/documentai_toolbox/version.py +++ b/google/cloud/documentai_toolbox/version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "0.11.1-alpha" +__version__ = "0.11.2-alpha" diff --git a/noxfile.py b/noxfile.py index fc49ce9e..779d7921 100644 --- a/noxfile.py +++ b/noxfile.py @@ -301,7 +301,7 @@ def docs(session): ) -@nox.session(python="3.9") +@nox.session(python="3.10") def docfx(session): """Build the docfx yaml files for this library.""" diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 331425b6..e763bc58 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,3 +1,3 @@ -pytest==7.4.2 +pytest==7.4.3 mock==5.1.0 -google-cloud-bigquery==3.12.0 +google-cloud-bigquery==3.13.0 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index f02bf7a1..6d2bd72c 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==3.12.0 -google-cloud-documentai==2.20.1 -google-cloud-storage==2.12.0 -google-cloud-documentai-toolbox==0.10.2a0 +google-cloud-bigquery==3.13.0 +google-cloud-documentai==2.20.2 +google-cloud-storage==2.13.0 +google-cloud-documentai-toolbox==0.11.1a0 diff --git a/samples/snippets/test_convert_document_to_hocr_sample.py b/samples/snippets/test_convert_document_to_hocr_sample.py index e3ed9f2b..776c0b96 100644 --- a/samples/snippets/test_convert_document_to_hocr_sample.py +++ b/samples/snippets/test_convert_document_to_hocr_sample.py @@ -24,7 +24,11 @@ def test_convert_document_to_hocr_sample() -> None: document_path=document_path, document_title=document_title ) - with open("../../tests/unit/resources/toolbox_invoice_test_0_hocr.xml", "r") as f: + with open( + "../../tests/unit/resources/toolbox_invoice_test_0_hocr.xml", + "r", + encoding="utf-8", + ) as f: expected = f.read() assert actual == expected diff --git a/setup.py b/setup.py index 7a29932e..abece197 100644 --- a/setup.py +++ b/setup.py @@ -66,6 +66,7 @@ "immutabledict >= 2.0.0, < 3.0.0dev; python_version<'3.8'", "Pillow >= 9.5.0, < 11.0.0", "Jinja2 >= 3.1.0, <= 4.0.0", + "hocr-spec >= 0.2.0", ), python_requires=">=3.7", classifiers=[ diff --git a/testing/constraints-3.10.txt b/testing/constraints-3.10.txt index c9f0e4bb..25aa22a8 100644 --- a/testing/constraints-3.10.txt +++ b/testing/constraints-3.10.txt @@ -11,3 +11,4 @@ google-cloud-documentai google-cloud-storage numpy pikepdf +hocr-spec diff --git a/testing/constraints-3.11.txt b/testing/constraints-3.11.txt index c9f0e4bb..25aa22a8 100644 --- a/testing/constraints-3.11.txt +++ b/testing/constraints-3.11.txt @@ -11,3 +11,4 @@ google-cloud-documentai google-cloud-storage numpy pikepdf +hocr-spec diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index 3c64ab2e..0a9af7ff 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -14,3 +14,4 @@ google-cloud-documentai==2.20.0 google-cloud-storage==2.7.0 numpy==1.19.5 pikepdf==6.2.9 +hocr-spec==0.2.0 diff --git a/testing/constraints-3.8.txt b/testing/constraints-3.8.txt index ed1905e2..a9d4c497 100644 --- a/testing/constraints-3.8.txt +++ b/testing/constraints-3.8.txt @@ -11,3 +11,4 @@ google-cloud-documentai google-cloud-storage numpy==1.21.6 pikepdf==8.2.3 +hocr-spec diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt index c9f0e4bb..25aa22a8 100644 --- a/testing/constraints-3.9.txt +++ b/testing/constraints-3.9.txt @@ -11,3 +11,4 @@ google-cloud-documentai google-cloud-storage numpy pikepdf +hocr-spec diff --git a/tests/unit/resources/toolbox_invoice_test_0_hocr.xml b/tests/unit/resources/toolbox_invoice_test_0_hocr.xml index 0cd8e171..4e265f7d 100644 --- a/tests/unit/resources/toolbox_invoice_test_0_hocr.xml +++ b/tests/unit/resources/toolbox_invoice_test_0_hocr.xml @@ -6,84 +6,85 @@ + - + -Invoice Invoice -
DATE: 01/01/1970 +DATE: 01/01/1970 DATE: 01/01/1970 INVOICE: NO. 001 INVOICE: NO. 001 -
FROM: Company ABC +FROM: Company ABC FROM: Company ABC user@companyabc.com user@companyabc.com -
TO: John Doe +TO: John Doe TO: John Doe johndoe@email.com johndoe@email.com -
ADDRESS: 111 Main Street +ADDRESS: 111 Main Street ADDRESS: 111 Main Street Anytown, USA Anytown, USA -
ADDRESS: 222 Main Street +ADDRESS: 222 Main Street ADDRESS: 222 Main Street Anytown, USA Anytown, USA -
TERMS: 6 month contract +TERMS: 6 month contract TERMS: 6 month contract DUE: 01/01/2025 DUE: 01/01/2025 -
Item Description +Item Description Item Description -
Quantity +Quantity Quantity -
Price +Price Price -
Amount +Amount Amount -
Tool A +Tool A Tool A -
500 +500 500 -
$1.00 +$1.00 $1.00 -
$500.00 +$500.00 $500.00 -
Service B +Service B Service B -
1 +1 1 -
$900.00 +$900.00 $900.00 -
$900.00 +$900.00 $900.00 -
Resource C +Resource C Resource C -
50 +50 50 -
$12.00 +$12.00 $12.00 -
$600.00 +$600.00 $600.00 -
Subtotal +Subtotal Subtotal -
$2000.00 +$2000.00 $2000.00 -
Tax +Tax Tax -
$140.00 +$140.00 $140.00 -
BALANCE DUE +BALANCE DUE BALANCE DUE -
$2140.00 +$2140.00 $2140.00 -
NOTES: +NOTES: NOTES: -
Supplies used for Project Q. +Supplies used for Project Q. Supplies used for Project Q. -