From 8f847120dae73a50439d6d7c298548a3c1e20cd1 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Thu, 30 Dec 2021 17:51:40 -0500 Subject: [PATCH 01/26] chore: update release_level in repo-metadata.json (#51) * chore: update .repo-metadata.json * revert * remove api_shortname --- .repo-metadata.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.repo-metadata.json b/.repo-metadata.json index 0f8ca37..176e5d9 100644 --- a/.repo-metadata.json +++ b/.repo-metadata.json @@ -2,7 +2,7 @@ "name": "db-dtypes", "name_pretty": "Pandas Data Types for SQL systems (BigQuery, Spanner)", "client_documentation": "https://googleapis.dev/python/db-dtypes/latest/index.html", - "release_level": "beta", + "release_level": "preview", "language": "python", "library_type": "INTEGRATION", "repo": "googleapis/python-db-dtypes-pandas", From 41b9112e4c989e5567427caeca0577b3c1eeef76 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 6 Jan 2022 10:51:36 -0700 Subject: [PATCH 02/26] chore: use python-samples-reviewers (#52) Source-Link: https://github.com/googleapis/synthtool/commit/da9308710160980198d85a4bcddac1d6f6f1a5bc Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:899d5d7cc340fa8ef9d8ae1a8cfba362c6898584f779e156f25ee828ba824610 Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 2 +- .github/CODEOWNERS | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 7519fa3..f33299d 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:0e18b9475fbeb12d9ad4302283171edebb6baf2dfca1bd215ee3b34ed79d95d7 + digest: sha256:899d5d7cc340fa8ef9d8ae1a8cfba362c6898584f779e156f25ee828ba824610 diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index f8714a3..193b436 100644 --- 
a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -8,5 +8,5 @@ # @googleapis/yoshi-python @googleapis/api-bigquery are the default owners for changes in this repo * @googleapis/yoshi-python @googleapis/api-bigquery -# @googleapis/python-samples-owners @googleapis/api-bigquery are the default owners for samples changes -/samples/ @googleapis/python-samples-owners @googleapis/api-bigquery +# @googleapis/python-samples-reviewers @googleapis/api-bigquery are the default owners for samples changes +/samples/ @googleapis/python-samples-reviewers @googleapis/api-bigquery From 83312830fb83ef6ff12377e36060a3e7b60ce570 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 11 Jan 2022 07:36:02 -0500 Subject: [PATCH 03/26] chore(samples): Add check for tests in directory (#54) Source-Link: https://github.com/googleapis/synthtool/commit/52aef91f8d25223d9dbdb4aebd94ba8eea2101f3 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:36a95b8f494e4674dc9eee9af98961293b51b86b3649942aac800ae6c1f796d4 Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 2 +- samples/snippets/noxfile.py | 70 +++++++++++++++++++++---------------- 2 files changed, 40 insertions(+), 32 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index f33299d..6b8a73b 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:899d5d7cc340fa8ef9d8ae1a8cfba362c6898584f779e156f25ee828ba824610 + digest: sha256:36a95b8f494e4674dc9eee9af98961293b51b86b3649942aac800ae6c1f796d4 diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py index 93a9122..3bbef5d 100644 --- a/samples/snippets/noxfile.py +++ b/samples/snippets/noxfile.py @@ -14,6 +14,7 @@ from __future__ import print_function +import glob import os from pathlib import Path import sys @@ -184,37 +185,44 @@ def 
blacken(session: nox.sessions.Session) -> None: def _session_tests( session: nox.sessions.Session, post_install: Callable = None ) -> None: - if TEST_CONFIG["pip_version_override"]: - pip_version = TEST_CONFIG["pip_version_override"] - session.install(f"pip=={pip_version}") - """Runs py.test for a particular project.""" - if os.path.exists("requirements.txt"): - if os.path.exists("constraints.txt"): - session.install("-r", "requirements.txt", "-c", "constraints.txt") - else: - session.install("-r", "requirements.txt") - - if os.path.exists("requirements-test.txt"): - if os.path.exists("constraints-test.txt"): - session.install("-r", "requirements-test.txt", "-c", "constraints-test.txt") - else: - session.install("-r", "requirements-test.txt") - - if INSTALL_LIBRARY_FROM_SOURCE: - session.install("-e", _get_repo_root()) - - if post_install: - post_install(session) - - session.run( - "pytest", - *(PYTEST_COMMON_ARGS + session.posargs), - # Pytest will return 5 when no tests are collected. This can happen - # on travis where slow and flaky tests are excluded. 
- # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html - success_codes=[0, 5], - env=get_pytest_env_vars(), - ) + # check for presence of tests + test_list = glob.glob("*_test.py") + glob.glob("test_*.py") + if len(test_list) == 0: + print("No tests found, skipping directory.") + else: + if TEST_CONFIG["pip_version_override"]: + pip_version = TEST_CONFIG["pip_version_override"] + session.install(f"pip=={pip_version}") + """Runs py.test for a particular project.""" + if os.path.exists("requirements.txt"): + if os.path.exists("constraints.txt"): + session.install("-r", "requirements.txt", "-c", "constraints.txt") + else: + session.install("-r", "requirements.txt") + + if os.path.exists("requirements-test.txt"): + if os.path.exists("constraints-test.txt"): + session.install( + "-r", "requirements-test.txt", "-c", "constraints-test.txt" + ) + else: + session.install("-r", "requirements-test.txt") + + if INSTALL_LIBRARY_FROM_SOURCE: + session.install("-e", _get_repo_root()) + + if post_install: + post_install(session) + + session.run( + "pytest", + *(PYTEST_COMMON_ARGS + session.posargs), + # Pytest will return 5 when no tests are collected. This can happen + # on travis where slow and flaky tests are excluded. 
+ # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html + success_codes=[0, 5], + env=get_pytest_env_vars(), + ) @nox.session(python=ALL_VERSIONS) From 97767e09a9c43c1681f603b97127e63ace8621a0 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 14 Jan 2022 21:42:12 +0000 Subject: [PATCH 04/26] chore(python): update release.sh to use keystore (#56) build: switch to release-please for tagging --- .github/.OwlBot.lock.yaml | 2 +- .github/release-please.yml | 1 + .github/release-trigger.yml | 1 + .kokoro/release.sh | 2 +- .kokoro/release/common.cfg | 12 +++++++++++- 5 files changed, 15 insertions(+), 3 deletions(-) create mode 100644 .github/release-trigger.yml diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 6b8a73b..eecb84c 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:36a95b8f494e4674dc9eee9af98961293b51b86b3649942aac800ae6c1f796d4 + digest: sha256:ae600f36b6bc972b368367b6f83a1d91ec2c82a4a116b383d67d547c56fe6de3 diff --git a/.github/release-please.yml b/.github/release-please.yml index 4507ad0..466597e 100644 --- a/.github/release-please.yml +++ b/.github/release-please.yml @@ -1 +1,2 @@ releaseType: python +handleGHRelease: true diff --git a/.github/release-trigger.yml b/.github/release-trigger.yml new file mode 100644 index 0000000..d4ca941 --- /dev/null +++ b/.github/release-trigger.yml @@ -0,0 +1 @@ +enabled: true diff --git a/.kokoro/release.sh b/.kokoro/release.sh index 297fec5..aa6052b 100755 --- a/.kokoro/release.sh +++ b/.kokoro/release.sh @@ -26,7 +26,7 @@ python3 -m pip install --upgrade twine wheel setuptools export PYTHONUNBUFFERED=1 # Move into the package, build the distribution and upload. 
-TWINE_PASSWORD=$(cat "${KOKORO_GFILE_DIR}/secret_manager/google-cloud-pypi-token") +TWINE_PASSWORD=$(cat "${KOKORO_KEYSTORE_DIR}/73713_google-cloud-pypi-token-keystore-1") cd github/python-db-dtypes-pandas python3 setup.py sdist bdist_wheel twine upload --username __token__ --password "${TWINE_PASSWORD}" dist/* diff --git a/.kokoro/release/common.cfg b/.kokoro/release/common.cfg index 5917933..44cc384 100644 --- a/.kokoro/release/common.cfg +++ b/.kokoro/release/common.cfg @@ -23,8 +23,18 @@ env_vars: { value: "github/python-db-dtypes-pandas/.kokoro/release.sh" } +# Fetch PyPI password +before_action { + fetch_keystore { + keystore_resource { + keystore_config_id: 73713 + keyname: "google-cloud-pypi-token-keystore-1" + } + } +} + # Tokens needed to report release status back to GitHub env_vars: { key: "SECRET_MANAGER_KEYS" - value: "releasetool-publish-reporter-app,releasetool-publish-reporter-googleapis-installation,releasetool-publish-reporter-pem,google-cloud-pypi-token" + value: "releasetool-publish-reporter-app,releasetool-publish-reporter-googleapis-installation,releasetool-publish-reporter-pem" } From 40e7e2ca01f7ae5e7978c2c4a0910a6ad0cc6e86 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 18 Jan 2022 20:25:26 -0500 Subject: [PATCH 05/26] chore(python): Noxfile recognizes that tests can live in a folder (#58) Source-Link: https://github.com/googleapis/synthtool/commit/4760d8dce1351d93658cb11d02a1b7ceb23ae5d7 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:f0e4b51deef56bed74d3e2359c583fc104a8d6367da3984fc5c66938db738828 Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 2 +- samples/snippets/noxfile.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index eecb84c..52d79c1 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: 
gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:ae600f36b6bc972b368367b6f83a1d91ec2c82a4a116b383d67d547c56fe6de3 + digest: sha256:f0e4b51deef56bed74d3e2359c583fc104a8d6367da3984fc5c66938db738828 diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py index 3bbef5d..20cdfc6 100644 --- a/samples/snippets/noxfile.py +++ b/samples/snippets/noxfile.py @@ -187,6 +187,7 @@ def _session_tests( ) -> None: # check for presence of tests test_list = glob.glob("*_test.py") + glob.glob("test_*.py") + test_list.extend(glob.glob("tests")) if len(test_list) == 0: print("No tests found, skipping directory.") else: From 80a907fcf3636d58f3e6f4803b8ff8c533ad2e3e Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 21 Jan 2022 07:31:11 -0500 Subject: [PATCH 06/26] ci(python): run lint / unit tests / docs as GH actions (#59) * ci(python): run lint / unit tests / docs as GH actions Source-Link: https://github.com/googleapis/synthtool/commit/57be0cdb0b94e1669cee0ca38d790de1dfdbcd44 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:ed1f9983d5a935a89fe8085e8bb97d94e41015252c5b6c9771257cf8624367e6 * add commit to trigger gh actions Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou --- .github/.OwlBot.lock.yaml | 16 +++++++++- .github/workflows/docs.yml | 38 +++++++++++++++++++++++ .github/workflows/lint.yml | 25 +++++++++++++++ .github/workflows/unittest.yml | 57 ++++++++++++++++++++++++++++++++++ 4 files changed, 135 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/docs.yml create mode 100644 .github/workflows/lint.yml create mode 100644 .github/workflows/unittest.yml diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 52d79c1..b668c04 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,17 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the 
"License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:f0e4b51deef56bed74d3e2359c583fc104a8d6367da3984fc5c66938db738828 + digest: sha256:ed1f9983d5a935a89fe8085e8bb97d94e41015252c5b6c9771257cf8624367e6 + diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000..f7b8344 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,38 @@ +on: + pull_request: + branches: + - main +name: docs +jobs: + docs: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: "3.10" + - name: Install nox + run: | + python -m pip install --upgrade setuptools pip wheel + python -m pip install nox + - name: Run docs + run: | + nox -s docs + docfx: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: "3.10" + - name: Install nox + run: | + python -m pip install --upgrade setuptools pip wheel + python -m pip install nox + - name: Run docfx + run: | + nox -s docfx diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..1e8b05c --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,25 @@ +on: + pull_request: + branches: + - main +name: lint +jobs: + lint: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Setup Python + uses: 
actions/setup-python@v2 + with: + python-version: "3.10" + - name: Install nox + run: | + python -m pip install --upgrade setuptools pip wheel + python -m pip install nox + - name: Run lint + run: | + nox -s lint + - name: Run lint_setup_py + run: | + nox -s lint_setup_py diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml new file mode 100644 index 0000000..074ee25 --- /dev/null +++ b/.github/workflows/unittest.yml @@ -0,0 +1,57 @@ +on: + pull_request: + branches: + - main +name: unittest +jobs: + unit: + runs-on: ubuntu-latest + strategy: + matrix: + python: ['3.6', '3.7', '3.8', '3.9', '3.10'] + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python }} + - name: Install nox + run: | + python -m pip install --upgrade setuptools pip wheel + python -m pip install nox + - name: Run unit tests + env: + COVERAGE_FILE: .coverage-${{ matrix.python }} + run: | + nox -s unit-${{ matrix.python }} + - name: Upload coverage results + uses: actions/upload-artifact@v2 + with: + name: coverage-artifacts + path: .coverage-${{ matrix.python }} + + cover: + runs-on: ubuntu-latest + needs: + - unit + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: "3.10" + - name: Install coverage + run: | + python -m pip install --upgrade setuptools pip wheel + python -m pip install coverage + - name: Download coverage results + uses: actions/download-artifact@v2 + with: + name: coverage-artifacts + path: .coverage-results/ + - name: Report coverage results + run: | + coverage combine .coverage-results/.coverage* + coverage report --show-missing --fail-under=100 From 5cb2c6bc534ff2ac6b1bf2fa79ffe3c60b9c3c5f Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 25 Jan 2022 15:42:25 -0600 Subject: [PATCH 07/26] chore: temporarily add custom repo settings (#61) --- .github/sync-repo-settings.yaml 
| 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 .github/sync-repo-settings.yaml diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml new file mode 100644 index 0000000..ebe13aa --- /dev/null +++ b/.github/sync-repo-settings.yaml @@ -0,0 +1,31 @@ +# https://github.com/googleapis/repo-automation-bots/tree/main/packages/sync-repo-settings +# Rules for main branch protection +branchProtectionRules: +# Identifies the protection rule pattern. Name of the branch to be protected. +# Defaults to `main` +- pattern: main + requiresCodeOwnerReviews: true + requiresStrictStatusChecks: true + requiredStatusCheckContexts: + - 'cla/google' + - 'OwlBot Post Processor' + - 'docs' + - 'docfx' + - 'lint' + - 'unit (3.6)' + - 'unit (3.7)' + - 'unit (3.8)' + - 'unit (3.9)' + - 'unit (3.10)' + - 'cover' +permissionRules: + - team: actools-python + permission: admin + - team: actools + permission: admin + - team: yoshi-python + permission: push + - team: python-samples-owners + permission: push + - team: python-samples-reviewers + permission: push From e9d41d17b5d6a7d83c46e2497feb8e314545adcb Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 26 Jan 2022 09:17:21 -0600 Subject: [PATCH 08/26] fix: use public pandas APIs where possible (#60) * refactor: use public pandas APIs where possible * no need to override take * backport take implementation * move remaining private pandas methods to backports * add note about _validate_scalar to docstring * comment why we can't use public mixin --- db_dtypes/__init__.py | 16 +++---- db_dtypes/core.py | 80 ++++++++--------------------------- db_dtypes/pandas_backports.py | 47 +++++++++++++++++++- 3 files changed, 69 insertions(+), 74 deletions(-) diff --git a/db_dtypes/__init__.py b/db_dtypes/__init__.py index 056be28..a518a0b 100644 --- a/db_dtypes/__init__.py +++ b/db_dtypes/__init__.py @@ -22,13 +22,7 @@ import numpy import packaging.version import pandas -import 
pandas.compat.numpy.function -import pandas.core.algorithms -import pandas.core.arrays -import pandas.core.dtypes.base -import pandas.core.dtypes.dtypes -import pandas.core.dtypes.generic -import pandas.core.nanops +import pandas.api.extensions import pyarrow import pyarrow.compute @@ -44,7 +38,7 @@ pandas_release = packaging.version.parse(pandas.__version__).release -@pandas.core.dtypes.dtypes.register_extension_dtype +@pandas.api.extensions.register_extension_dtype class TimeDtype(core.BaseDatetimeDtype): """ Extension dtype for time data. @@ -113,7 +107,7 @@ def _datetime( .as_py() ) - if scalar is None: + if pandas.isna(scalar): return None if isinstance(scalar, datetime.time): return pandas.Timestamp( @@ -194,7 +188,7 @@ def __arrow_array__(self, type=None): ) -@pandas.core.dtypes.dtypes.register_extension_dtype +@pandas.api.extensions.register_extension_dtype class DateDtype(core.BaseDatetimeDtype): """ Extension dtype for time data. @@ -238,7 +232,7 @@ def _datetime( if isinstance(scalar, (pyarrow.Date32Scalar, pyarrow.Date64Scalar)): scalar = scalar.as_py() - if scalar is None: + if pandas.isna(scalar): return None elif isinstance(scalar, datetime.date): return pandas.Timestamp( diff --git a/db_dtypes/core.py b/db_dtypes/core.py index 3ade198..05daf37 100644 --- a/db_dtypes/core.py +++ b/db_dtypes/core.py @@ -12,20 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Any, Optional, Sequence +from typing import Optional import numpy import pandas -from pandas._libs import NaT +from pandas import NaT import pandas.api.extensions -import pandas.compat.numpy.function -import pandas.core.algorithms -import pandas.core.arrays -import pandas.core.dtypes.base -from pandas.core.dtypes.common import is_dtype_equal, is_list_like, pandas_dtype -import pandas.core.dtypes.dtypes -import pandas.core.dtypes.generic -import pandas.core.nanops +from pandas.api.types import is_dtype_equal, is_list_like, pandas_dtype from db_dtypes import pandas_backports @@ -107,42 +100,11 @@ def isna(self): return pandas.isna(self._ndarray) def _validate_scalar(self, value): - if pandas.isna(value): - return None - - if not isinstance(value, self.dtype.type): - raise ValueError(value) - - return value - - def take( - self, - indices: Sequence[int], - *, - allow_fill: bool = False, - fill_value: Any = None, - ): - indices = numpy.asarray(indices, dtype=numpy.intp) - data = self._ndarray - if allow_fill: - fill_value = self._validate_scalar(fill_value) - fill_value = ( - numpy.datetime64() if fill_value is None else self._datetime(fill_value) - ) - if (indices < -1).any(): - raise ValueError( - "take called with negative indexes other than -1," - " when a fill value is provided." - ) - out = data.take(indices) - if allow_fill: - out[indices == -1] = fill_value - - return self.__class__(out) - - # TODO: provide implementations of dropna, fillna, unique, - # factorize, argsort, searchsoeted for better performance over - # abstract implementations. + """ + Validate and convert a scalar value to datetime64[ns] for storage in + backing NumPy array. 
+ """ + return self._datetime(value) def any( self, @@ -152,10 +114,8 @@ def any( keepdims: bool = False, skipna: bool = True, ): - pandas.compat.numpy.function.validate_any( - (), {"out": out, "keepdims": keepdims} - ) - result = pandas.core.nanops.nanany(self._ndarray, axis=axis, skipna=skipna) + pandas_backports.numpy_validate_any((), {"out": out, "keepdims": keepdims}) + result = pandas_backports.nanany(self._ndarray, axis=axis, skipna=skipna) return result def all( @@ -166,22 +126,20 @@ def all( keepdims: bool = False, skipna: bool = True, ): - pandas.compat.numpy.function.validate_all( - (), {"out": out, "keepdims": keepdims} - ) - result = pandas.core.nanops.nanall(self._ndarray, axis=axis, skipna=skipna) + pandas_backports.numpy_validate_all((), {"out": out, "keepdims": keepdims}) + result = pandas_backports.nanall(self._ndarray, axis=axis, skipna=skipna) return result def min(self, *, axis: Optional[int] = None, skipna: bool = True, **kwargs): - pandas.compat.numpy.function.validate_min((), kwargs) - result = pandas.core.nanops.nanmin( + pandas_backports.numpy_validate_min((), kwargs) + result = pandas_backports.nanmin( values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna ) return self._box_func(result) def max(self, *, axis: Optional[int] = None, skipna: bool = True, **kwargs): - pandas.compat.numpy.function.validate_max((), kwargs) - result = pandas.core.nanops.nanmax( + pandas_backports.numpy_validate_max((), kwargs) + result = pandas_backports.nanmax( values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna ) return self._box_func(result) @@ -197,11 +155,9 @@ def median( keepdims: bool = False, skipna: bool = True, ): - pandas.compat.numpy.function.validate_median( + pandas_backports.numpy_validate_median( (), {"out": out, "overwrite_input": overwrite_input, "keepdims": keepdims}, ) - result = pandas.core.nanops.nanmedian( - self._ndarray, axis=axis, skipna=skipna - ) + result = pandas_backports.nanmedian(self._ndarray, axis=axis, 
skipna=skipna) return self._box_func(result) diff --git a/db_dtypes/pandas_backports.py b/db_dtypes/pandas_backports.py index 003224f..4b733cc 100644 --- a/db_dtypes/pandas_backports.py +++ b/db_dtypes/pandas_backports.py @@ -20,15 +20,32 @@ """ import operator +from typing import Any import numpy import packaging.version import pandas -from pandas._libs.lib import is_integer +from pandas.api.types import is_integer +import pandas.compat.numpy.function +import pandas.core.nanops pandas_release = packaging.version.parse(pandas.__version__).release +# Create aliases for private methods in case they move in a future version. +nanall = pandas.core.nanops.nanall +nanany = pandas.core.nanops.nanany +nanmax = pandas.core.nanops.nanmax +nanmin = pandas.core.nanops.nanmin +numpy_validate_all = pandas.compat.numpy.function.validate_all +numpy_validate_any = pandas.compat.numpy.function.validate_any +numpy_validate_max = pandas.compat.numpy.function.validate_max +numpy_validate_min = pandas.compat.numpy.function.validate_min + +if pandas_release >= (1, 2): + nanmedian = pandas.core.nanops.nanmedian + numpy_validate_median = pandas.compat.numpy.function.validate_median + def import_default(module_name, force=False, default=None): """ @@ -55,6 +72,10 @@ def import_default(module_name, force=False, default=None): return getattr(module, name, default) +# pandas.core.arraylike.OpsMixin is private, but the related public API +# "ExtensionScalarOpsMixin" is not sufficient for adding dates to times. +# It results in unsupported operand type(s) for +: 'datetime.time' and +# 'datetime.date' @import_default("pandas.core.arraylike") class OpsMixin: def _cmp_method(self, other, op): # pragma: NO COVER @@ -81,6 +102,8 @@ def __ge__(self, other): __add__ = __radd__ = __sub__ = lambda self, other: NotImplemented +# TODO: use public API once pandas 1.5 / 2.x is released. 
+# See: https://github.com/pandas-dev/pandas/pull/45544 @import_default("pandas.core.arrays._mixins", pandas_release < (1, 3)) class NDArrayBackedExtensionArray(pandas.core.arrays.base.ExtensionArray): @@ -130,6 +153,28 @@ def copy(self): def repeat(self, n): return self.__class__(self._ndarray.repeat(n), self._dtype) + def take( + self, + indices, + *, + allow_fill: bool = False, + fill_value: Any = None, + axis: int = 0, + ): + from pandas.core.algorithms import take + + if allow_fill: + fill_value = self._validate_scalar(fill_value) + + new_data = take( + self._ndarray, + indices, + allow_fill=allow_fill, + fill_value=fill_value, + axis=axis, + ) + return self._from_backing_data(new_data) + @classmethod def _concat_same_type(cls, to_concat, axis=0): dtypes = {str(x.dtype) for x in to_concat} From f903c2c68da1629241cf3bf37e1226babae669f4 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 2 Feb 2022 16:17:42 -0600 Subject: [PATCH 09/26] fix!: use `pandas.NaT` for missing values in dbdate and dbtime dtypes (#67) * fix!: use `pandas.NaT` for missing values in dbdate and dbtime dtypes This makes them consistent with other date/time dtypes, as well as internally consistent with the advertised `dtype.na_value`. 
* adjust pandas version support for median BREAKING-CHANGE: dbdate and dbtime dtypes return NaT instead of None for missing values Release-As: 0.4.0 --- db_dtypes/__init__.py | 8 +-- db_dtypes/core.py | 5 +- db_dtypes/pandas_backports.py | 2 +- testing/constraints-3.9.txt | 3 +- tests/unit/test_date.py | 27 ++++++++ tests/unit/test_dtypes.py | 112 ++++++++++++++++++---------------- tests/unit/test_time.py | 30 +++++++++ 7 files changed, 127 insertions(+), 60 deletions(-) diff --git a/db_dtypes/__init__.py b/db_dtypes/__init__.py index a518a0b..a222e6d 100644 --- a/db_dtypes/__init__.py +++ b/db_dtypes/__init__.py @@ -145,8 +145,8 @@ def _datetime( raise TypeError("Invalid value type", scalar) def _box_func(self, x): - if pandas.isnull(x): - return None + if pandas.isna(x): + return pandas.NaT try: return x.astype("= (1, 2): +if pandas_release >= (1, 3): nanmedian = pandas.core.nanops.nanmedian numpy_validate_median = pandas.compat.numpy.function.validate_median diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt index eebb9da..d814dcd 100644 --- a/testing/constraints-3.9.txt +++ b/testing/constraints-3.9.txt @@ -1 +1,2 @@ -sqlalchemy>=1.4.13 +# Make sure we test with pandas 1.3.0. The Python version isn't that relevant. +pandas==1.3.0 diff --git a/tests/unit/test_date.py b/tests/unit/test_date.py index b906f24..bf877ea 100644 --- a/tests/unit/test_date.py +++ b/tests/unit/test_date.py @@ -19,6 +19,7 @@ # To register the types. 
import db_dtypes # noqa +from db_dtypes import pandas_backports @pytest.mark.parametrize( @@ -65,3 +66,29 @@ def test_date_parsing(value, expected): def test_date_parsing_errors(value, error): with pytest.raises(ValueError, match=error): pandas.Series([value], dtype="dbdate") + + +@pytest.mark.skipif( + not hasattr(pandas_backports, "numpy_validate_median"), + reason="median not available with this version of pandas", +) +@pytest.mark.parametrize( + "values, expected", + [ + (["1970-01-01", "1900-01-01", "2000-01-01"], datetime.date(1970, 1, 1)), + ( + [ + None, + "1900-01-01", + pandas.NA if hasattr(pandas, "NA") else None, + pandas.NaT, + float("nan"), + ], + datetime.date(1900, 1, 1), + ), + (["2222-02-01", "2222-02-03"], datetime.date(2222, 2, 2)), + ], +) +def test_date_median(values, expected): + series = pandas.Series(values, dtype="dbdate") + assert series.median() == expected diff --git a/tests/unit/test_dtypes.py b/tests/unit/test_dtypes.py index aacbf0b..66074d8 100644 --- a/tests/unit/test_dtypes.py +++ b/tests/unit/test_dtypes.py @@ -23,8 +23,8 @@ pandas_release = packaging.version.parse(pd.__version__).release SAMPLE_RAW_VALUES = dict( - dbdate=(datetime.date(2021, 2, 2), "2021-2-3", None), - dbtime=(datetime.time(1, 2, 2), "1:2:3.5", None), + dbdate=(datetime.date(2021, 2, 2), "2021-2-3", pd.NaT), + dbtime=(datetime.time(1, 2, 2), "1:2:3.5", pd.NaT), ) SAMPLE_VALUES = dict( dbdate=( @@ -90,7 +90,7 @@ def test_array_construction(dtype, factory_method): factory = getattr(factory, factory_method) if factory_method == "_from_sequence_of_strings": sample_raw_values = [ - str(v) if v is not None else v for v in sample_raw_values + str(v) if not pd.isna(v) else v for v in sample_raw_values ] a = factory(sample_raw_values) assert len(a) == 3 @@ -98,11 +98,11 @@ def test_array_construction(dtype, factory_method): assert a.shape == (3,) sample_values = SAMPLE_VALUES[dtype] assert a[0], a[1] == sample_values[:2] - assert a[2] is None + assert pd.isna(a[2]) and 
a[2] is pd.NaT # implementation details: assert a.nbytes == 24 - assert np.array_equal( + np.testing.assert_array_equal( a._ndarray == np.array(SAMPLE_DT_VALUES[dtype][:2] + ("NaT",), dtype="datetime64[us]"), [True, True, False], @@ -121,7 +121,7 @@ def test_time_series_construction(dtype): s = pd.Series(SAMPLE_RAW_VALUES[dtype], dtype=dtype) assert len(s) == 3 assert s[0], s[1] == sample_values[:2] - assert s[2] is None + assert s[2] is pd.NaT assert s.nbytes == 24 assert isinstance(s.array, _cls(dtype)) @@ -166,8 +166,8 @@ def test_timearray_comparisons( # Note that the right_obs comparisons work because # they're called on right_obs rather then left, because # TimeArrays only support comparisons with TimeArrays. - assert np.array_equal(comparisons[op](left, r), expected) - assert np.array_equal(complements[op](left, r), ~expected) + np.testing.assert_array_equal(comparisons[op](left, r), expected) + np.testing.assert_array_equal(complements[op](left, r), ~expected) # Bad shape for bad_shape in ([], [1, 2, 3]): @@ -186,10 +186,10 @@ def test_timearray_comparisons( [1], # a single-element array gets broadcast ): if op == "==": - assert np.array_equal( + np.testing.assert_array_equal( comparisons[op](left, np.array(bad_items)), np.array([False, False]) ) - assert np.array_equal( + np.testing.assert_array_equal( complements[op](left, np.array(bad_items)), np.array([True, True]) ) else: @@ -204,7 +204,7 @@ def test_timearray_comparisons( def test___getitem___arrayindex(dtype): cls = _cls(dtype) sample_values = SAMPLE_VALUES[dtype] - assert np.array_equal( + np.testing.assert_array_equal( cls(sample_values)[[1, 3]], cls([sample_values[1], sample_values[3]]), ) @@ -215,21 +215,23 @@ def test_timearray_slicing(dtype): b = a[:] assert b is not a assert b.__class__ == a.__class__ - assert np.array_equal(b, a) + np.testing.assert_array_equal(b._ndarray, a._ndarray) sample_values = SAMPLE_VALUES[dtype] cls = _cls(dtype) - assert np.array_equal(a[:1], 
cls._from_sequence(sample_values[:1])) + np.testing.assert_array_equal( + a[:1]._ndarray, cls._from_sequence(sample_values[:1])._ndarray + ) # Assignment works: a[:1] = cls._from_sequence([sample_values[2]]) - assert np.array_equal( + np.testing.assert_array_equal( a[:2], cls._from_sequence([sample_values[2], sample_values[1]]) ) # Series also work: s = pd.Series(SAMPLE_RAW_VALUES[dtype], dtype=dtype) - assert np.array_equal(s[:1].array, cls._from_sequence([sample_values[0]])) + np.testing.assert_array_equal(s[:1].array, cls._from_sequence([sample_values[0]])) @for_date_and_time @@ -238,9 +240,13 @@ def test_item_assignment(dtype): sample_values = SAMPLE_VALUES[dtype] cls = _cls(dtype) a[0] = sample_values[2] - assert np.array_equal(a, cls._from_sequence([sample_values[2], sample_values[1]])) + np.testing.assert_array_equal( + a, cls._from_sequence([sample_values[2], sample_values[1]]) + ) a[1] = None - assert np.array_equal(a, cls._from_sequence([sample_values[2], None])) + np.testing.assert_array_equal( + a._ndarray, cls._from_sequence([sample_values[2], None])._ndarray + ) @for_date_and_time @@ -249,9 +255,9 @@ def test_array_assignment(dtype): cls = _cls(dtype) sample_values = SAMPLE_VALUES[dtype] a[a.isna()] = sample_values[3] - assert np.array_equal(a, cls([sample_values[i] for i in (0, 1, 3)])) + np.testing.assert_array_equal(a, cls([sample_values[i] for i in (0, 1, 3)])) a[[0, 2]] = sample_values[2] - assert np.array_equal(a, cls([sample_values[i] for i in (2, 1, 2)])) + np.testing.assert_array_equal(a, cls([sample_values[i] for i in (2, 1, 2)])) @for_date_and_time @@ -270,7 +276,7 @@ def test_copy(dtype): b = a.copy() assert b is not a assert b._ndarray is not a._ndarray - assert np.array_equal(b, a) + np.testing.assert_array_equal(b, a) @for_date_and_time @@ -280,7 +286,7 @@ def test_from_ndarray_copy(dtype): a = cls._from_sequence(sample_values) b = cls(a._ndarray, copy=True) assert b._ndarray is not a._ndarray - assert np.array_equal(b, a) + 
np.testing.assert_array_equal(b, a) @for_date_and_time @@ -310,7 +316,7 @@ def test__validate_scalar_invalid(dtype): [ (False, None), (True, None), - (True, pd._libs.NaT if pd else None), + (True, pd.NaT if pd else None), (True, np.NaN if pd else None), (True, 42), ], @@ -326,7 +332,7 @@ def test_take(dtype, allow_fill, fill_value): else datetime.time(0, 42, 42, 424242) ) else: - expected_fill = None + expected_fill = pd.NaT b = a.take([1, -1, 3], allow_fill=True, fill_value=fill_value) expect = [sample_values[1], expected_fill, sample_values[3]] else: @@ -370,7 +376,7 @@ def test__concat_same_type_not_same_type(dtype): @for_date_and_time def test_dropna(dtype): - assert np.array_equal(_make_one(dtype).dropna(), _make_one(dtype)[:2]) + np.testing.assert_array_equal(_make_one(dtype).dropna(), _make_one(dtype)[:2]) @pytest.mark.parametrize( @@ -398,14 +404,18 @@ def test_fillna(dtype, value, meth, limit, expect): elif value is not None: value = sample_values[value] expect = cls([None if i is None else sample_values[i] for i in expect]) - assert np.array_equal(a.fillna(value, meth, limit), expect) + np.testing.assert_array_equal( + a.fillna(value, meth, limit)._ndarray, expect._ndarray + ) @for_date_and_time def test_unique(dtype): cls = _cls(dtype) sample_values = SAMPLE_VALUES[dtype] - assert np.array_equal(cls(sample_values * 3).unique(), cls(sample_values),) + np.testing.assert_array_equal( + cls(sample_values * 3).unique(), cls(sample_values), + ) @for_date_and_time @@ -421,7 +431,7 @@ def test_astype_copy(dtype): b = a.astype(a.dtype, copy=True) assert b is not a assert b.__class__ is a.__class__ - assert np.array_equal(b, a) + np.testing.assert_array_equal(b._ndarray, a._ndarray) @pytest.mark.parametrize( @@ -452,7 +462,7 @@ def test_asdatetime(dtype, same): b = a.astype(dt, copy=copy) assert b is not a._ndarray - assert np.array_equal(b[:2], a._ndarray[:2]) + np.testing.assert_array_equal(b[:2], a._ndarray[:2]) assert pd.isna(b[2]) and str(b[2]) == "NaT" @@ 
-482,7 +492,7 @@ def test_astimedelta(dtype): a = _cls("dbtime")([t, None]) b = a.astype(dtype) - np.array_equal(b[:1], expect) + np.testing.assert_array_equal(b[:1], expect) assert pd.isna(b[1]) and str(b[1]) == "NaT" @@ -523,7 +533,7 @@ def test_min_max_median(dtype): a = cls(data) assert a.min() == sample_values[0] assert a.max() == sample_values[-1] - if pandas_release >= (1, 2): + if pandas_release >= (1, 3): assert ( a.median() == datetime.time(1, 2, 4) if dtype == "dbtime" @@ -531,26 +541,26 @@ def test_min_max_median(dtype): ) empty = cls([]) - assert empty.min() is None - assert empty.max() is None - if pandas_release >= (1, 2): - assert empty.median() is None + assert empty.min() is pd.NaT + assert empty.max() is pd.NaT + if pandas_release >= (1, 3): + assert empty.median() is pd.NaT empty = cls([None]) - assert empty.min() is None - assert empty.max() is None - assert empty.min(skipna=False) is None - assert empty.max(skipna=False) is None - if pandas_release >= (1, 2): + assert empty.min() is pd.NaT + assert empty.max() is pd.NaT + assert empty.min(skipna=False) is pd.NaT + assert empty.max(skipna=False) is pd.NaT + if pandas_release >= (1, 3): with pytest.warns(RuntimeWarning, match="empty slice"): # It's weird that we get the warning here, and not # below. 
:/ - assert empty.median() is None - assert empty.median(skipna=False) is None + assert empty.median() is pd.NaT + assert empty.median(skipna=False) is pd.NaT a = _make_one(dtype) assert a.min() == sample_values[0] assert a.max() == sample_values[1] - if pandas_release >= (1, 2): + if pandas_release >= (1, 3): assert ( a.median() == datetime.time(1, 2, 2, 750000) if dtype == "dbtime" @@ -563,14 +573,14 @@ def test_date_add(): times = _cls("dbtime")(SAMPLE_VALUES["dbtime"]) expect = dates.astype("datetime64") + times.astype("timedelta64") - assert np.array_equal(dates + times, expect) - assert np.array_equal(times + dates, expect) + np.testing.assert_array_equal(dates + times, expect) + np.testing.assert_array_equal(times + dates, expect) do = pd.DateOffset(days=1) expect = dates.astype("object") + do - assert np.array_equal(dates + do, expect) + np.testing.assert_array_equal(dates + do, expect) if pandas_release >= (1, 1): - assert np.array_equal(do + dates, expect) + np.testing.assert_array_equal(do + dates, expect) with pytest.raises(TypeError): dates + times.astype("timedelta64") @@ -587,8 +597,8 @@ def test_date_add(): do = pd.Series([pd.DateOffset(days=i) for i in range(4)]) expect = dates.astype("object") + do - assert np.array_equal(dates + do, expect) - assert np.array_equal(do + dates, expect) + np.testing.assert_array_equal(dates + do, expect) + np.testing.assert_array_equal(do + dates, expect) def test_date_sub(): @@ -602,11 +612,11 @@ def test_date_sub(): ) ) expect = dates.astype("datetime64") - dates2.astype("datetime64") - assert np.array_equal(dates - dates2, expect) + np.testing.assert_array_equal(dates - dates2, expect) do = pd.DateOffset(days=1) expect = dates.astype("object") - do - assert np.array_equal(dates - do, expect) + np.testing.assert_array_equal(dates - do, expect) with pytest.raises(TypeError): dates - 42 @@ -620,4 +630,4 @@ def test_date_sub(): do = pd.Series([pd.DateOffset(days=i) for i in range(4)]) expect = dates.astype("object") 
- do - assert np.array_equal(dates - do, expect) + np.testing.assert_array_equal(dates - do, expect) diff --git a/tests/unit/test_time.py b/tests/unit/test_time.py index ba45949..8ecb996 100644 --- a/tests/unit/test_time.py +++ b/tests/unit/test_time.py @@ -19,6 +19,7 @@ # To register the types. import db_dtypes # noqa +from db_dtypes import pandas_backports @pytest.mark.parametrize( @@ -82,3 +83,32 @@ def test_time_parsing(value, expected): def test_time_parsing_errors(value, error): with pytest.raises(ValueError, match=error): pandas.Series([value], dtype="dbtime") + + +@pytest.mark.skipif( + not hasattr(pandas_backports, "numpy_validate_median"), + reason="median not available with this version of pandas", +) +@pytest.mark.parametrize( + "values, expected", + [ + ( + ["00:00:00", "12:34:56.789101", "23:59:59.999999"], + datetime.time(12, 34, 56, 789101), + ), + ( + [ + None, + "06:30:00", + pandas.NA if hasattr(pandas, "NA") else None, + pandas.NaT, + float("nan"), + ], + datetime.time(6, 30), + ), + (["2:22:21.222222", "2:22:23.222222"], datetime.time(2, 22, 22, 222222)), + ], +) +def test_date_median(values, expected): + series = pandas.Series(values, dtype="dbtime") + assert series.median() == expected From 9472ab0d2cc41d596d3efaecc3cd04923cb3f595 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Wed, 9 Feb 2022 08:50:55 -0500 Subject: [PATCH 10/26] chore: remove custom sync-repo-settings (#69) --- .github/sync-repo-settings.yaml | 31 ------------------------------- 1 file changed, 31 deletions(-) delete mode 100644 .github/sync-repo-settings.yaml diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml deleted file mode 100644 index ebe13aa..0000000 --- a/.github/sync-repo-settings.yaml +++ /dev/null @@ -1,31 +0,0 @@ -# https://github.com/googleapis/repo-automation-bots/tree/main/packages/sync-repo-settings -# Rules for main branch protection -branchProtectionRules: -# Identifies the protection rule pattern. 
Name of the branch to be protected. -# Defaults to `main` -- pattern: main - requiresCodeOwnerReviews: true - requiresStrictStatusChecks: true - requiredStatusCheckContexts: - - 'cla/google' - - 'OwlBot Post Processor' - - 'docs' - - 'docfx' - - 'lint' - - 'unit (3.6)' - - 'unit (3.7)' - - 'unit (3.8)' - - 'unit (3.9)' - - 'unit (3.10)' - - 'cover' -permissionRules: - - team: actools-python - permission: admin - - team: actools - permission: admin - - team: yoshi-python - permission: push - - team: python-samples-owners - permission: push - - team: python-samples-reviewers - permission: push From 592e8bf1697166704e25b3e883b688ef0509bf6b Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 28 Feb 2022 20:50:35 +0100 Subject: [PATCH 11/26] chore(deps): update all dependencies (#68) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Update setup.py Co-authored-by: Owl Bot Co-authored-by: Tim Swast --- samples/snippets/requirements-test.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 9270945..c2845bf 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1 +1 @@ -pytest==6.2.5 +pytest==7.0.1 diff --git a/setup.py b/setup.py index 8def678..7ad5119 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,7 @@ dependencies = [ "packaging >= 17.0", "pandas >= 0.24.2, < 2.0dev", - "pyarrow>=3.0.0, <7.0dev", + "pyarrow>=3.0.0, <8.0dev", "numpy >= 1.16.6, < 2.0dev", ] From 6d6e16ace2c99d1167de6e117dd83644dc83d8e0 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 1 Mar 2022 14:54:22 +0000 Subject: [PATCH 12/26] chore(deps): update 
actions/setup-python action to v3 (#71) Source-Link: https://github.com/googleapis/synthtool/commit/571ee2c3b26182429eddcf115122ee545d7d3787 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:660abdf857d3ab9aabcd967c163c70e657fcc5653595c709263af5f3fa23ef67 --- .github/.OwlBot.lock.yaml | 3 +-- .github/workflows/docs.yml | 4 ++-- .github/workflows/lint.yml | 2 +- .github/workflows/unittest.yml | 4 ++-- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index b668c04..d9a55fa 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,4 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:ed1f9983d5a935a89fe8085e8bb97d94e41015252c5b6c9771257cf8624367e6 - + digest: sha256:660abdf857d3ab9aabcd967c163c70e657fcc5653595c709263af5f3fa23ef67 diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index f7b8344..cca4e98 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -10,7 +10,7 @@ jobs: - name: Checkout uses: actions/checkout@v2 - name: Setup Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v3 with: python-version: "3.10" - name: Install nox @@ -26,7 +26,7 @@ jobs: - name: Checkout uses: actions/checkout@v2 - name: Setup Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v3 with: python-version: "3.10" - name: Install nox diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 1e8b05c..f687324 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -10,7 +10,7 @@ jobs: - name: Checkout uses: actions/checkout@v2 - name: Setup Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v3 with: python-version: "3.10" - name: Install nox diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml index 074ee25..d3003e0 100644 --- 
a/.github/workflows/unittest.yml +++ b/.github/workflows/unittest.yml @@ -13,7 +13,7 @@ jobs: - name: Checkout uses: actions/checkout@v2 - name: Setup Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v3 with: python-version: ${{ matrix.python }} - name: Install nox @@ -39,7 +39,7 @@ jobs: - name: Checkout uses: actions/checkout@v2 - name: Setup Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v3 with: python-version: "3.10" - name: Install coverage From 56e99bb63a2b5cf6f2a0bbab55cd5304f7522d06 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Wed, 2 Mar 2022 19:46:57 -0500 Subject: [PATCH 13/26] chore(deps): update actions/checkout action to v3 (#73) Source-Link: https://github.com/googleapis/synthtool/commit/ca879097772aeec2cbb971c3cea8ecc81522b68a Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:6162c384d685c5fe22521d3f37f6fc732bf99a085f6d47b677dbcae97fc21392 Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 2 +- .github/workflows/docs.yml | 4 ++-- .github/workflows/lint.yml | 2 +- .github/workflows/unittest.yml | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index d9a55fa..480226a 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,4 +13,4 @@ # limitations under the License. 
docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:660abdf857d3ab9aabcd967c163c70e657fcc5653595c709263af5f3fa23ef67 + digest: sha256:6162c384d685c5fe22521d3f37f6fc732bf99a085f6d47b677dbcae97fc21392 diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index cca4e98..b46d730 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -8,7 +8,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Setup Python uses: actions/setup-python@v3 with: @@ -24,7 +24,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Setup Python uses: actions/setup-python@v3 with: diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index f687324..f512a49 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -8,7 +8,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Setup Python uses: actions/setup-python@v3 with: diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml index d3003e0..e87fe5b 100644 --- a/.github/workflows/unittest.yml +++ b/.github/workflows/unittest.yml @@ -11,7 +11,7 @@ jobs: python: ['3.6', '3.7', '3.8', '3.9', '3.10'] steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Setup Python uses: actions/setup-python@v3 with: @@ -37,7 +37,7 @@ jobs: - unit steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Setup Python uses: actions/setup-python@v3 with: From 3bc0d175deedc644bf266f550444560ff6f0415a Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 4 Mar 2022 12:06:29 -0500 Subject: [PATCH 14/26] chore: Adding support for pytest-xdist and pytest-parallel (#76) Source-Link: 
https://github.com/googleapis/synthtool/commit/82f5cb283efffe96e1b6cd634738e0e7de2cd90a Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:5d8da01438ece4021d135433f2cf3227aa39ef0eaccc941d62aa35e6902832ae Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 2 +- samples/snippets/noxfile.py | 78 +++++++++++++++++++++---------------- 2 files changed, 45 insertions(+), 35 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 480226a..7e08e05 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,4 +13,4 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:6162c384d685c5fe22521d3f37f6fc732bf99a085f6d47b677dbcae97fc21392 + digest: sha256:5d8da01438ece4021d135433f2cf3227aa39ef0eaccc941d62aa35e6902832ae diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py index 20cdfc6..85f5836 100644 --- a/samples/snippets/noxfile.py +++ b/samples/snippets/noxfile.py @@ -188,42 +188,52 @@ def _session_tests( # check for presence of tests test_list = glob.glob("*_test.py") + glob.glob("test_*.py") test_list.extend(glob.glob("tests")) + if len(test_list) == 0: print("No tests found, skipping directory.") - else: - if TEST_CONFIG["pip_version_override"]: - pip_version = TEST_CONFIG["pip_version_override"] - session.install(f"pip=={pip_version}") - """Runs py.test for a particular project.""" - if os.path.exists("requirements.txt"): - if os.path.exists("constraints.txt"): - session.install("-r", "requirements.txt", "-c", "constraints.txt") - else: - session.install("-r", "requirements.txt") - - if os.path.exists("requirements-test.txt"): - if os.path.exists("constraints-test.txt"): - session.install( - "-r", "requirements-test.txt", "-c", "constraints-test.txt" - ) - else: - session.install("-r", "requirements-test.txt") - - if INSTALL_LIBRARY_FROM_SOURCE: - session.install("-e", _get_repo_root()) - - if 
post_install: - post_install(session) - - session.run( - "pytest", - *(PYTEST_COMMON_ARGS + session.posargs), - # Pytest will return 5 when no tests are collected. This can happen - # on travis where slow and flaky tests are excluded. - # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html - success_codes=[0, 5], - env=get_pytest_env_vars(), - ) + return + + if TEST_CONFIG["pip_version_override"]: + pip_version = TEST_CONFIG["pip_version_override"] + session.install(f"pip=={pip_version}") + """Runs py.test for a particular project.""" + concurrent_args = [] + if os.path.exists("requirements.txt"): + if os.path.exists("constraints.txt"): + session.install("-r", "requirements.txt", "-c", "constraints.txt") + else: + session.install("-r", "requirements.txt") + with open("requirements.txt") as rfile: + packages = rfile.read() + + if os.path.exists("requirements-test.txt"): + if os.path.exists("constraints-test.txt"): + session.install("-r", "requirements-test.txt", "-c", "constraints-test.txt") + else: + session.install("-r", "requirements-test.txt") + with open("requirements-test.txt") as rtfile: + packages += rtfile.read() + + if INSTALL_LIBRARY_FROM_SOURCE: + session.install("-e", _get_repo_root()) + + if post_install: + post_install(session) + + if "pytest-parallel" in packages: + concurrent_args.extend(["--workers", "auto", "--tests-per-worker", "auto"]) + elif "pytest-xdist" in packages: + concurrent_args.extend(["-n", "auto"]) + + session.run( + "pytest", + *(PYTEST_COMMON_ARGS + session.posargs + concurrent_args), + # Pytest will return 5 when no tests are collected. This can happen + # on travis where slow and flaky tests are excluded. 
+ # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html + success_codes=[0, 5], + env=get_pytest_env_vars(), + ) @nox.session(python=ALL_VERSIONS) From 9757d38bed6da264bed0b4c3931518c4abfde8d8 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Sat, 5 Mar 2022 00:36:25 +0000 Subject: [PATCH 15/26] chore(deps): update actions/download-artifact action to v3 (#77) Source-Link: https://github.com/googleapis/synthtool/commit/38e11ad1104dcc1e63b52691ddf2fe4015d06955 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:4e1991042fe54b991db9ca17c8fb386e61b22fe4d1472a568bf0fcac85dcf5d3 --- .github/.OwlBot.lock.yaml | 2 +- .github/workflows/unittest.yml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 7e08e05..44c78f7 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,4 +13,4 @@ # limitations under the License. 
docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:5d8da01438ece4021d135433f2cf3227aa39ef0eaccc941d62aa35e6902832ae + digest: sha256:4e1991042fe54b991db9ca17c8fb386e61b22fe4d1472a568bf0fcac85dcf5d3 diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml index e87fe5b..e5be6ed 100644 --- a/.github/workflows/unittest.yml +++ b/.github/workflows/unittest.yml @@ -26,7 +26,7 @@ jobs: run: | nox -s unit-${{ matrix.python }} - name: Upload coverage results - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: coverage-artifacts path: .coverage-${{ matrix.python }} @@ -47,7 +47,7 @@ jobs: python -m pip install --upgrade setuptools pip wheel python -m pip install coverage - name: Download coverage results - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: coverage-artifacts path: .coverage-results/ From 1e979cf360eb586e77b415f7b710a8a41c22e981 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 10 Mar 2022 13:41:10 -0600 Subject: [PATCH 16/26] fix: correct TypeError and comparison issues discovered in DateArray compliance tests (#79) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: address failing compliance tests in DateArray and TimeArray test: add a test session with prerelease versions of dependencies * fix min/max/median for 2D arrays * fixes except for null contains * actually use NaT as 'advertised' * fix!: use `pandas.NaT` for missing values in dbdate and dbtime dtypes This makes them consistent with other date/time dtypes, as well as internally consistent with the advertised `dtype.na_value`. 
BREAKING-CHANGE: dbdate and dbtime dtypes return NaT instead of None for missing values Release-As: 0.4.0 * more progress towards compliance * address errors in TestMethods * fix: correct dtype and interface compliance errors in DateArray * add compliance tests to github actions * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * split coverage * add nox session back * fix unit session * move compliance tests and remove unnecessary test * no need for coverage upload * fix coverage * restore coverage * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md Co-authored-by: Owl Bot --- .github/workflows/compliance.yml | 27 ++++++++++ db_dtypes/core.py | 16 ++++-- noxfile.py | 13 +++-- owlbot.py | 28 ++++++++++ tests/compliance/conftest.py | 53 +++++++++++++++++++ tests/compliance/date/conftest.py | 47 ++++++++++++++++ tests/compliance/date/test_date_compliance.py | 47 ++++++++++++++++ tests/unit/test_date.py | 16 +++++- 8 files changed, 239 insertions(+), 8 deletions(-) create mode 100644 .github/workflows/compliance.yml create mode 100644 tests/compliance/conftest.py create mode 100644 tests/compliance/date/conftest.py create mode 100644 tests/compliance/date/test_date_compliance.py diff --git a/.github/workflows/compliance.yml b/.github/workflows/compliance.yml new file mode 100644 index 0000000..77e6b05 --- /dev/null +++ b/.github/workflows/compliance.yml @@ -0,0 +1,27 @@ +on: + pull_request: + branches: + - main +name: unittest +jobs: + compliance: + runs-on: ubuntu-latest + strategy: + matrix: + python: ['3.10'] + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Setup Python + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python }} + - name: Install nox + run: | + python -m pip install --upgrade setuptools pip wheel + python -m pip install nox + - name: Run compliance tests + 
env: + COVERAGE_FILE: .coverage-compliance-${{ matrix.python }} + run: | + nox -s compliance diff --git a/db_dtypes/core.py b/db_dtypes/core.py index a06c6d6..b5b0b7a 100644 --- a/db_dtypes/core.py +++ b/db_dtypes/core.py @@ -17,7 +17,7 @@ import numpy import pandas import pandas.api.extensions -from pandas.api.types import is_dtype_equal, is_list_like, pandas_dtype +from pandas.api.types import is_dtype_equal, is_list_like, is_scalar, pandas_dtype from db_dtypes import pandas_backports @@ -31,9 +31,14 @@ class BaseDatetimeDtype(pandas.api.extensions.ExtensionDtype): names = None @classmethod - def construct_from_string(cls, name): + def construct_from_string(cls, name: str): + if not isinstance(name, str): + raise TypeError( + f"'construct_from_string' expects a string, got {type(name)}" + ) + if name != cls.name: - raise TypeError() + raise TypeError(f"Cannot construct a '{cls.__name__}' from 'another_type'") return cls() @@ -74,6 +79,11 @@ def astype(self, dtype, copy=True): return super().astype(dtype, copy=copy) def _cmp_method(self, other, op): + """Compare array values, for use in OpsMixin.""" + + if is_scalar(other) and (pandas.isna(other) or type(other) == self.dtype.type): + other = type(self)([other]) + oshape = getattr(other, "shape", None) if oshape != self.shape and oshape != (1,) and self.shape != (1,): raise TypeError( diff --git a/noxfile.py b/noxfile.py index 5f48361..54421d8 100644 --- a/noxfile.py +++ b/noxfile.py @@ -37,6 +37,7 @@ nox.options.sessions = [ "lint", "unit", + "compliance", "cover", "lint_setup_py", "blacken", @@ -77,7 +78,7 @@ def lint_setup_py(session): session.run("python", "setup.py", "check", "--restructuredtext", "--strict") -def default(session): +def default(session, tests_path): # Install all test dependencies, then install this package in-place. 
constraints_path = str( @@ -106,15 +107,21 @@ def default(session): "--cov-config=.coveragerc", "--cov-report=", "--cov-fail-under=0", - os.path.join("tests", "unit"), + tests_path, *session.posargs, ) +@nox.session(python=UNIT_TEST_PYTHON_VERSIONS[-1]) +def compliance(session): + """Run the compliance test suite.""" + default(session, os.path.join("tests", "compliance")) + + @nox.session(python=UNIT_TEST_PYTHON_VERSIONS) def unit(session): """Run the unit test suite.""" - default(session) + default(session, os.path.join("tests", "unit")) @nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS) diff --git a/owlbot.py b/owlbot.py index 30f3b3d..6c59671 100644 --- a/owlbot.py +++ b/owlbot.py @@ -64,11 +64,39 @@ new_sessions = """ "lint", "unit", + "compliance", "cover", """ s.replace(["noxfile.py"], old_sessions, new_sessions) +# Add compliance tests. +s.replace( + ["noxfile.py"], r"def default\(session\):", "def default(session, tests_path):" +) +s.replace(["noxfile.py"], r'os.path.join\("tests", "unit"\),', "tests_path,") +s.replace( + ["noxfile.py"], + r''' +@nox.session\(python=UNIT_TEST_PYTHON_VERSIONS\) +def unit\(session\): + """Run the unit test suite.""" + default\(session\) +''', + ''' +@nox.session(python=UNIT_TEST_PYTHON_VERSIONS[-1]) +def compliance(session): + """Run the compliance test suite.""" + default(session, os.path.join("tests", "compliance")) + + +@nox.session(python=UNIT_TEST_PYTHON_VERSIONS) +def unit(session): + """Run the unit test suite.""" + default(session, os.path.join("tests", "unit")) +''', +) + # ---------------------------------------------------------------------------- # Samples templates # ---------------------------------------------------------------------------- diff --git a/tests/compliance/conftest.py b/tests/compliance/conftest.py new file mode 100644 index 0000000..bc76692 --- /dev/null +++ b/tests/compliance/conftest.py @@ -0,0 +1,53 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the 
"License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pandas +import pytest + + +@pytest.fixture(params=["ffill", "bfill"]) +def fillna_method(request): + """ + Parametrized fixture giving method parameters 'ffill' and 'bfill' for + Series.fillna(method=) testing. + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return request.param + + +@pytest.fixture +def na_value(): + return pandas.NaT + + +@pytest.fixture +def na_cmp(): + """ + Binary operator for comparing NA values. + + Should return a function of two arguments that returns + True if both arguments are (scalar) NA for your type. + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + and + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/test_datetime.py + """ + + def cmp(a, b): + return a is pandas.NaT and a is b + + return cmp diff --git a/tests/compliance/date/conftest.py b/tests/compliance/date/conftest.py new file mode 100644 index 0000000..e25ccc9 --- /dev/null +++ b/tests/compliance/date/conftest.py @@ -0,0 +1,47 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime + +import numpy +import pytest + +from db_dtypes import DateArray, DateDtype + + +@pytest.fixture +def data(): + return DateArray( + numpy.arange( + datetime.datetime(1900, 1, 1), + datetime.datetime(2099, 12, 31), + datetime.timedelta(days=731), + dtype="datetime64[ns]", + ) + ) + + +@pytest.fixture +def data_missing(): + """Length-2 array with [NA, Valid] + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return DateArray([None, datetime.date(2022, 1, 27)]) + + +@pytest.fixture +def dtype(): + return DateDtype() diff --git a/tests/compliance/date/test_date_compliance.py b/tests/compliance/date/test_date_compliance.py new file mode 100644 index 0000000..a805ecd --- /dev/null +++ b/tests/compliance/date/test_date_compliance.py @@ -0,0 +1,47 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Tests for extension interface compliance, inherited from pandas. 
+ +See: +https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/decimal/test_decimal.py +and +https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/test_period.py +""" + +from pandas.tests.extension import base + + +class TestDtype(base.BaseDtypeTests): + pass + + +class TestInterface(base.BaseInterfaceTests): + pass + + +class TestConstructors(base.BaseConstructorsTests): + pass + + +class TestReshaping(base.BaseReshapingTests): + pass + + +class TestGetitem(base.BaseGetitemTests): + pass + + +class TestMissing(base.BaseMissingTests): + pass diff --git a/tests/unit/test_date.py b/tests/unit/test_date.py index bf877ea..bce2dc1 100644 --- a/tests/unit/test_date.py +++ b/tests/unit/test_date.py @@ -13,15 +13,27 @@ # limitations under the License. import datetime +import operator import pandas +import pandas.testing import pytest -# To register the types. -import db_dtypes # noqa +import db_dtypes from db_dtypes import pandas_backports +def test_construct_from_string_with_nonstring(): + with pytest.raises(TypeError): + db_dtypes.DateDtype.construct_from_string(object()) + + +def test__cmp_method_with_scalar(): + input_array = db_dtypes.DateArray([datetime.date(1900, 1, 1)]) + got = input_array._cmp_method(datetime.date(1900, 1, 1), operator.eq) + assert got[0] + + @pytest.mark.parametrize( "value, expected", [ From 373b71c1536a3fe343d61631aaace88ad0f4163e Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Sun, 13 Mar 2022 17:04:39 +0100 Subject: [PATCH 17/26] chore(deps): update dependency pytest to v7.1.0 (#80) --- samples/snippets/requirements-test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index c2845bf..824a8a7 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1 +1 @@ -pytest==7.0.1 +pytest==7.1.0 From f79466c5e17afdf27914c77f09fe000d2d667b16 Mon Sep 17 00:00:00 2001 
From: WhiteSource Renovate Date: Fri, 18 Mar 2022 03:13:02 +0100 Subject: [PATCH 18/26] chore(deps): update dependency pytest to v7.1.1 (#83) --- samples/snippets/requirements-test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 824a8a7..4f6bf64 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1 +1 @@ -pytest==7.1.0 +pytest==7.1.1 From 38ac28d8b16f9b86b5029c85e45e9f2e034159b7 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 18 Mar 2022 14:30:35 -0500 Subject: [PATCH 19/26] fix: address failing tests with pandas 1.5.0 (#82) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit test: add a test session with prerelease versions of dependencies Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-db-dtypes-pandas/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes #81 🦕 --- .github/workflows/compliance.yml | 21 +++++ .github/workflows/unittest-prerelease.yml | 32 ++++++++ db_dtypes/__init__.py | 26 ++++-- db_dtypes/core.py | 6 ++ noxfile.py | 91 +++++++++++++++++++++ owlbot.py | 96 ++++++++++++++++++++++- tests/unit/test_date.py | 24 ++++++ tests/unit/test_time.py | 26 ++++++ 8 files changed, 314 insertions(+), 8 deletions(-) create mode 100644 .github/workflows/unittest-prerelease.yml diff --git a/.github/workflows/compliance.yml b/.github/workflows/compliance.yml index 77e6b05..eca8cc2 100644 --- a/.github/workflows/compliance.yml +++ b/.github/workflows/compliance.yml @@ -25,3 +25,24 @@ jobs: COVERAGE_FILE: .coverage-compliance-${{ matrix.python }} run: | nox -s compliance + compliance-prerelease: + runs-on: ubuntu-latest + strategy: + matrix: + python: ['3.10'] + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Setup Python + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python }} + - name: Install nox + run: | + python -m pip install --upgrade setuptools pip wheel + python -m pip install nox + - name: Run compliance prerelease tests + env: + COVERAGE_FILE: .coverage-compliance-prerelease-${{ matrix.python }} + run: | + nox -s compliance_prerelease diff --git a/.github/workflows/unittest-prerelease.yml b/.github/workflows/unittest-prerelease.yml new file mode 100644 index 0000000..a11568a --- /dev/null +++ b/.github/workflows/unittest-prerelease.yml @@ -0,0 +1,32 @@ +on: + pull_request: + branches: + - main +name: unittest-prerelease +jobs: + unit: + runs-on: ubuntu-latest + strategy: + matrix: + python: ['3.10'] + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Setup Python + uses: actions/setup-python@v3 + with: + 
python-version: ${{ matrix.python }} + - name: Install nox + run: | + python -m pip install --upgrade setuptools pip wheel + python -m pip install nox + - name: Run unit tests + env: + COVERAGE_FILE: .coverage-prerelease-${{ matrix.python }} + run: | + nox -s unit_prerelease + - name: Upload coverage results + uses: actions/upload-artifact@v3 + with: + name: coverage-artifacts + path: .coverage-${{ matrix.python }} diff --git a/db_dtypes/__init__.py b/db_dtypes/__init__.py index a222e6d..d8e2ae5 100644 --- a/db_dtypes/__init__.py +++ b/db_dtypes/__init__.py @@ -34,6 +34,14 @@ time_dtype_name = "dbtime" _EPOCH = datetime.datetime(1970, 1, 1) _NPEPOCH = numpy.datetime64(_EPOCH) +_NP_DTYPE = "datetime64[ns]" + +# Numpy converts datetime64 scalars to datetime.datetime only if microsecond or +# smaller precision is used. +# +# TODO(https://github.com/googleapis/python-db-dtypes-pandas/issues/63): Keep +# nanosecond precision when boxing scalars. +_NP_BOX_DTYPE = "datetime64[us]" pandas_release = packaging.version.parse(pandas.__version__).release @@ -149,12 +157,14 @@ def _box_func(self, x): return pandas.NaT try: - return x.astype(" Date: Mon, 21 Mar 2022 15:19:02 -0500 Subject: [PATCH 20/26] fix: dbdate and dbtime support set item with null values (#85) feat: dbdate and dbtime support numpy.datetime64 values in array constructor --- db_dtypes/__init__.py | 10 ++++- db_dtypes/core.py | 18 ++++---- db_dtypes/pandas_backports.py | 2 +- tests/unit/test_date.py | 82 ++++++++++++++++++++++++++++------- tests/unit/test_time.py | 6 +++ 5 files changed, 91 insertions(+), 27 deletions(-) diff --git a/db_dtypes/__init__.py b/db_dtypes/__init__.py index d8e2ae5..7889dac 100644 --- a/db_dtypes/__init__.py +++ b/db_dtypes/__init__.py @@ -106,6 +106,9 @@ def _datetime( r"(?:\.(?P\d*))?)?)?\s*$" ).match, ) -> Optional[numpy.datetime64]: + if isinstance(scalar, numpy.datetime64): + return scalar + # Convert pyarrow values to datetime.time. 
if isinstance(scalar, (pyarrow.Time32Scalar, pyarrow.Time64Scalar)): scalar = ( @@ -116,7 +119,7 @@ def _datetime( ) if pandas.isna(scalar): - return None + return numpy.datetime64("NaT") if isinstance(scalar, datetime.time): return pandas.Timestamp( year=1970, @@ -238,12 +241,15 @@ def _datetime( scalar, match_fn=re.compile(r"\s*(?P\d+)-(?P\d+)-(?P\d+)\s*$").match, ) -> Optional[numpy.datetime64]: + if isinstance(scalar, numpy.datetime64): + return scalar + # Convert pyarrow values to datetime.date. if isinstance(scalar, (pyarrow.Date32Scalar, pyarrow.Date64Scalar)): scalar = scalar.as_py() if pandas.isna(scalar): - return None + return numpy.datetime64("NaT") elif isinstance(scalar, datetime.date): return pandas.Timestamp( year=scalar.year, month=scalar.month, day=scalar.day diff --git a/db_dtypes/core.py b/db_dtypes/core.py index 14d76aa..7879571 100644 --- a/db_dtypes/core.py +++ b/db_dtypes/core.py @@ -100,14 +100,6 @@ def _cmp_method(self, other, op): return NotImplemented return op(self._ndarray, other._ndarray) - def __setitem__(self, key, value): - if is_list_like(value): - _datetime = self._datetime - value = [_datetime(v) for v in value] - elif not pandas.isna(value): - value = self._datetime(value) - return super().__setitem__(key, value) - def _from_factorized(self, unique, original): return self.__class__(unique) @@ -121,6 +113,16 @@ def _validate_scalar(self, value): """ return self._datetime(value) + def _validate_setitem_value(self, value): + """ + Convert a value for use in setting a value in the backing numpy array. 
+ """ + if is_list_like(value): + _datetime = self._datetime + return [_datetime(v) for v in value] + + return self._datetime(value) + def any( self, *, diff --git a/db_dtypes/pandas_backports.py b/db_dtypes/pandas_backports.py index f53adff..0e39986 100644 --- a/db_dtypes/pandas_backports.py +++ b/db_dtypes/pandas_backports.py @@ -126,7 +126,7 @@ def __getitem__(self, index): return self.__class__(value, self._dtype) def __setitem__(self, index, value): - self._ndarray[index] = value + self._ndarray[index] = self._validate_setitem_value(value) def __len__(self): return len(self._ndarray) diff --git a/tests/unit/test_date.py b/tests/unit/test_date.py index 79c97ac..fb41620 100644 --- a/tests/unit/test_date.py +++ b/tests/unit/test_date.py @@ -24,6 +24,33 @@ from db_dtypes import pandas_backports +VALUE_PARSING_TEST_CASES = [ + # Min/Max values for pandas.Timestamp. + ("1677-09-22", datetime.date(1677, 9, 22)), + ("2262-04-11", datetime.date(2262, 4, 11)), + # Typical "zero" values. + ("1900-01-01", datetime.date(1900, 1, 1)), + ("1970-01-01", datetime.date(1970, 1, 1)), + # Assorted values. + ("1993-10-31", datetime.date(1993, 10, 31)), + (datetime.date(1993, 10, 31), datetime.date(1993, 10, 31)), + ("2012-02-29", datetime.date(2012, 2, 29)), + (numpy.datetime64("2012-02-29"), datetime.date(2012, 2, 29)), + ("2021-12-17", datetime.date(2021, 12, 17)), + (pandas.Timestamp("2021-12-17"), datetime.date(2021, 12, 17)), + ("2038-01-19", datetime.date(2038, 1, 19)), +] + +NULL_VALUE_TEST_CASES = [ + None, + pandas.NaT, + float("nan"), +] + +if hasattr(pandas, "NA"): + NULL_VALUE_TEST_CASES.append(pandas.NA) + + def test_box_func(): input_array = db_dtypes.DateArray([]) input_datetime = datetime.datetime(2022, 3, 16) @@ -58,26 +85,49 @@ def test__cmp_method_with_scalar(): assert got[0] -@pytest.mark.parametrize( - "value, expected", - [ - # Min/Max values for pandas.Timestamp. 
- ("1677-09-22", datetime.date(1677, 9, 22)), - ("2262-04-11", datetime.date(2262, 4, 11)), - # Typical "zero" values. - ("1900-01-01", datetime.date(1900, 1, 1)), - ("1970-01-01", datetime.date(1970, 1, 1)), - # Assorted values. - ("1993-10-31", datetime.date(1993, 10, 31)), - ("2012-02-29", datetime.date(2012, 2, 29)), - ("2021-12-17", datetime.date(2021, 12, 17)), - ("2038-01-19", datetime.date(2038, 1, 19)), - ], -) +@pytest.mark.parametrize("value, expected", VALUE_PARSING_TEST_CASES) def test_date_parsing(value, expected): assert pandas.Series([value], dtype="dbdate")[0] == expected +@pytest.mark.parametrize("value", NULL_VALUE_TEST_CASES) +def test_date_parsing_null(value): + assert pandas.Series([value], dtype="dbdate")[0] is pandas.NaT + + +@pytest.mark.parametrize("value, expected", VALUE_PARSING_TEST_CASES) +def test_date_set_item(value, expected): + series = pandas.Series([None], dtype="dbdate") + series[0] = value + assert series[0] == expected + + +@pytest.mark.parametrize("value", NULL_VALUE_TEST_CASES) +def test_date_set_item_null(value): + series = pandas.Series(["1970-01-01"], dtype="dbdate") + series[0] = value + assert series[0] is pandas.NaT + + +def test_date_set_slice(): + series = pandas.Series([None, None, None], dtype="dbdate") + series[:] = [ + datetime.date(2022, 3, 21), + "2011-12-13", + numpy.datetime64("1998-09-04"), + ] + assert series[0] == datetime.date(2022, 3, 21) + assert series[1] == datetime.date(2011, 12, 13) + assert series[2] == datetime.date(1998, 9, 4) + + +def test_date_set_slice_null(): + series = pandas.Series(["1970-01-01"] * len(NULL_VALUE_TEST_CASES), dtype="dbdate") + series[:] = NULL_VALUE_TEST_CASES + for row_index in range(len(NULL_VALUE_TEST_CASES)): + assert series[row_index] is pandas.NaT + + @pytest.mark.parametrize( "value, error", [ diff --git a/tests/unit/test_time.py b/tests/unit/test_time.py index db533f5..bdfc48b 100644 --- a/tests/unit/test_time.py +++ b/tests/unit/test_time.py @@ -73,8 +73,14 @@ def 
test_box_func(): # Fractional seconds can cause rounding problems if cast to float. See: # https://github.com/googleapis/python-db-dtypes-pandas/issues/18 ("0:0:59.876543", datetime.time(0, 0, 59, 876543)), + ( + numpy.datetime64("1970-01-01 00:00:59.876543"), + datetime.time(0, 0, 59, 876543), + ), ("01:01:01.010101", datetime.time(1, 1, 1, 10101)), + (pandas.Timestamp("1970-01-01 01:01:01.010101"), datetime.time(1, 1, 1, 10101)), ("09:09:09.090909", datetime.time(9, 9, 9, 90909)), + (datetime.time(9, 9, 9, 90909), datetime.time(9, 9, 9, 90909)), ("11:11:11.111111", datetime.time(11, 11, 11, 111111)), ("19:16:23.987654", datetime.time(19, 16, 23, 987654)), # Microsecond precision From b771e050acd2bdbf469a97f7477036c159b500f8 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 24 Mar 2022 09:47:20 -0500 Subject: [PATCH 21/26] fix: address failing 2D array compliance tests in DateArray (#64) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: address failing compliance tests in DateArray and TimeArray test: add a test session with prerelease versions of dependencies * fix min/max/median for 2D arrays * fixes except for null contains * actually use NaT as 'advertised' * fix!: use `pandas.NaT` for missing values in dbdate and dbtime dtypes This makes them consistent with other date/time dtypes, as well as internally consistent with the advertised `dtype.na_value`. 
BREAKING-CHANGE: dbdate and dbtime dtypes return NaT instead of None for missing values Release-As: 0.4.0 * more progress towards compliance * address errors in TestMethods * move tests * add prerelease deps * fix: address failing tests with pandas 1.5.0 test: add a test session with prerelease versions of dependencies * fix owlbot config * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * document why microsecond precision is used * use correct units * add box_func tests * typo * add unit tests Co-authored-by: Owl Bot --- db_dtypes/core.py | 44 ++--- db_dtypes/pandas_backports.py | 4 - .../date/test_date_compliance_1_5.py | 35 ++++ tests/unit/test_date.py | 150 ++++++++++++++++++ 4 files changed, 210 insertions(+), 23 deletions(-) create mode 100644 tests/compliance/date/test_date_compliance_1_5.py diff --git a/db_dtypes/core.py b/db_dtypes/core.py index 7879571..5d5c053 100644 --- a/db_dtypes/core.py +++ b/db_dtypes/core.py @@ -152,29 +152,35 @@ def min(self, *, axis: Optional[int] = None, skipna: bool = True, **kwargs): result = pandas_backports.nanmin( values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna ) - return self._box_func(result) + if axis is None or self.ndim == 1: + return self._box_func(result) + return self._from_backing_data(result) def max(self, *, axis: Optional[int] = None, skipna: bool = True, **kwargs): pandas_backports.numpy_validate_max((), kwargs) result = pandas_backports.nanmax( values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna ) - return self._box_func(result) - - if pandas_release >= (1, 2): - - def median( - self, - *, - axis: Optional[int] = None, - out=None, - overwrite_input: bool = False, - keepdims: bool = False, - skipna: bool = True, - ): - pandas_backports.numpy_validate_median( - (), - 
{"out": out, "overwrite_input": overwrite_input, "keepdims": keepdims}, - ) - result = pandas_backports.nanmedian(self._ndarray, axis=axis, skipna=skipna) + if axis is None or self.ndim == 1: + return self._box_func(result) + return self._from_backing_data(result) + + def median( + self, + *, + axis: Optional[int] = None, + out=None, + overwrite_input: bool = False, + keepdims: bool = False, + skipna: bool = True, + ): + if not hasattr(pandas_backports, "numpy_validate_median"): + raise NotImplementedError("Need pandas 1.3 or later to calculate median.") + + pandas_backports.numpy_validate_median( + (), {"out": out, "overwrite_input": overwrite_input, "keepdims": keepdims}, + ) + result = pandas_backports.nanmedian(self._ndarray, axis=axis, skipna=skipna) + if axis is None or self.ndim == 1: return self._box_func(result) + return self._from_backing_data(result) diff --git a/db_dtypes/pandas_backports.py b/db_dtypes/pandas_backports.py index 0e39986..0966e83 100644 --- a/db_dtypes/pandas_backports.py +++ b/db_dtypes/pandas_backports.py @@ -106,12 +106,8 @@ def __ge__(self, other): # See: https://github.com/pandas-dev/pandas/pull/45544 @import_default("pandas.core.arrays._mixins", pandas_release < (1, 3)) class NDArrayBackedExtensionArray(pandas.core.arrays.base.ExtensionArray): - - ndim = 1 - def __init__(self, values, dtype): assert isinstance(values, numpy.ndarray) - assert values.ndim == 1 self._ndarray = values self._dtype = dtype diff --git a/tests/compliance/date/test_date_compliance_1_5.py b/tests/compliance/date/test_date_compliance_1_5.py new file mode 100644 index 0000000..9c6da24 --- /dev/null +++ b/tests/compliance/date/test_date_compliance_1_5.py @@ -0,0 +1,35 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Tests for extension interface compliance, inherited from pandas. + +See: +https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/decimal/test_decimal.py +and +https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/test_period.py +""" + +from pandas.tests.extension import base +import pytest + +# NDArrayBacked2DTests suite added in https://github.com/pandas-dev/pandas/pull/44974 +pytest.importorskip("pandas", minversion="1.5.0dev") + + +class Test2DCompat(base.NDArrayBacked2DTests): + pass + + +class TestIndex(base.BaseIndexTests): + pass diff --git a/tests/unit/test_date.py b/tests/unit/test_date.py index fb41620..b8f36f6 100644 --- a/tests/unit/test_date.py +++ b/tests/unit/test_date.py @@ -16,6 +16,7 @@ import operator import numpy +import numpy.testing import pandas import pandas.testing import pytest @@ -154,6 +155,100 @@ def test_date_parsing_errors(value, error): pandas.Series([value], dtype="dbdate") +def test_date_max_2d(): + input_array = db_dtypes.DateArray( + numpy.array( + [ + [ + numpy.datetime64("1970-01-01"), + numpy.datetime64("1980-02-02"), + numpy.datetime64("1990-03-03"), + ], + [ + numpy.datetime64("1971-02-02"), + numpy.datetime64("1981-03-03"), + numpy.datetime64("1991-04-04"), + ], + [ + numpy.datetime64("1972-03-03"), + numpy.datetime64("1982-04-04"), + numpy.datetime64("1992-05-05"), + ], + ], + dtype="datetime64[ns]", + ) + ) + numpy.testing.assert_array_equal( + input_array.max(axis=0)._ndarray, + numpy.array( + [ + numpy.datetime64("1972-03-03"), + numpy.datetime64("1982-04-04"), + 
numpy.datetime64("1992-05-05"), + ], + dtype="datetime64[ns]", + ), + ) + numpy.testing.assert_array_equal( + input_array.max(axis=1)._ndarray, + numpy.array( + [ + numpy.datetime64("1990-03-03"), + numpy.datetime64("1991-04-04"), + numpy.datetime64("1992-05-05"), + ], + dtype="datetime64[ns]", + ), + ) + + +def test_date_min_2d(): + input_array = db_dtypes.DateArray( + numpy.array( + [ + [ + numpy.datetime64("1970-01-01"), + numpy.datetime64("1980-02-02"), + numpy.datetime64("1990-03-03"), + ], + [ + numpy.datetime64("1971-02-02"), + numpy.datetime64("1981-03-03"), + numpy.datetime64("1991-04-04"), + ], + [ + numpy.datetime64("1972-03-03"), + numpy.datetime64("1982-04-04"), + numpy.datetime64("1992-05-05"), + ], + ], + dtype="datetime64[ns]", + ) + ) + numpy.testing.assert_array_equal( + input_array.min(axis=0)._ndarray, + numpy.array( + [ + numpy.datetime64("1970-01-01"), + numpy.datetime64("1980-02-02"), + numpy.datetime64("1990-03-03"), + ], + dtype="datetime64[ns]", + ), + ) + numpy.testing.assert_array_equal( + input_array.min(axis=1)._ndarray, + numpy.array( + [ + numpy.datetime64("1970-01-01"), + numpy.datetime64("1971-02-02"), + numpy.datetime64("1972-03-03"), + ], + dtype="datetime64[ns]", + ), + ) + + @pytest.mark.skipif( not hasattr(pandas_backports, "numpy_validate_median"), reason="median not available with this version of pandas", @@ -178,3 +273,58 @@ def test_date_parsing_errors(value, error): def test_date_median(values, expected): series = pandas.Series(values, dtype="dbdate") assert series.median() == expected + + +@pytest.mark.skipif( + not hasattr(pandas_backports, "numpy_validate_median"), + reason="median not available with this version of pandas", +) +def test_date_median_2d(): + input_array = db_dtypes.DateArray( + numpy.array( + [ + [ + numpy.datetime64("1970-01-01"), + numpy.datetime64("1980-02-02"), + numpy.datetime64("1990-03-03"), + ], + [ + numpy.datetime64("1971-02-02"), + numpy.datetime64("1981-03-03"), + 
numpy.datetime64("1991-04-04"), + ], + [ + numpy.datetime64("1972-03-03"), + numpy.datetime64("1982-04-04"), + numpy.datetime64("1992-05-05"), + ], + ], + dtype="datetime64[ns]", + ) + ) + pandas.testing.assert_extension_array_equal( + input_array.median(axis=0), + db_dtypes.DateArray( + numpy.array( + [ + numpy.datetime64("1971-02-02"), + numpy.datetime64("1981-03-03"), + numpy.datetime64("1991-04-04"), + ], + dtype="datetime64[ns]", + ) + ), + ) + pandas.testing.assert_extension_array_equal( + input_array.median(axis=1), + db_dtypes.DateArray( + numpy.array( + [ + numpy.datetime64("1980-02-02"), + numpy.datetime64("1981-03-03"), + numpy.datetime64("1982-04-04"), + ], + dtype="datetime64[ns]", + ) + ), + ) From 42bc2d90174d152dfed782acf77016da55dbdaca Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 24 Mar 2022 15:08:16 -0500 Subject: [PATCH 22/26] fix: avoid TypeError when using sorted search (#84) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: address failing compliance tests in DateArray and TimeArray test: add a test session with prerelease versions of dependencies * fix min/max/median for 2D arrays * fixes except for null contains * actually use NaT as 'advertised' * fix!: use `pandas.NaT` for missing values in dbdate and dbtime dtypes This makes them consistent with other date/time dtypes, as well as internally consistent with the advertised `dtype.na_value`. 
BREAKING-CHANGE: dbdate and dbtime dtypes return NaT instead of None for missing values Release-As: 0.4.0 * more progress towards compliance * address errors in TestMethods * move tests * add prerelease deps * fix: address failing tests with pandas 1.5.0 test: add a test session with prerelease versions of dependencies * fix owlbot config * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * document why microsecond precision is used * use correct units * add box_func tests * typo * fix: avoid TypeError when using sorted search * add unit tests * fix: dbdate and dbtime support set item * add TestMethods * add unit test for search sorted Co-authored-by: Owl Bot --- db_dtypes/core.py | 8 ++ tests/compliance/conftest.py | 60 +++++++++++++ tests/compliance/date/conftest.py | 85 +++++++++++++++++++ tests/compliance/date/test_date_compliance.py | 29 +++++++ tests/unit/test_date.py | 27 ++++++ 5 files changed, 209 insertions(+) diff --git a/db_dtypes/core.py b/db_dtypes/core.py index 5d5c053..f577960 100644 --- a/db_dtypes/core.py +++ b/db_dtypes/core.py @@ -113,6 +113,14 @@ def _validate_scalar(self, value): """ return self._datetime(value) + def _validate_searchsorted_value(self, value): + """ + Convert a value for use in searching for a value in the backing numpy array. + + TODO: With pandas 2.0, this may be unnecessary. https://github.com/pandas-dev/pandas/pull/45544#issuecomment-1052809232 + """ + return self._validate_setitem_value(value) + def _validate_setitem_value(self, value): """ Convert a value for use in setting a value in the backing numpy array. 
diff --git a/tests/compliance/conftest.py b/tests/compliance/conftest.py index bc76692..54b767c 100644 --- a/tests/compliance/conftest.py +++ b/tests/compliance/conftest.py @@ -16,6 +16,28 @@ import pytest +@pytest.fixture(params=[True, False]) +def as_frame(request): + """ + Boolean fixture to support Series and Series.to_frame() comparison testing. + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return request.param + + +@pytest.fixture(params=[True, False]) +def as_series(request): + """ + Boolean fixture to support arr and Series(arr) comparison testing. + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return request.param + + @pytest.fixture(params=["ffill", "bfill"]) def fillna_method(request): """ @@ -28,6 +50,21 @@ def fillna_method(request): return request.param +@pytest.fixture +def invalid_scalar(data): + """ + A scalar that *cannot* be held by this ExtensionArray. + + The default should work for most subclasses, but is not guaranteed. + + If the array can hold any item (i.e. object dtype), then use pytest.skip. + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return object.__new__(object) + + @pytest.fixture def na_value(): return pandas.NaT @@ -51,3 +88,26 @@ def cmp(a, b): return a is pandas.NaT and a is b return cmp + + +@pytest.fixture(params=[None, lambda x: x]) +def sort_by_key(request): + """ + Simple fixture for testing keys in sorting methods. + Tests None (no key) and the identity key. + + See: https://github.com/pandas-dev/pandas/blob/main/pandas/conftest.py + """ + return request.param + + +@pytest.fixture(params=[True, False]) +def use_numpy(request): + """ + Boolean fixture to support comparison testing of ExtensionDtype array + and numpy array. 
+ + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return request.param diff --git a/tests/compliance/date/conftest.py b/tests/compliance/date/conftest.py index e25ccc9..6f0a816 100644 --- a/tests/compliance/date/conftest.py +++ b/tests/compliance/date/conftest.py @@ -20,6 +20,15 @@ from db_dtypes import DateArray, DateDtype +@pytest.fixture(params=["data", "data_missing"]) +def all_data(request, data, data_missing): + """Parametrized fixture giving 'data' and 'data_missing'""" + if request.param == "data": + return data + elif request.param == "data_missing": + return data_missing + + @pytest.fixture def data(): return DateArray( @@ -32,6 +41,52 @@ def data(): ) +@pytest.fixture +def data_for_grouping(): + """ + Data for factorization, grouping, and unique tests. + + Expected to be like [B, B, NA, NA, A, A, B, C] + + Where A < B < C and NA is missing + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return DateArray( + [ + datetime.date(1980, 1, 27), + datetime.date(1980, 1, 27), + None, + None, + datetime.date(1969, 12, 30), + datetime.date(1969, 12, 30), + datetime.date(1980, 1, 27), + datetime.date(2022, 3, 18), + ] + ) + + +@pytest.fixture +def data_for_sorting(): + """ + Length-3 array with a known sort order. + + This should be three items [B, C, A] with + A < B < C + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return DateArray( + [ + datetime.date(1980, 1, 27), + datetime.date(2022, 3, 18), + datetime.date(1969, 12, 30), + ] + ) + + @pytest.fixture def data_missing(): """Length-2 array with [NA, Valid] @@ -42,6 +97,36 @@ def data_missing(): return DateArray([None, datetime.date(2022, 1, 27)]) +@pytest.fixture +def data_missing_for_sorting(): + """ + Length-3 array with a known sort order. + + This should be three items [B, NA, A] with + A < B and NA missing. 
+ + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return DateArray([datetime.date(1980, 1, 27), None, datetime.date(1969, 12, 30)]) + + +@pytest.fixture +def data_repeated(data): + """ + Generate many datasets. + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + + def gen(count): + for _ in range(count): + yield data + + return gen + + @pytest.fixture def dtype(): return DateDtype() diff --git a/tests/compliance/date/test_date_compliance.py b/tests/compliance/date/test_date_compliance.py index a805ecd..13327a7 100644 --- a/tests/compliance/date/test_date_compliance.py +++ b/tests/compliance/date/test_date_compliance.py @@ -20,7 +20,11 @@ https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/test_period.py """ +import pandas from pandas.tests.extension import base +import pytest + +import db_dtypes class TestDtype(base.BaseDtypeTests): @@ -45,3 +49,28 @@ class TestGetitem(base.BaseGetitemTests): class TestMissing(base.BaseMissingTests): pass + + +# TODO(https://github.com/googleapis/python-db-dtypes-pandas/issues/78): Add +# compliance tests for reduction operations. + + +class TestMethods(base.BaseMethodsTests): + def test_combine_add(self): + pytest.skip("Cannot add dates.") + + @pytest.mark.parametrize("dropna", [True, False]) + def test_value_counts(self, all_data, dropna): + all_data = all_data[:10] + if dropna: + # Overridden from + # https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/base/methods.py + # to avoid difference in dtypes. 
+ other = db_dtypes.DateArray(all_data[~all_data.isna()]) + else: + other = all_data + + result = pandas.Series(all_data).value_counts(dropna=dropna).sort_index() + expected = pandas.Series(other).value_counts(dropna=dropna).sort_index() + + self.assert_series_equal(result, expected) diff --git a/tests/unit/test_date.py b/tests/unit/test_date.py index b8f36f6..bbe74cb 100644 --- a/tests/unit/test_date.py +++ b/tests/unit/test_date.py @@ -328,3 +328,30 @@ def test_date_median_2d(): ) ), ) + + +@pytest.mark.parametrize( + ("search_term", "expected_index"), + ( + (datetime.date(1899, 12, 31), 0), + (datetime.date(1900, 1, 1), 0), + (datetime.date(1920, 2, 2), 1), + (datetime.date(1930, 3, 3), 1), + (datetime.date(1950, 5, 5), 2), + (datetime.date(1990, 9, 9), 3), + (datetime.date(2012, 12, 12), 3), + (datetime.date(2022, 3, 24), 4), + ), +) +def test_date_searchsorted(search_term, expected_index): + test_series = pandas.Series( + [ + datetime.date(1900, 1, 1), + datetime.date(1930, 3, 3), + datetime.date(1980, 8, 8), + datetime.date(2012, 12, 12), + ], + dtype="dbdate", + ) + got = test_series.searchsorted(search_term) + assert got == expected_index From 7495698b3be3b7e8055ae450e24cd0e366b1b72a Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 24 Mar 2022 15:20:19 -0500 Subject: [PATCH 23/26] fix: allow comparison with scalar values (#88) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: address failing compliance tests in DateArray and TimeArray test: add a test session with prerelease versions of dependencies * fix min/max/median for 2D arrays * fixes except for null contains * actually use NaT as 'advertised' * fix!: use `pandas.NaT` for missing values in dbdate and dbtime dtypes This makes them consistent with other date/time dtypes, as well as internally consistent with the advertised `dtype.na_value`. 
BREAKING-CHANGE: dbdate and dbtime dtypes return NaT instead of None for missing values Release-As: 0.4.0 * more progress towards compliance * address errors in TestMethods * move tests * add prerelease deps * fix: address failing tests with pandas 1.5.0 test: add a test session with prerelease versions of dependencies * fix owlbot config * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * document why microsecond precision is used * use correct units * add box_func tests * typo * fix: avoid TypeError when using sorted search * add unit tests * fix: dbdate and dbtime support set item * add TestMethods * fix: allow comparison with scalar values * correct behavior for comparison with different types and shapes * use same dtype in shape comparison tests Co-authored-by: Owl Bot --- db_dtypes/core.py | 6 +- tests/compliance/conftest.py | 62 +++++++++++++++++++ tests/compliance/date/test_date_compliance.py | 24 +++++++ tests/unit/test_dtypes.py | 10 +-- 4 files changed, 92 insertions(+), 10 deletions(-) diff --git a/db_dtypes/core.py b/db_dtypes/core.py index f577960..68123e1 100644 --- a/db_dtypes/core.py +++ b/db_dtypes/core.py @@ -90,14 +90,14 @@ def _cmp_method(self, other, op): if is_scalar(other) and (pandas.isna(other) or type(other) == self.dtype.type): other = type(self)([other]) + if type(other) != type(self): + return NotImplemented + oshape = getattr(other, "shape", None) if oshape != self.shape and oshape != (1,) and self.shape != (1,): raise TypeError( "Can't compare arrays with different shapes", self.shape, oshape ) - - if type(other) != type(self): - return NotImplemented return op(self._ndarray, other._ndarray) def _from_factorized(self, unique, original): diff --git a/tests/compliance/conftest.py b/tests/compliance/conftest.py index 
54b767c..b891ed6 100644 --- a/tests/compliance/conftest.py +++ b/tests/compliance/conftest.py @@ -12,10 +12,23 @@ # See the License for the specific language governing permissions and # limitations under the License. +import operator + import pandas import pytest +@pytest.fixture(params=[True, False]) +def as_array(request): + """ + Boolean fixture to support ExtensionDtype _from_sequence method testing. + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return request.param + + @pytest.fixture(params=[True, False]) def as_frame(request): """ @@ -38,6 +51,36 @@ def as_series(request): return request.param +@pytest.fixture(params=[True, False]) +def box_in_series(request): + """ + Whether to box the data in a Series + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return request.param + + +@pytest.fixture( + params=[ + operator.eq, + operator.ne, + operator.gt, + operator.ge, + operator.lt, + operator.le, + ] +) +def comparison_op(request): + """ + Fixture for operator module comparison functions. + + See: https://github.com/pandas-dev/pandas/blob/main/pandas/conftest.py + """ + return request.param + + @pytest.fixture(params=["ffill", "bfill"]) def fillna_method(request): """ @@ -50,6 +93,25 @@ def fillna_method(request): return request.param +@pytest.fixture( + params=[ + lambda x: 1, + lambda x: [1] * len(x), + lambda x: pandas.Series([1] * len(x)), + lambda x: x, + ], + ids=["scalar", "list", "series", "object"], +) +def groupby_apply_op(request): + """ + Functions to test groupby.apply(). 
+ + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return request.param + + @pytest.fixture def invalid_scalar(data): """ diff --git a/tests/compliance/date/test_date_compliance.py b/tests/compliance/date/test_date_compliance.py index 13327a7..6281986 100644 --- a/tests/compliance/date/test_date_compliance.py +++ b/tests/compliance/date/test_date_compliance.py @@ -74,3 +74,27 @@ def test_value_counts(self, all_data, dropna): expected = pandas.Series(other).value_counts(dropna=dropna).sort_index() self.assert_series_equal(result, expected) + + +class TestCasting(base.BaseCastingTests): + pass + + +class TestGroupby(base.BaseGroupbyTests): + pass + + +class TestSetitem(base.BaseSetitemTests): + pass + + +class TestPrinting(base.BasePrintingTests): + pass + + +# TODO(https://github.com/googleapis/python-db-dtypes-pandas/issues/78): Add +# compliance tests for arithmetic operations. + + +class TestComparisonOps(base.BaseComparisonOpsTests): + pass diff --git a/tests/unit/test_dtypes.py b/tests/unit/test_dtypes.py index 66074d8..dc1613b 100644 --- a/tests/unit/test_dtypes.py +++ b/tests/unit/test_dtypes.py @@ -169,16 +169,12 @@ def test_timearray_comparisons( np.testing.assert_array_equal(comparisons[op](left, r), expected) np.testing.assert_array_equal(complements[op](left, r), ~expected) - # Bad shape - for bad_shape in ([], [1, 2, 3]): + # Bad shape, but same type + for bad_shape in ([], sample_values[:3]): with pytest.raises( TypeError, match="Can't compare arrays with different shapes" ): - comparisons[op](left, np.array(bad_shape)) - with pytest.raises( - TypeError, match="Can't compare arrays with different shapes" - ): - complements[op](left, np.array(bad_shape)) + comparisons[op](left, _cls(dtype)._from_sequence(bad_shape)) # Bad items for bad_items in ( From efe7e6d8953ebf8d2b4d9468c7c92638ea2ec9f9 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 24 Mar 2022 15:51:52 -0500 Subject: [PATCH 24/26] test: add 
final dbdate compliance tests and sort (#89) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: address failing compliance tests in DateArray and TimeArray test: add a test session with prerelease versions of dependencies * fix min/max/median for 2D arrays * fixes except for null contains * actually use NaT as 'advertised' * fix!: use `pandas.NaT` for missing values in dbdate and dbtime dtypes This makes them consistent with other date/time dtypes, as well as internally consistent with the advertised `dtype.na_value`. BREAKING-CHANGE: dbdate and dbtime dtypes return NaT instead of None for missing values Release-As: 0.4.0 * more progress towards compliance * address errors in TestMethods * move tests * add prerelease deps * fix: address failing tests with pandas 1.5.0 test: add a test session with prerelease versions of dependencies * fix owlbot config * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * document why microsecond precision is used * use correct units * add box_func tests * typo * fix: avoid TypeError when using sorted search * add unit tests * fix: dbdate and dbtime support set item * add TestMethods * fix: allow comparison with scalar values * correct behavior for comparison with different types and shapes * use same dtype in shape comparison tests * test: add final dbdate compliance tests and sort * remove redundant index tests Co-authored-by: Owl Bot --- tests/compliance/date/test_date_compliance.py | 43 +++++++++++-------- .../date/test_date_compliance_1_5.py | 4 -- 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/tests/compliance/date/test_date_compliance.py b/tests/compliance/date/test_date_compliance.py index 6281986..e19caf7 100644 --- 
a/tests/compliance/date/test_date_compliance.py +++ b/tests/compliance/date/test_date_compliance.py @@ -27,11 +27,18 @@ import db_dtypes -class TestDtype(base.BaseDtypeTests): +# TODO(https://github.com/googleapis/python-db-dtypes-pandas/issues/87): Add +# compliance tests for arithmetic operations. + +# TODO(https://github.com/googleapis/python-db-dtypes-pandas/issues/78): Add +# compliance tests for reduction operations. + + +class TestComparisonOps(base.BaseComparisonOpsTests): pass -class TestInterface(base.BaseInterfaceTests): +class TestCasting(base.BaseCastingTests): pass @@ -39,7 +46,7 @@ class TestConstructors(base.BaseConstructorsTests): pass -class TestReshaping(base.BaseReshapingTests): +class TestDtype(base.BaseDtypeTests): pass @@ -47,12 +54,20 @@ class TestGetitem(base.BaseGetitemTests): pass -class TestMissing(base.BaseMissingTests): +class TestGroupby(base.BaseGroupbyTests): pass -# TODO(https://github.com/googleapis/python-db-dtypes-pandas/issues/78): Add -# compliance tests for reduction operations. +class TestIndex(base.BaseIndexTests): + pass + + +class TestInterface(base.BaseInterfaceTests): + pass + + +class TestMissing(base.BaseMissingTests): + pass class TestMethods(base.BaseMethodsTests): @@ -76,15 +91,7 @@ def test_value_counts(self, all_data, dropna): self.assert_series_equal(result, expected) -class TestCasting(base.BaseCastingTests): - pass - - -class TestGroupby(base.BaseGroupbyTests): - pass - - -class TestSetitem(base.BaseSetitemTests): +class TestParsing(base.BaseParsingTests): pass @@ -92,9 +99,9 @@ class TestPrinting(base.BasePrintingTests): pass -# TODO(https://github.com/googleapis/python-db-dtypes-pandas/issues/78): Add -# compliance tests for arithmetic operations. 
+class TestReshaping(base.BaseReshapingTests): + pass -class TestComparisonOps(base.BaseComparisonOpsTests): +class TestSetitem(base.BaseSetitemTests): pass diff --git a/tests/compliance/date/test_date_compliance_1_5.py b/tests/compliance/date/test_date_compliance_1_5.py index 9c6da24..e8f2c93 100644 --- a/tests/compliance/date/test_date_compliance_1_5.py +++ b/tests/compliance/date/test_date_compliance_1_5.py @@ -29,7 +29,3 @@ class Test2DCompat(base.NDArrayBacked2DTests): pass - - -class TestIndex(base.BaseIndexTests): - pass From f14fb2bf78d8427b9546db4cdad1d893c1b1e5e1 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 24 Mar 2022 15:59:03 -0500 Subject: [PATCH 25/26] test: add dbtime compliance tests (#90) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: address failing compliance tests in DateArray and TimeArray test: add a test session with prerelease versions of dependencies * fix min/max/median for 2D arrays * fixes except for null contains * actually use NaT as 'advertised' * fix!: use `pandas.NaT` for missing values in dbdate and dbtime dtypes This makes them consistent with other date/time dtypes, as well as internally consistent with the advertised `dtype.na_value`. 
BREAKING-CHANGE: dbdate and dbtime dtypes return NaT instead of None for missing values Release-As: 0.4.0 * more progress towards compliance * address errors in TestMethods * move tests * add prerelease deps * fix: address failing tests with pandas 1.5.0 test: add a test session with prerelease versions of dependencies * fix owlbot config * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * document why microsecond precision is used * use correct units * add box_func tests * typo * fix: avoid TypeError when using sorted search * add unit tests * fix: dbdate and dbtime support set item * add TestMethods * fix: allow comparison with scalar values * correct behavior for comparison with different types and shapes * use same dtype in shape comparison tests * test: add final dbdate compliance tests and sort * test: add dbtime compliance tests Co-authored-by: Owl Bot --- tests/compliance/time/conftest.py | 134 ++++++++++++++++++ tests/compliance/time/test_time_compliance.py | 107 ++++++++++++++ .../time/test_time_compliance_1_5.py | 31 ++++ 3 files changed, 272 insertions(+) create mode 100644 tests/compliance/time/conftest.py create mode 100644 tests/compliance/time/test_time_compliance.py create mode 100644 tests/compliance/time/test_time_compliance_1_5.py diff --git a/tests/compliance/time/conftest.py b/tests/compliance/time/conftest.py new file mode 100644 index 0000000..760a068 --- /dev/null +++ b/tests/compliance/time/conftest.py @@ -0,0 +1,134 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime + +import numpy +import pytest + +from db_dtypes import TimeArray, TimeDtype + + +@pytest.fixture(params=["data", "data_missing"]) +def all_data(request, data, data_missing): + """Parametrized fixture giving 'data' and 'data_missing'""" + if request.param == "data": + return data + elif request.param == "data_missing": + return data_missing + + +@pytest.fixture +def data(): + return TimeArray( + numpy.arange( + datetime.datetime(1970, 1, 1), + datetime.datetime(1970, 1, 2), + datetime.timedelta(microseconds=864_123_456), + dtype="datetime64[ns]", + ) + ) + + +@pytest.fixture +def data_for_grouping(): + """ + Data for factorization, grouping, and unique tests. + + Expected to be like [B, B, NA, NA, A, A, B, C] + + Where A < B < C and NA is missing + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return TimeArray( + [ + datetime.time(11, 45, 29, 987_654), + datetime.time(11, 45, 29, 987_654), + None, + None, + datetime.time(0, 1, 2, 345_678), + datetime.time(0, 1, 2, 345_678), + datetime.time(11, 45, 29, 987_654), + datetime.time(23, 59, 59, 999_999), + ] + ) + + +@pytest.fixture +def data_for_sorting(): + """ + Length-3 array with a known sort order. 
+ + This should be three items [B, C, A] with + A < B < C + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return TimeArray( + [ + datetime.time(11, 45, 29, 987_654), + datetime.time(23, 59, 59, 999_999), + datetime.time(0, 1, 2, 345_678), + ] + ) + + +@pytest.fixture +def data_missing(): + """Length-2 array with [NA, Valid] + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return TimeArray([None, datetime.time(13, 7, 42, 123_456)]) + + +@pytest.fixture +def data_missing_for_sorting(): + """ + Length-3 array with a known sort order. + + This should be three items [B, NA, A] with + A < B and NA missing. + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return TimeArray( + [datetime.time(13, 7, 42, 123_456), None, datetime.time(1, 2, 3, 456_789)] + ) + + +@pytest.fixture +def data_repeated(data): + """ + Generate many datasets. + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + + def gen(count): + for _ in range(count): + yield data + + return gen + + +@pytest.fixture +def dtype(): + return TimeDtype() diff --git a/tests/compliance/time/test_time_compliance.py b/tests/compliance/time/test_time_compliance.py new file mode 100644 index 0000000..ab1e050 --- /dev/null +++ b/tests/compliance/time/test_time_compliance.py @@ -0,0 +1,107 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +""" +Tests for extension interface compliance, inherited from pandas. + +See: +https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/decimal/test_decimal.py +and +https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/test_period.py +""" + +import pandas +from pandas.tests.extension import base +import pytest + +import db_dtypes + + +# TODO(https://github.com/googleapis/python-db-dtypes-pandas/issues/87): Add +# compliance tests for arithmetic operations. + +# TODO(https://github.com/googleapis/python-db-dtypes-pandas/issues/78): Add +# compliance tests for reduction operations. + + +class TestComparisonOps(base.BaseComparisonOpsTests): + pass + + +class TestCasting(base.BaseCastingTests): + pass + + +class TestConstructors(base.BaseConstructorsTests): + pass + + +class TestDtype(base.BaseDtypeTests): + pass + + +class TestGetitem(base.BaseGetitemTests): + pass + + +class TestGroupby(base.BaseGroupbyTests): + pass + + +class TestIndex(base.BaseIndexTests): + pass + + +class TestInterface(base.BaseInterfaceTests): + pass + + +class TestMissing(base.BaseMissingTests): + pass + + +class TestMethods(base.BaseMethodsTests): + def test_combine_add(self): + pytest.skip("Cannot add times.") + + @pytest.mark.parametrize("dropna", [True, False]) + def test_value_counts(self, all_data, dropna): + all_data = all_data[:10] + if dropna: + # Overridden from + # https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/base/methods.py + # to avoid difference in dtypes.
+ other = db_dtypes.TimeArray(all_data[~all_data.isna()]) + else: + other = all_data + + result = pandas.Series(all_data).value_counts(dropna=dropna).sort_index() + expected = pandas.Series(other).value_counts(dropna=dropna).sort_index() + + self.assert_series_equal(result, expected) + + +class TestParsing(base.BaseParsingTests): + pass + + +class TestPrinting(base.BasePrintingTests): + pass + + +class TestReshaping(base.BaseReshapingTests): + pass + + +class TestSetitem(base.BaseSetitemTests): + pass diff --git a/tests/compliance/time/test_time_compliance_1_5.py b/tests/compliance/time/test_time_compliance_1_5.py new file mode 100644 index 0000000..e8f2c93 --- /dev/null +++ b/tests/compliance/time/test_time_compliance_1_5.py @@ -0,0 +1,31 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Tests for extension interface compliance, inherited from pandas. 
+ +See: +https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/decimal/test_decimal.py +and +https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/test_period.py +""" + +from pandas.tests.extension import base +import pytest + +# NDArrayBacked2DTests suite added in https://github.com/pandas-dev/pandas/pull/44974 +pytest.importorskip("pandas", minversion="1.5.0dev") + + +class Test2DCompat(base.NDArrayBacked2DTests): + pass From caad6e0f6ae4a14359425e244805be894a702bf2 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 24 Mar 2022 21:06:15 +0000 Subject: [PATCH 26/26] chore(main): release 0.4.0 (#62) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit :robot: I have created a release *beep* *boop* --- ## [0.4.0](https://github.com/googleapis/python-db-dtypes-pandas/compare/v0.3.1...v0.4.0) (2022-03-24) ### ⚠ BREAKING CHANGES * dbdate and dbtime dtypes return NaT instead of None for missing values ### Features * dbdate and dbtime support numpy.datetime64 values in array constructor ([1db1357](https://github.com/googleapis/python-db-dtypes-pandas/commit/1db1357186b234a28b2ced10174bbd06e2f0ab73)) ### Bug Fixes * address failing 2D array compliance tests in DateArray ([#64](https://github.com/googleapis/python-db-dtypes-pandas/issues/64)) ([b771e05](https://github.com/googleapis/python-db-dtypes-pandas/commit/b771e050acd2bdbf469a97f7477036c159b500f8)) * address failing tests with pandas 1.5.0
([#82](https://github.com/googleapis/python-db-dtypes-pandas/issues/82)) ([38ac28d](https://github.com/googleapis/python-db-dtypes-pandas/commit/38ac28d8b16f9b86b5029c85e45e9f2e034159b7)) * allow comparison with scalar values ([#88](https://github.com/googleapis/python-db-dtypes-pandas/issues/88)) ([7495698](https://github.com/googleapis/python-db-dtypes-pandas/commit/7495698b3be3b7e8055ae450e24cd0e366b1b72a)) * avoid TypeError when using sorted search ([#84](https://github.com/googleapis/python-db-dtypes-pandas/issues/84)) ([42bc2d9](https://github.com/googleapis/python-db-dtypes-pandas/commit/42bc2d90174d152dfed782acf77016da55dbdaca)) * correct TypeError and comparison issues discovered in DateArray compliance tests ([#79](https://github.com/googleapis/python-db-dtypes-pandas/issues/79)) ([1e979cf](https://github.com/googleapis/python-db-dtypes-pandas/commit/1e979cf360eb586e77b415f7b710a8a41c22e981)) * dbdate and dbtime support set item with null values ([#85](https://github.com/googleapis/python-db-dtypes-pandas/issues/85)) ([1db1357](https://github.com/googleapis/python-db-dtypes-pandas/commit/1db1357186b234a28b2ced10174bbd06e2f0ab73)) * use `pandas.NaT` for missing values in dbdate and dbtime dtypes ([#67](https://github.com/googleapis/python-db-dtypes-pandas/issues/67)) ([f903c2c](https://github.com/googleapis/python-db-dtypes-pandas/commit/f903c2c68da1629241cf3bf37e1226babae669f4)) * use public pandas APIs where possible ([#60](https://github.com/googleapis/python-db-dtypes-pandas/issues/60)) ([e9d41d1](https://github.com/googleapis/python-db-dtypes-pandas/commit/e9d41d17b5d6a7d83c46e2497feb8e314545adcb)) ### Tests * add dbtime compliance tests ([#90](https://github.com/googleapis/python-db-dtypes-pandas/issues/90)) ([f14fb2b](https://github.com/googleapis/python-db-dtypes-pandas/commit/f14fb2bf78d8427b9546db4cdad1d893c1b1e5e1)) * add final dbdate compliance tests and sort ([#89](https://github.com/googleapis/python-db-dtypes-pandas/issues/89)) 
([efe7e6d](https://github.com/googleapis/python-db-dtypes-pandas/commit/efe7e6d8953ebf8d2b4d9468c7c92638ea2ec9f9)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). --- CHANGELOG.md | 29 +++++++++++++++++++++++++++++ db_dtypes/version.py | 2 +- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0298e79..b46bc6b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,34 @@ # Changelog +## [0.4.0](https://github.com/googleapis/python-db-dtypes-pandas/compare/v0.3.1...v0.4.0) (2022-03-24) + + +### ⚠ BREAKING CHANGES + +* dbdate and dbtime dtypes return NaT instead of None for missing values + +### Features + +* dbdate and dbtime support numpy.datetime64 values in array constructor ([1db1357](https://github.com/googleapis/python-db-dtypes-pandas/commit/1db1357186b234a28b2ced10174bbd06e2f0ab73)) + + +### Bug Fixes + +* address failing 2D array compliance tests in DateArray ([#64](https://github.com/googleapis/python-db-dtypes-pandas/issues/64)) ([b771e05](https://github.com/googleapis/python-db-dtypes-pandas/commit/b771e050acd2bdbf469a97f7477036c159b500f8)) +* address failing tests with pandas 1.5.0 ([#82](https://github.com/googleapis/python-db-dtypes-pandas/issues/82)) ([38ac28d](https://github.com/googleapis/python-db-dtypes-pandas/commit/38ac28d8b16f9b86b5029c85e45e9f2e034159b7)) +* allow comparison with scalar values ([#88](https://github.com/googleapis/python-db-dtypes-pandas/issues/88)) ([7495698](https://github.com/googleapis/python-db-dtypes-pandas/commit/7495698b3be3b7e8055ae450e24cd0e366b1b72a)) +* avoid TypeError when using sorted search ([#84](https://github.com/googleapis/python-db-dtypes-pandas/issues/84)) ([42bc2d9](https://github.com/googleapis/python-db-dtypes-pandas/commit/42bc2d90174d152dfed782acf77016da55dbdaca)) +* correct TypeError and comparison issues discovered in DateArray 
compliance tests ([#79](https://github.com/googleapis/python-db-dtypes-pandas/issues/79)) ([1e979cf](https://github.com/googleapis/python-db-dtypes-pandas/commit/1e979cf360eb586e77b415f7b710a8a41c22e981)) +* dbdate and dbtime support set item with null values ([#85](https://github.com/googleapis/python-db-dtypes-pandas/issues/85)) ([1db1357](https://github.com/googleapis/python-db-dtypes-pandas/commit/1db1357186b234a28b2ced10174bbd06e2f0ab73)) +* use `pandas.NaT` for missing values in dbdate and dbtime dtypes ([#67](https://github.com/googleapis/python-db-dtypes-pandas/issues/67)) ([f903c2c](https://github.com/googleapis/python-db-dtypes-pandas/commit/f903c2c68da1629241cf3bf37e1226babae669f4)) +* use public pandas APIs where possible ([#60](https://github.com/googleapis/python-db-dtypes-pandas/issues/60)) ([e9d41d1](https://github.com/googleapis/python-db-dtypes-pandas/commit/e9d41d17b5d6a7d83c46e2497feb8e314545adcb)) + + +### Tests + +* add dbtime compliance tests ([#90](https://github.com/googleapis/python-db-dtypes-pandas/issues/90)) ([f14fb2b](https://github.com/googleapis/python-db-dtypes-pandas/commit/f14fb2bf78d8427b9546db4cdad1d893c1b1e5e1)) +* add final dbdate compliance tests and sort ([#89](https://github.com/googleapis/python-db-dtypes-pandas/issues/89)) ([efe7e6d](https://github.com/googleapis/python-db-dtypes-pandas/commit/efe7e6d8953ebf8d2b4d9468c7c92638ea2ec9f9)) + ### [0.3.1](https://www.github.com/googleapis/python-db-dtypes-pandas/compare/v0.3.0...v0.3.1) (2021-12-04) diff --git a/db_dtypes/version.py b/db_dtypes/version.py index b118f08..c0c2669 100644 --- a/db_dtypes/version.py +++ b/db_dtypes/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "0.3.1" +__version__ = "0.4.0"