diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 7519fa3..44c78f7 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,16 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:0e18b9475fbeb12d9ad4302283171edebb6baf2dfca1bd215ee3b34ed79d95d7 + digest: sha256:4e1991042fe54b991db9ca17c8fb386e61b22fe4d1472a568bf0fcac85dcf5d3 diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index f8714a3..193b436 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -8,5 +8,5 @@ # @googleapis/yoshi-python @googleapis/api-bigquery are the default owners for changes in this repo * @googleapis/yoshi-python @googleapis/api-bigquery -# @googleapis/python-samples-owners @googleapis/api-bigquery are the default owners for samples changes -/samples/ @googleapis/python-samples-owners @googleapis/api-bigquery +# @googleapis/python-samples-reviewers @googleapis/api-bigquery are the default owners for samples changes +/samples/ @googleapis/python-samples-reviewers @googleapis/api-bigquery diff --git a/.github/release-please.yml b/.github/release-please.yml index 4507ad0..466597e 100644 --- a/.github/release-please.yml +++ b/.github/release-please.yml @@ -1 +1,2 @@ releaseType: python +handleGHRelease: true diff --git a/.github/release-trigger.yml b/.github/release-trigger.yml new file mode 100644 index 0000000..d4ca941 --- /dev/null +++ b/.github/release-trigger.yml @@ -0,0 +1 @@ +enabled: true diff --git a/.github/workflows/compliance.yml b/.github/workflows/compliance.yml new file mode 100644 index 0000000..eca8cc2 --- /dev/null +++ b/.github/workflows/compliance.yml @@ -0,0 +1,48 @@ +on: + pull_request: + branches: + - main +name: unittest +jobs: + compliance: + runs-on: ubuntu-latest + strategy: + matrix: + python: ['3.10'] + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Setup Python + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python }} + - name: Install nox + run: | + python -m pip install --upgrade setuptools pip wheel + python -m pip install nox + - name: Run compliance tests + env: + COVERAGE_FILE: .coverage-compliance-${{ matrix.python }} + run: | + nox -s compliance + compliance-prerelease: + runs-on: ubuntu-latest + strategy: + matrix: + python: ['3.10'] + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Setup Python + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python }} + - name: Install nox + run: | + python -m pip install --upgrade setuptools pip wheel + python -m pip install nox + - name: Run compliance prerelease tests + env: + COVERAGE_FILE: .coverage-compliance-prerelease-${{ matrix.python }} + run: | + nox -s compliance_prerelease diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000..b46d730 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,38 @@ +on: + pull_request: + branches: + - main +name: docs +jobs: + docs: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Setup Python + uses: actions/setup-python@v3 + with: + python-version: "3.10" + - name: Install nox + run: | + python -m pip install --upgrade setuptools pip wheel + python -m pip install nox + - name: Run docs + run: | + nox -s docs + docfx: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Setup Python + uses: actions/setup-python@v3 + with: + python-version: "3.10" + - name: Install nox + run: | + python -m pip install --upgrade setuptools pip wheel + python -m pip install nox + - name: Run docfx + run: | + nox -s docfx diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..f512a49 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,25 @@ +on: + pull_request: + branches: + - main +name: lint +jobs: + lint: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Setup Python + uses: actions/setup-python@v3 + with: + python-version: "3.10" + - name: Install nox + run: | + python -m pip install --upgrade setuptools pip wheel + python -m pip install nox + - name: Run lint + run: | + nox -s lint + - name: Run lint_setup_py + run: | + nox -s lint_setup_py diff --git a/.github/workflows/unittest-prerelease.yml b/.github/workflows/unittest-prerelease.yml new file mode 100644 index 0000000..a11568a --- /dev/null +++ b/.github/workflows/unittest-prerelease.yml @@ -0,0 +1,32 @@ +on: + pull_request: + branches: + - main +name: unittest-prerelease +jobs: + unit: + runs-on: ubuntu-latest + strategy: + matrix: + python: ['3.10'] + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Setup Python + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python }} + - name: Install nox + run: | + python -m pip install --upgrade setuptools pip wheel + python -m pip install nox + - name: Run unit tests + env: + COVERAGE_FILE: .coverage-prerelease-${{ matrix.python }} + run: | + nox -s unit_prerelease + - name: Upload coverage results + uses: actions/upload-artifact@v3 + with: + name: coverage-artifacts + path: .coverage-${{ matrix.python }} diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml new file mode 100644 index 0000000..e5be6ed --- /dev/null +++ b/.github/workflows/unittest.yml @@ -0,0 +1,57 @@ +on: + pull_request: + branches: + - main +name: unittest +jobs: + unit: + runs-on: ubuntu-latest + strategy: + matrix: + python: ['3.6', '3.7', '3.8', '3.9', '3.10'] + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Setup Python + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python }} + - name: Install nox + run: | + python -m pip install --upgrade setuptools pip wheel + python -m pip install nox + - name: Run unit tests + env: + COVERAGE_FILE: .coverage-${{ matrix.python }} + run: | + nox -s unit-${{ matrix.python }} + - name: Upload coverage results + uses: actions/upload-artifact@v3 + with: + name: coverage-artifacts + path: .coverage-${{ matrix.python }} + + cover: + runs-on: ubuntu-latest + needs: + - unit + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Setup Python + uses: actions/setup-python@v3 + with: + python-version: "3.10" + - name: Install coverage + run: | + python -m pip install --upgrade setuptools pip wheel + python -m pip install coverage + - name: Download coverage results + uses: actions/download-artifact@v3 + with: + name: coverage-artifacts + path: .coverage-results/ + - name: Report coverage results + run: | + coverage combine .coverage-results/.coverage* + coverage report --show-missing --fail-under=100 diff --git a/.kokoro/release.sh b/.kokoro/release.sh index 297fec5..aa6052b 100755 --- a/.kokoro/release.sh +++ b/.kokoro/release.sh @@ -26,7 +26,7 @@ python3 -m pip install --upgrade twine wheel setuptools export PYTHONUNBUFFERED=1 # Move into the package, build the distribution and upload. -TWINE_PASSWORD=$(cat "${KOKORO_GFILE_DIR}/secret_manager/google-cloud-pypi-token") +TWINE_PASSWORD=$(cat "${KOKORO_KEYSTORE_DIR}/73713_google-cloud-pypi-token-keystore-1") cd github/python-db-dtypes-pandas python3 setup.py sdist bdist_wheel twine upload --username __token__ --password "${TWINE_PASSWORD}" dist/* diff --git a/.kokoro/release/common.cfg b/.kokoro/release/common.cfg index 5917933..44cc384 100644 --- a/.kokoro/release/common.cfg +++ b/.kokoro/release/common.cfg @@ -23,8 +23,18 @@ env_vars: { value: "github/python-db-dtypes-pandas/.kokoro/release.sh" } +# Fetch PyPI password +before_action { + fetch_keystore { + keystore_resource { + keystore_config_id: 73713 + keyname: "google-cloud-pypi-token-keystore-1" + } + } +} + # Tokens needed to report release status back to GitHub env_vars: { key: "SECRET_MANAGER_KEYS" - value: "releasetool-publish-reporter-app,releasetool-publish-reporter-googleapis-installation,releasetool-publish-reporter-pem,google-cloud-pypi-token" + value: "releasetool-publish-reporter-app,releasetool-publish-reporter-googleapis-installation,releasetool-publish-reporter-pem" } diff --git a/.repo-metadata.json b/.repo-metadata.json index 0f8ca37..176e5d9 100644 --- a/.repo-metadata.json +++ b/.repo-metadata.json @@ -2,7 +2,7 @@ "name": "db-dtypes", "name_pretty": "Pandas Data Types for SQL systems (BigQuery, Spanner)", "client_documentation": "https://googleapis.dev/python/db-dtypes/latest/index.html", - "release_level": "beta", + "release_level": "preview", "language": "python", "library_type": "INTEGRATION", "repo": "googleapis/python-db-dtypes-pandas", diff --git a/CHANGELOG.md b/CHANGELOG.md index 0298e79..b46bc6b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,34 @@ # Changelog +## [0.4.0](https://github.com/googleapis/python-db-dtypes-pandas/compare/v0.3.1...v0.4.0) (2022-03-24) + + +### ⚠ BREAKING CHANGES + +* dbdate and dbtime dtypes return NaT instead of None for missing values + +### Features + +* dbdate and dbtime support numpy.datetime64 values in array constructor ([1db1357](https://github.com/googleapis/python-db-dtypes-pandas/commit/1db1357186b234a28b2ced10174bbd06e2f0ab73)) + + +### Bug Fixes + +* address failing 2D array compliance tests in DateArray ([#64](https://github.com/googleapis/python-db-dtypes-pandas/issues/64)) ([b771e05](https://github.com/googleapis/python-db-dtypes-pandas/commit/b771e050acd2bdbf469a97f7477036c159b500f8)) +* address failing tests with pandas 1.5.0 ([#82](https://github.com/googleapis/python-db-dtypes-pandas/issues/82)) ([38ac28d](https://github.com/googleapis/python-db-dtypes-pandas/commit/38ac28d8b16f9b86b5029c85e45e9f2e034159b7)) +* allow comparison with scalar values ([#88](https://github.com/googleapis/python-db-dtypes-pandas/issues/88)) ([7495698](https://github.com/googleapis/python-db-dtypes-pandas/commit/7495698b3be3b7e8055ae450e24cd0e366b1b72a)) +* avoid TypeError when using sorted search ([#84](https://github.com/googleapis/python-db-dtypes-pandas/issues/84)) ([42bc2d9](https://github.com/googleapis/python-db-dtypes-pandas/commit/42bc2d90174d152dfed782acf77016da55dbdaca)) +* correct TypeError and comparison issues discovered in DateArray compliance tests ([#79](https://github.com/googleapis/python-db-dtypes-pandas/issues/79)) ([1e979cf](https://github.com/googleapis/python-db-dtypes-pandas/commit/1e979cf360eb586e77b415f7b710a8a41c22e981)) +* dbdate and dbtime support set item with null values ([#85](https://github.com/googleapis/python-db-dtypes-pandas/issues/85)) ([1db1357](https://github.com/googleapis/python-db-dtypes-pandas/commit/1db1357186b234a28b2ced10174bbd06e2f0ab73)) +* use `pandas.NaT` for missing values in dbdate and dbtime dtypes ([#67](https://github.com/googleapis/python-db-dtypes-pandas/issues/67)) ([f903c2c](https://github.com/googleapis/python-db-dtypes-pandas/commit/f903c2c68da1629241cf3bf37e1226babae669f4)) +* use public pandas APIs where possible ([#60](https://github.com/googleapis/python-db-dtypes-pandas/issues/60)) ([e9d41d1](https://github.com/googleapis/python-db-dtypes-pandas/commit/e9d41d17b5d6a7d83c46e2497feb8e314545adcb)) + + +### Tests + +* add dbtime compliance tests ([#90](https://github.com/googleapis/python-db-dtypes-pandas/issues/90)) ([f14fb2b](https://github.com/googleapis/python-db-dtypes-pandas/commit/f14fb2bf78d8427b9546db4cdad1d893c1b1e5e1)) +* add final dbdate compliance tests and sort ([#89](https://github.com/googleapis/python-db-dtypes-pandas/issues/89)) ([efe7e6d](https://github.com/googleapis/python-db-dtypes-pandas/commit/efe7e6d8953ebf8d2b4d9468c7c92638ea2ec9f9)) + ### [0.3.1](https://www.github.com/googleapis/python-db-dtypes-pandas/compare/v0.3.0...v0.3.1) (2021-12-04) diff --git a/db_dtypes/__init__.py b/db_dtypes/__init__.py index 056be28..7889dac 100644 --- a/db_dtypes/__init__.py +++ b/db_dtypes/__init__.py @@ -22,13 +22,7 @@ import numpy import packaging.version import pandas -import pandas.compat.numpy.function -import pandas.core.algorithms -import pandas.core.arrays -import pandas.core.dtypes.base -import pandas.core.dtypes.dtypes -import pandas.core.dtypes.generic -import pandas.core.nanops +import pandas.api.extensions import pyarrow import pyarrow.compute @@ -40,11 +34,19 @@ time_dtype_name = "dbtime" _EPOCH = datetime.datetime(1970, 1, 1) _NPEPOCH = numpy.datetime64(_EPOCH) +_NP_DTYPE = "datetime64[ns]" + +# Numpy converts datetime64 scalars to datetime.datetime only if microsecond or +# smaller precision is used. +# +# TODO(https://github.com/googleapis/python-db-dtypes-pandas/issues/63): Keep +# nanosecond precision when boxing scalars. +_NP_BOX_DTYPE = "datetime64[us]" pandas_release = packaging.version.parse(pandas.__version__).release -@pandas.core.dtypes.dtypes.register_extension_dtype +@pandas.api.extensions.register_extension_dtype class TimeDtype(core.BaseDatetimeDtype): """ Extension dtype for time data. @@ -104,6 +106,9 @@ def _datetime( r"(?:\.(?P\d*))?)?)?\s*$" ).match, ) -> Optional[numpy.datetime64]: + if isinstance(scalar, numpy.datetime64): + return scalar + # Convert pyarrow values to datetime.time. if isinstance(scalar, (pyarrow.Time32Scalar, pyarrow.Time64Scalar)): scalar = ( @@ -113,8 +118,8 @@ def _datetime( .as_py() ) - if scalar is None: - return None + if pandas.isna(scalar): + return numpy.datetime64("NaT") if isinstance(scalar, datetime.time): return pandas.Timestamp( year=1970, @@ -151,16 +156,18 @@ def _datetime( raise TypeError("Invalid value type", scalar) def _box_func(self, x): - if pandas.isnull(x): - return None + if pandas.isna(x): + return pandas.NaT try: - return x.astype("\d+)-(?P\d+)-(?P\d+)\s*$").match, ) -> Optional[numpy.datetime64]: + if isinstance(scalar, numpy.datetime64): + return scalar + # Convert pyarrow values to datetime.date. if isinstance(scalar, (pyarrow.Date32Scalar, pyarrow.Date64Scalar)): scalar = scalar.as_py() - if scalar is None: - return None + if pandas.isna(scalar): + return numpy.datetime64("NaT") elif isinstance(scalar, datetime.date): return pandas.Timestamp( year=scalar.year, month=scalar.month, day=scalar.day @@ -256,13 +266,15 @@ def _datetime( raise TypeError("Invalid value type", scalar) def _box_func(self, x): - if pandas.isnull(x): - return None + if pandas.isna(x): + return pandas.NaT try: - return x.astype("= (1, 2): - - def median( - self, - *, - axis: Optional[int] = None, - out=None, - overwrite_input: bool = False, - keepdims: bool = False, - skipna: bool = True, - ): - pandas.compat.numpy.function.validate_median( - (), - {"out": out, "overwrite_input": overwrite_input, "keepdims": keepdims}, - ) - result = pandas.core.nanops.nanmedian( - self._ndarray, axis=axis, skipna=skipna - ) + if axis is None or self.ndim == 1: + return self._box_func(result) + return self._from_backing_data(result) + + def median( + self, + *, + axis: Optional[int] = None, + out=None, + overwrite_input: bool = False, + keepdims: bool = False, + skipna: bool = True, + ): + if not hasattr(pandas_backports, "numpy_validate_median"): + raise NotImplementedError("Need pandas 1.3 or later to calculate median.") + + pandas_backports.numpy_validate_median( + (), {"out": out, "overwrite_input": overwrite_input, "keepdims": keepdims}, + ) + result = pandas_backports.nanmedian(self._ndarray, axis=axis, skipna=skipna) + if axis is None or self.ndim == 1: return self._box_func(result) + return self._from_backing_data(result) diff --git a/db_dtypes/pandas_backports.py b/db_dtypes/pandas_backports.py index 003224f..0966e83 100644 --- a/db_dtypes/pandas_backports.py +++ b/db_dtypes/pandas_backports.py @@ -20,15 +20,32 @@ """ import operator +from typing import Any import numpy import packaging.version import pandas -from pandas._libs.lib import is_integer +from pandas.api.types import is_integer +import pandas.compat.numpy.function +import pandas.core.nanops pandas_release = packaging.version.parse(pandas.__version__).release +# Create aliases for private methods in case they move in a future version. +nanall = pandas.core.nanops.nanall +nanany = pandas.core.nanops.nanany +nanmax = pandas.core.nanops.nanmax +nanmin = pandas.core.nanops.nanmin +numpy_validate_all = pandas.compat.numpy.function.validate_all +numpy_validate_any = pandas.compat.numpy.function.validate_any +numpy_validate_max = pandas.compat.numpy.function.validate_max +numpy_validate_min = pandas.compat.numpy.function.validate_min + +if pandas_release >= (1, 3): + nanmedian = pandas.core.nanops.nanmedian + numpy_validate_median = pandas.compat.numpy.function.validate_median + def import_default(module_name, force=False, default=None): """ @@ -55,6 +72,10 @@ def import_default(module_name, force=False, default=None): return getattr(module, name, default) +# pandas.core.arraylike.OpsMixin is private, but the related public API +# "ExtensionScalarOpsMixin" is not sufficient for adding dates to times. +# It results in unsupported operand type(s) for +: 'datetime.time' and +# 'datetime.date' @import_default("pandas.core.arraylike") class OpsMixin: def _cmp_method(self, other, op): # pragma: NO COVER @@ -81,14 +102,12 @@ def __ge__(self, other): __add__ = __radd__ = __sub__ = lambda self, other: NotImplemented +# TODO: use public API once pandas 1.5 / 2.x is released. +# See: https://github.com/pandas-dev/pandas/pull/45544 @import_default("pandas.core.arrays._mixins", pandas_release < (1, 3)) class NDArrayBackedExtensionArray(pandas.core.arrays.base.ExtensionArray): - - ndim = 1 - def __init__(self, values, dtype): assert isinstance(values, numpy.ndarray) - assert values.ndim == 1 self._ndarray = values self._dtype = dtype @@ -103,7 +122,7 @@ def __getitem__(self, index): return self.__class__(value, self._dtype) def __setitem__(self, index, value): - self._ndarray[index] = value + self._ndarray[index] = self._validate_setitem_value(value) def __len__(self): return len(self._ndarray) @@ -130,6 +149,28 @@ def copy(self): def repeat(self, n): return self.__class__(self._ndarray.repeat(n), self._dtype) + def take( + self, + indices, + *, + allow_fill: bool = False, + fill_value: Any = None, + axis: int = 0, + ): + from pandas.core.algorithms import take + + if allow_fill: + fill_value = self._validate_scalar(fill_value) + + new_data = take( + self._ndarray, + indices, + allow_fill=allow_fill, + fill_value=fill_value, + axis=axis, + ) + return self._from_backing_data(new_data) + @classmethod def _concat_same_type(cls, to_concat, axis=0): dtypes = {str(x.dtype) for x in to_concat} diff --git a/db_dtypes/version.py b/db_dtypes/version.py index b118f08..c0c2669 100644 --- a/db_dtypes/version.py +++ b/db_dtypes/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "0.3.1" +__version__ = "0.4.0" diff --git a/noxfile.py b/noxfile.py index 5f48361..e3f4d5c 100644 --- a/noxfile.py +++ b/noxfile.py @@ -19,6 +19,7 @@ from __future__ import absolute_import import os import pathlib +import re import shutil import nox @@ -37,6 +38,9 @@ nox.options.sessions = [ "lint", "unit", + "unit_prerelease", + "compliance", + "compliance_prerelease", "cover", "lint_setup_py", "blacken", @@ -77,7 +81,7 @@ def lint_setup_py(session): session.run("python", "setup.py", "check", "--restructuredtext", "--strict") -def default(session): +def default(session, tests_path): # Install all test dependencies, then install this package in-place. constraints_path = str( @@ -106,15 +110,109 @@ def default(session): "--cov-config=.coveragerc", "--cov-report=", "--cov-fail-under=0", - os.path.join("tests", "unit"), + tests_path, *session.posargs, ) +def prerelease(session, tests_path): + constraints_path = str( + CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt" + ) + + # PyArrow prerelease packages are published to an alternative PyPI host. + # https://arrow.apache.org/docs/python/install.html#installing-nightly-packages + session.install( + "--extra-index-url", + "https://pypi.fury.io/arrow-nightlies/", + "--prefer-binary", + "--pre", + "--upgrade", + "pyarrow", + ) + session.install( + "--extra-index-url", + "https://pypi.anaconda.org/scipy-wheels-nightly/simple", + "--prefer-binary", + "--pre", + "--upgrade", + "pandas", + ) + session.install( + "mock", + "asyncmock", + "pytest", + "pytest-cov", + "pytest-asyncio", + "-c", + constraints_path, + ) + + # Because we test minimum dependency versions on the minimum Python + # version, the first version we test with in the unit tests sessions has a + # constraints file containing all dependencies and extras. + with open( + CURRENT_DIRECTORY + / "testing" + / f"constraints-{UNIT_TEST_PYTHON_VERSIONS[0]}.txt", + encoding="utf-8", + ) as constraints_file: + constraints_text = constraints_file.read() + + # Ignore leading whitespace and comment lines. + deps = [ + match.group(1) + for match in re.finditer( + r"^\s*(\S+)(?===\S+)", constraints_text, flags=re.MULTILINE + ) + ] + + # We use --no-deps to ensure that pre-release versions aren't overwritten + # by the version ranges in setup.py. + session.install(*deps) + session.install("--no-deps", "-e", ".") + + # Print out prerelease package versions. + session.run("python", "-m", "pip", "freeze") + + # Run py.test against the unit tests. + session.run( + "py.test", + "--quiet", + f"--junitxml=prerelease_unit_{session.python}_sponge_log.xml", + "--cov=db_dtypes", + "--cov=tests/unit", + "--cov-append", + "--cov-config=.coveragerc", + "--cov-report=", + "--cov-fail-under=0", + tests_path, + *session.posargs, + ) + + +@nox.session(python=UNIT_TEST_PYTHON_VERSIONS[-1]) +def compliance(session): + """Run the compliance test suite.""" + default(session, os.path.join("tests", "compliance")) + + +@nox.session(python=UNIT_TEST_PYTHON_VERSIONS[-1]) +def compliance_prerelease(session): + """Run the compliance test suite with prerelease dependencies.""" + prerelease(session, os.path.join("tests", "compliance")) + + @nox.session(python=UNIT_TEST_PYTHON_VERSIONS) def unit(session): """Run the unit test suite.""" - default(session) + default(session, os.path.join("tests", "unit")) + + +@nox.session(python=UNIT_TEST_PYTHON_VERSIONS[-1]) +def unit_prerelease(session): + """Run the unit test suite with prerelease dependencies.""" + prerelease(session, os.path.join("tests", "unit")) @nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS) diff --git a/owlbot.py b/owlbot.py index 30f3b3d..ec5a5bf 100644 --- a/owlbot.py +++ b/owlbot.py @@ -49,6 +49,10 @@ ["noxfile.py"], r"[\"']google[\"']", '"db_dtypes"', ) +s.replace( + ["noxfile.py"], r"import shutil", "import re\nimport shutil", +) + s.replace( ["noxfile.py"], "--cov=google", "--cov=db_dtypes", ) @@ -64,11 +68,129 @@ new_sessions = """ "lint", "unit", + "unit_prerelease", + "compliance", + "compliance_prerelease", "cover", """ s.replace(["noxfile.py"], old_sessions, new_sessions) +# Add compliance tests. +s.replace( + ["noxfile.py"], r"def default\(session\):", "def default(session, tests_path):" +) +s.replace(["noxfile.py"], r'os.path.join\("tests", "unit"\),', "tests_path,") +s.replace( + ["noxfile.py"], + r''' +@nox.session\(python=UNIT_TEST_PYTHON_VERSIONS\) +def unit\(session\): + """Run the unit test suite.""" + default\(session\) +''', + r''' +def prerelease(session, tests_path): + constraints_path = str( + CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt" + ) + + # PyArrow prerelease packages are published to an alternative PyPI host. + # https://arrow.apache.org/docs/python/install.html#installing-nightly-packages + session.install( + "--extra-index-url", + "https://pypi.fury.io/arrow-nightlies/", + "--prefer-binary", + "--pre", + "--upgrade", + "pyarrow", + ) + session.install( + "--extra-index-url", + "https://pypi.anaconda.org/scipy-wheels-nightly/simple", + "--prefer-binary", + "--pre", + "--upgrade", + "pandas", + ) + session.install( + "mock", + "asyncmock", + "pytest", + "pytest-cov", + "pytest-asyncio", + "-c", + constraints_path, + ) + + # Because we test minimum dependency versions on the minimum Python + # version, the first version we test with in the unit tests sessions has a + # constraints file containing all dependencies and extras. + with open( + CURRENT_DIRECTORY + / "testing" + / f"constraints-{UNIT_TEST_PYTHON_VERSIONS[0]}.txt", + encoding="utf-8", + ) as constraints_file: + constraints_text = constraints_file.read() + + # Ignore leading whitespace and comment lines. + deps = [ + match.group(1) + for match in re.finditer( + r"^\\s*(\\S+)(?===\\S+)", constraints_text, flags=re.MULTILINE + ) + ] + + # We use --no-deps to ensure that pre-release versions aren't overwritten + # by the version ranges in setup.py. + session.install(*deps) + session.install("--no-deps", "-e", ".") + + # Print out prerelease package versions. + session.run("python", "-m", "pip", "freeze") + + # Run py.test against the unit tests. + session.run( + "py.test", + "--quiet", + f"--junitxml=prerelease_unit_{session.python}_sponge_log.xml", + "--cov=db_dtypes", + "--cov=tests/unit", + "--cov-append", + "--cov-config=.coveragerc", + "--cov-report=", + "--cov-fail-under=0", + tests_path, + *session.posargs, + ) + + +@nox.session(python=UNIT_TEST_PYTHON_VERSIONS[-1]) +def compliance(session): + """Run the compliance test suite.""" + default(session, os.path.join("tests", "compliance")) + + +@nox.session(python=UNIT_TEST_PYTHON_VERSIONS[-1]) +def compliance_prerelease(session): + """Run the compliance test suite with prerelease dependencies.""" + prerelease(session, os.path.join("tests", "compliance")) + + +@nox.session(python=UNIT_TEST_PYTHON_VERSIONS) +def unit(session): + """Run the unit test suite.""" + default(session, os.path.join("tests", "unit")) + + +@nox.session(python=UNIT_TEST_PYTHON_VERSIONS[-1]) +def unit_prerelease(session): + """Run the unit test suite with prerelease dependencies.""" + prerelease(session, os.path.join("tests", "unit")) +''', +) + # ---------------------------------------------------------------------------- # Samples templates # ---------------------------------------------------------------------------- diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py index 93a9122..85f5836 100644 --- a/samples/snippets/noxfile.py +++ b/samples/snippets/noxfile.py @@ -14,6 +14,7 @@ from __future__ import print_function +import glob import os from pathlib import Path import sys @@ -184,21 +185,34 @@ def blacken(session: nox.sessions.Session) -> None: def _session_tests( session: nox.sessions.Session, post_install: Callable = None ) -> None: + # check for presence of tests + test_list = glob.glob("*_test.py") + glob.glob("test_*.py") + test_list.extend(glob.glob("tests")) + + if len(test_list) == 0: + print("No tests found, skipping directory.") + return + if TEST_CONFIG["pip_version_override"]: pip_version = TEST_CONFIG["pip_version_override"] session.install(f"pip=={pip_version}") """Runs py.test for a particular project.""" + concurrent_args = [] if os.path.exists("requirements.txt"): if os.path.exists("constraints.txt"): session.install("-r", "requirements.txt", "-c", "constraints.txt") else: session.install("-r", "requirements.txt") + with open("requirements.txt") as rfile: + packages = rfile.read() if os.path.exists("requirements-test.txt"): if os.path.exists("constraints-test.txt"): session.install("-r", "requirements-test.txt", "-c", "constraints-test.txt") else: session.install("-r", "requirements-test.txt") + with open("requirements-test.txt") as rtfile: + packages += rtfile.read() if INSTALL_LIBRARY_FROM_SOURCE: session.install("-e", _get_repo_root()) @@ -206,9 +220,14 @@ def _session_tests( if post_install: post_install(session) + if "pytest-parallel" in packages: + concurrent_args.extend(["--workers", "auto", "--tests-per-worker", "auto"]) + elif "pytest-xdist" in packages: + concurrent_args.extend(["-n", "auto"]) + session.run( "pytest", - *(PYTEST_COMMON_ARGS + session.posargs), + *(PYTEST_COMMON_ARGS + session.posargs + concurrent_args), # Pytest will return 5 when no tests are collected. This can happen # on travis where slow and flaky tests are excluded. # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 9270945..4f6bf64 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1 +1 @@ -pytest==6.2.5 +pytest==7.1.1 diff --git a/setup.py b/setup.py index 8def678..7ad5119 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,7 @@ dependencies = [ "packaging >= 17.0", "pandas >= 0.24.2, < 2.0dev", - "pyarrow>=3.0.0, <7.0dev", + "pyarrow>=3.0.0, <8.0dev", "numpy >= 1.16.6, < 2.0dev", ] diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt index eebb9da..d814dcd 100644 --- a/testing/constraints-3.9.txt +++ b/testing/constraints-3.9.txt @@ -1 +1,2 @@ -sqlalchemy>=1.4.13 +# Make sure we test with pandas 1.3.0. The Python version isn't that relevant. +pandas==1.3.0 diff --git a/tests/compliance/conftest.py b/tests/compliance/conftest.py new file mode 100644 index 0000000..b891ed6 --- /dev/null +++ b/tests/compliance/conftest.py @@ -0,0 +1,175 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import operator + +import pandas +import pytest + + +@pytest.fixture(params=[True, False]) +def as_array(request): + """ + Boolean fixture to support ExtensionDtype _from_sequence method testing. + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return request.param + + +@pytest.fixture(params=[True, False]) +def as_frame(request): + """ + Boolean fixture to support Series and Series.to_frame() comparison testing. + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return request.param + + +@pytest.fixture(params=[True, False]) +def as_series(request): + """ + Boolean fixture to support arr and Series(arr) comparison testing. + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return request.param + + +@pytest.fixture(params=[True, False]) +def box_in_series(request): + """ + Whether to box the data in a Series + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return request.param + + +@pytest.fixture( + params=[ + operator.eq, + operator.ne, + operator.gt, + operator.ge, + operator.lt, + operator.le, + ] +) +def comparison_op(request): + """ + Fixture for operator module comparison functions. + + See: https://github.com/pandas-dev/pandas/blob/main/pandas/conftest.py + """ + return request.param + + +@pytest.fixture(params=["ffill", "bfill"]) +def fillna_method(request): + """ + Parametrized fixture giving method parameters 'ffill' and 'bfill' for + Series.fillna(method=) testing. + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return request.param + + +@pytest.fixture( + params=[ + lambda x: 1, + lambda x: [1] * len(x), + lambda x: pandas.Series([1] * len(x)), + lambda x: x, + ], + ids=["scalar", "list", "series", "object"], +) +def groupby_apply_op(request): + """ + Functions to test groupby.apply(). + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return request.param + + +@pytest.fixture +def invalid_scalar(data): + """ + A scalar that *cannot* be held by this ExtensionArray. + + The default should work for most subclasses, but is not guaranteed. + + If the array can hold any item (i.e. object dtype), then use pytest.skip. + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return object.__new__(object) + + +@pytest.fixture +def na_value(): + return pandas.NaT + + +@pytest.fixture +def na_cmp(): + """ + Binary operator for comparing NA values. + + Should return a function of two arguments that returns + True if both arguments are (scalar) NA for your type. + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + and + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/test_datetime.py + """ + + def cmp(a, b): + return a is pandas.NaT and a is b + + return cmp + + +@pytest.fixture(params=[None, lambda x: x]) +def sort_by_key(request): + """ + Simple fixture for testing keys in sorting methods. + Tests None (no key) and the identity key. + + See: https://github.com/pandas-dev/pandas/blob/main/pandas/conftest.py + """ + return request.param + + +@pytest.fixture(params=[True, False]) +def use_numpy(request): + """ + Boolean fixture to support comparison testing of ExtensionDtype array + and numpy array. + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return request.param diff --git a/tests/compliance/date/conftest.py b/tests/compliance/date/conftest.py new file mode 100644 index 0000000..6f0a816 --- /dev/null +++ b/tests/compliance/date/conftest.py @@ -0,0 +1,132 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime + +import numpy +import pytest + +from db_dtypes import DateArray, DateDtype + + +@pytest.fixture(params=["data", "data_missing"]) +def all_data(request, data, data_missing): + """Parametrized fixture giving 'data' and 'data_missing'""" + if request.param == "data": + return data + elif request.param == "data_missing": + return data_missing + + +@pytest.fixture +def data(): + return DateArray( + numpy.arange( + datetime.datetime(1900, 1, 1), + datetime.datetime(2099, 12, 31), + datetime.timedelta(days=731), + dtype="datetime64[ns]", + ) + ) + + +@pytest.fixture +def data_for_grouping(): + """ + Data for factorization, grouping, and unique tests. + + Expected to be like [B, B, NA, NA, A, A, B, C] + + Where A < B < C and NA is missing + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return DateArray( + [ + datetime.date(1980, 1, 27), + datetime.date(1980, 1, 27), + None, + None, + datetime.date(1969, 12, 30), + datetime.date(1969, 12, 30), + datetime.date(1980, 1, 27), + datetime.date(2022, 3, 18), + ] + ) + + +@pytest.fixture +def data_for_sorting(): + """ + Length-3 array with a known sort order. + + This should be three items [B, C, A] with + A < B < C + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return DateArray( + [ + datetime.date(1980, 1, 27), + datetime.date(2022, 3, 18), + datetime.date(1969, 12, 30), + ] + ) + + +@pytest.fixture +def data_missing(): + """Length-2 array with [NA, Valid] + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return DateArray([None, datetime.date(2022, 1, 27)]) + + +@pytest.fixture +def data_missing_for_sorting(): + """ + Length-3 array with a known sort order. + + This should be three items [B, NA, A] with + A < B and NA missing. + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return DateArray([datetime.date(1980, 1, 27), None, datetime.date(1969, 12, 30)]) + + +@pytest.fixture +def data_repeated(data): + """ + Generate many datasets. + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + + def gen(count): + for _ in range(count): + yield data + + return gen + + +@pytest.fixture +def dtype(): + return DateDtype() diff --git a/tests/compliance/date/test_date_compliance.py b/tests/compliance/date/test_date_compliance.py new file mode 100644 index 0000000..e19caf7 --- /dev/null +++ b/tests/compliance/date/test_date_compliance.py @@ -0,0 +1,107 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Tests for extension interface compliance, inherited from pandas. + +See: +https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/decimal/test_decimal.py +and +https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/test_period.py +""" + +import pandas +from pandas.tests.extension import base +import pytest + +import db_dtypes + + +# TODO(https://github.com/googleapis/python-db-dtypes-pandas/issues/87): Add +# compliance tests for arithmetic operations. + +# TODO(https://github.com/googleapis/python-db-dtypes-pandas/issues/78): Add +# compliance tests for reduction operations. + + +class TestComparisonOps(base.BaseComparisonOpsTests): + pass + + +class TestCasting(base.BaseCastingTests): + pass + + +class TestConstructors(base.BaseConstructorsTests): + pass + + +class TestDtype(base.BaseDtypeTests): + pass + + +class TestGetitem(base.BaseGetitemTests): + pass + + +class TestGroupby(base.BaseGroupbyTests): + pass + + +class TestIndex(base.BaseIndexTests): + pass + + +class TestInterface(base.BaseInterfaceTests): + pass + + +class TestMissing(base.BaseMissingTests): + pass + + +class TestMethods(base.BaseMethodsTests): + def test_combine_add(self): + pytest.skip("Cannot add dates.") + + @pytest.mark.parametrize("dropna", [True, False]) + def test_value_counts(self, all_data, dropna): + all_data = all_data[:10] + if dropna: + # Overridden from + # https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/base/methods.py + # to avoid difference in dtypes. + other = db_dtypes.DateArray(all_data[~all_data.isna()]) + else: + other = all_data + + result = pandas.Series(all_data).value_counts(dropna=dropna).sort_index() + expected = pandas.Series(other).value_counts(dropna=dropna).sort_index() + + self.assert_series_equal(result, expected) + + +class TestParsing(base.BaseParsingTests): + pass + + +class TestPrinting(base.BasePrintingTests): + pass + + +class TestReshaping(base.BaseReshapingTests): + pass + + +class TestSetitem(base.BaseSetitemTests): + pass diff --git a/tests/compliance/date/test_date_compliance_1_5.py b/tests/compliance/date/test_date_compliance_1_5.py new file mode 100644 index 0000000..e8f2c93 --- /dev/null +++ b/tests/compliance/date/test_date_compliance_1_5.py @@ -0,0 +1,31 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Tests for extension interface compliance, inherited from pandas. + +See: +https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/decimal/test_decimal.py +and +https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/test_period.py +""" + +from pandas.tests.extension import base +import pytest + +# NDArrayBacked2DTests suite added in https://github.com/pandas-dev/pandas/pull/44974 +pytest.importorskip("pandas", minversion="1.5.0dev") + + +class Test2DCompat(base.NDArrayBacked2DTests): + pass diff --git a/tests/compliance/time/conftest.py b/tests/compliance/time/conftest.py new file mode 100644 index 0000000..760a068 --- /dev/null +++ b/tests/compliance/time/conftest.py @@ -0,0 +1,134 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime + +import numpy +import pytest + +from db_dtypes import TimeArray, TimeDtype + + +@pytest.fixture(params=["data", "data_missing"]) +def all_data(request, data, data_missing): + """Parametrized fixture giving 'data' and 'data_missing'""" + if request.param == "data": + return data + elif request.param == "data_missing": + return data_missing + + +@pytest.fixture +def data(): + return TimeArray( + numpy.arange( + datetime.datetime(1970, 1, 1), + datetime.datetime(1970, 1, 2), + datetime.timedelta(microseconds=864_123_456), + dtype="datetime64[ns]", + ) + ) + + +@pytest.fixture +def data_for_grouping(): + """ + Data for factorization, grouping, and unique tests. + + Expected to be like [B, B, NA, NA, A, A, B, C] + + Where A < B < C and NA is missing + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return TimeArray( + [ + datetime.time(11, 45, 29, 987_654), + datetime.time(11, 45, 29, 987_654), + None, + None, + datetime.time(0, 1, 2, 345_678), + datetime.time(0, 1, 2, 345_678), + datetime.time(11, 45, 29, 987_654), + datetime.time(23, 59, 59, 999_999), + ] + ) + + +@pytest.fixture +def data_for_sorting(): + """ + Length-3 array with a known sort order. + + This should be three items [B, C, A] with + A < B < C + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return TimeArray( + [ + datetime.time(11, 45, 29, 987_654), + datetime.time(23, 59, 59, 999_999), + datetime.time(0, 1, 2, 345_678), + ] + ) + + +@pytest.fixture +def data_missing(): + """Length-2 array with [NA, Valid] + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return TimeArray([None, datetime.time(13, 7, 42, 123_456)]) + + +@pytest.fixture +def data_missing_for_sorting(): + """ + Length-3 array with a known sort order. + + This should be three items [B, NA, A] with + A < B and NA missing. + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return TimeArray( + [datetime.time(13, 7, 42, 123_456), None, datetime.time(1, 2, 3, 456_789)] + ) + + +@pytest.fixture +def data_repeated(data): + """ + Generate many datasets. + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + + def gen(count): + for _ in range(count): + yield data + + return gen + + +@pytest.fixture +def dtype(): + return TimeDtype() diff --git a/tests/compliance/time/test_time_compliance.py b/tests/compliance/time/test_time_compliance.py new file mode 100644 index 0000000..ab1e050 --- /dev/null +++ b/tests/compliance/time/test_time_compliance.py @@ -0,0 +1,107 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Tests for extension interface compliance, inherited from pandas. + +See: +https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/decimal/test_decimal.py +and +https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/test_period.py +""" + +import pandas +from pandas.tests.extension import base +import pytest + +import db_dtypes + + +# TODO(https://github.com/googleapis/python-db-dtypes-pandas/issues/87): Add +# compliance tests for arithmetic operations. + +# TODO(https://github.com/googleapis/python-db-dtypes-pandas/issues/78): Add +# compliance tests for reduction operations. + + +class TestComparisonOps(base.BaseComparisonOpsTests): + pass + + +class TestCasting(base.BaseCastingTests): + pass + + +class TestConstructors(base.BaseConstructorsTests): + pass + + +class TestDtype(base.BaseDtypeTests): + pass + + +class TestGetitem(base.BaseGetitemTests): + pass + + +class TestGroupby(base.BaseGroupbyTests): + pass + + +class TestIndex(base.BaseIndexTests): + pass + + +class TestInterface(base.BaseInterfaceTests): + pass + + +class TestMissing(base.BaseMissingTests): + pass + + +class TestMethods(base.BaseMethodsTests): + def test_combine_add(self): + pytest.skip("Cannot add dates.") + + @pytest.mark.parametrize("dropna", [True, False]) + def test_value_counts(self, all_data, dropna): + all_data = all_data[:10] + if dropna: + # Overridden from + # https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/base/methods.py + # to avoid difference in dtypes. + other = db_dtypes.TimeArray(all_data[~all_data.isna()]) + else: + other = all_data + + result = pandas.Series(all_data).value_counts(dropna=dropna).sort_index() + expected = pandas.Series(other).value_counts(dropna=dropna).sort_index() + + self.assert_series_equal(result, expected) + + +class TestParsing(base.BaseParsingTests): + pass + + +class TestPrinting(base.BasePrintingTests): + pass + + +class TestReshaping(base.BaseReshapingTests): + pass + + +class TestSetitem(base.BaseSetitemTests): + pass diff --git a/tests/compliance/time/test_time_compliance_1_5.py b/tests/compliance/time/test_time_compliance_1_5.py new file mode 100644 index 0000000..e8f2c93 --- /dev/null +++ b/tests/compliance/time/test_time_compliance_1_5.py @@ -0,0 +1,31 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Tests for extension interface compliance, inherited from pandas. + +See: +https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/decimal/test_decimal.py +and +https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/test_period.py +""" + +from pandas.tests.extension import base +import pytest + +# NDArrayBacked2DTests suite added in https://github.com/pandas-dev/pandas/pull/44974 +pytest.importorskip("pandas", minversion="1.5.0dev") + + +class Test2DCompat(base.NDArrayBacked2DTests): + pass diff --git a/tests/unit/test_date.py b/tests/unit/test_date.py index b906f24..bbe74cb 100644 --- a/tests/unit/test_date.py +++ b/tests/unit/test_date.py @@ -13,34 +13,122 @@ # limitations under the License. import datetime +import operator +import numpy +import numpy.testing import pandas +import pandas.testing import pytest -# To register the types. -import db_dtypes # noqa +import db_dtypes +from db_dtypes import pandas_backports -@pytest.mark.parametrize( - "value, expected", - [ - # Min/Max values for pandas.Timestamp. - ("1677-09-22", datetime.date(1677, 9, 22)), - ("2262-04-11", datetime.date(2262, 4, 11)), - # Typical "zero" values. - ("1900-01-01", datetime.date(1900, 1, 1)), - ("1970-01-01", datetime.date(1970, 1, 1)), - # Assorted values. - ("1993-10-31", datetime.date(1993, 10, 31)), - ("2012-02-29", datetime.date(2012, 2, 29)), - ("2021-12-17", datetime.date(2021, 12, 17)), - ("2038-01-19", datetime.date(2038, 1, 19)), - ], -) +VALUE_PARSING_TEST_CASES = [ + # Min/Max values for pandas.Timestamp. + ("1677-09-22", datetime.date(1677, 9, 22)), + ("2262-04-11", datetime.date(2262, 4, 11)), + # Typical "zero" values. + ("1900-01-01", datetime.date(1900, 1, 1)), + ("1970-01-01", datetime.date(1970, 1, 1)), + # Assorted values. + ("1993-10-31", datetime.date(1993, 10, 31)), + (datetime.date(1993, 10, 31), datetime.date(1993, 10, 31)), + ("2012-02-29", datetime.date(2012, 2, 29)), + (numpy.datetime64("2012-02-29"), datetime.date(2012, 2, 29)), + ("2021-12-17", datetime.date(2021, 12, 17)), + (pandas.Timestamp("2021-12-17"), datetime.date(2021, 12, 17)), + ("2038-01-19", datetime.date(2038, 1, 19)), +] + +NULL_VALUE_TEST_CASES = [ + None, + pandas.NaT, + float("nan"), +] + +if hasattr(pandas, "NA"): + NULL_VALUE_TEST_CASES.append(pandas.NA) + + +def test_box_func(): + input_array = db_dtypes.DateArray([]) + input_datetime = datetime.datetime(2022, 3, 16) + input_np = numpy.datetime64(input_datetime) + + boxed_value = input_array._box_func(input_np) + assert boxed_value.year == 2022 + assert boxed_value.month == 3 + assert boxed_value.day == 16 + + input_delta = input_datetime - datetime.datetime(1970, 1, 1) + input_nanoseconds = ( + 1_000 * input_delta.microseconds + + 1_000_000_000 * input_delta.seconds + + 1_000_000_000 * 60 * 60 * 24 * input_delta.days + ) + + boxed_value = input_array._box_func(input_nanoseconds) + assert boxed_value.year == 2022 + assert boxed_value.month == 3 + assert boxed_value.day == 16 + + +def test_construct_from_string_with_nonstring(): + with pytest.raises(TypeError): + db_dtypes.DateDtype.construct_from_string(object()) + + +def test__cmp_method_with_scalar(): + input_array = db_dtypes.DateArray([datetime.date(1900, 1, 1)]) + got = input_array._cmp_method(datetime.date(1900, 1, 1), operator.eq) + assert got[0] + + +@pytest.mark.parametrize("value, expected", VALUE_PARSING_TEST_CASES) def test_date_parsing(value, expected): assert pandas.Series([value], dtype="dbdate")[0] == expected +@pytest.mark.parametrize("value", NULL_VALUE_TEST_CASES) +def test_date_parsing_null(value): + assert pandas.Series([value], dtype="dbdate")[0] is pandas.NaT + + +@pytest.mark.parametrize("value, expected", VALUE_PARSING_TEST_CASES) +def test_date_set_item(value, expected): + series = pandas.Series([None], dtype="dbdate") + series[0] = value + assert series[0] == expected + + +@pytest.mark.parametrize("value", NULL_VALUE_TEST_CASES) +def test_date_set_item_null(value): + series = pandas.Series(["1970-01-01"], dtype="dbdate") + series[0] = value + assert series[0] is pandas.NaT + + +def test_date_set_slice(): + series = pandas.Series([None, None, None], dtype="dbdate") + series[:] = [ + datetime.date(2022, 3, 21), + "2011-12-13", + numpy.datetime64("1998-09-04"), + ] + assert series[0] == datetime.date(2022, 3, 21) + assert series[1] == datetime.date(2011, 12, 13) + assert series[2] == datetime.date(1998, 9, 4) + + +def test_date_set_slice_null(): + series = pandas.Series(["1970-01-01"] * len(NULL_VALUE_TEST_CASES), dtype="dbdate") + series[:] = NULL_VALUE_TEST_CASES + for row_index in range(len(NULL_VALUE_TEST_CASES)): + assert series[row_index] is pandas.NaT + + @pytest.mark.parametrize( "value, error", [ @@ -65,3 +153,205 @@ def test_date_parsing(value, expected): def test_date_parsing_errors(value, error): with pytest.raises(ValueError, match=error): pandas.Series([value], dtype="dbdate") + + +def test_date_max_2d(): + input_array = db_dtypes.DateArray( + numpy.array( + [ + [ + numpy.datetime64("1970-01-01"), + numpy.datetime64("1980-02-02"), + numpy.datetime64("1990-03-03"), + ], + [ + numpy.datetime64("1971-02-02"), + numpy.datetime64("1981-03-03"), + numpy.datetime64("1991-04-04"), + ], + [ + numpy.datetime64("1972-03-03"), + numpy.datetime64("1982-04-04"), + numpy.datetime64("1992-05-05"), + ], + ], + dtype="datetime64[ns]", + ) + ) + numpy.testing.assert_array_equal( + input_array.max(axis=0)._ndarray, + numpy.array( + [ + numpy.datetime64("1972-03-03"), + numpy.datetime64("1982-04-04"), + numpy.datetime64("1992-05-05"), + ], + dtype="datetime64[ns]", + ), + ) + numpy.testing.assert_array_equal( + input_array.max(axis=1)._ndarray, + numpy.array( + [ + numpy.datetime64("1990-03-03"), + numpy.datetime64("1991-04-04"), + numpy.datetime64("1992-05-05"), + ], + dtype="datetime64[ns]", + ), + ) + + +def test_date_min_2d(): + input_array = db_dtypes.DateArray( + numpy.array( + [ + [ + numpy.datetime64("1970-01-01"), + numpy.datetime64("1980-02-02"), + numpy.datetime64("1990-03-03"), + ], + [ + numpy.datetime64("1971-02-02"), + numpy.datetime64("1981-03-03"), + numpy.datetime64("1991-04-04"), + ], + [ + numpy.datetime64("1972-03-03"), + numpy.datetime64("1982-04-04"), + numpy.datetime64("1992-05-05"), + ], + ], + dtype="datetime64[ns]", + ) + ) + numpy.testing.assert_array_equal( + input_array.min(axis=0)._ndarray, + numpy.array( + [ + numpy.datetime64("1970-01-01"), + numpy.datetime64("1980-02-02"), + numpy.datetime64("1990-03-03"), + ], + dtype="datetime64[ns]", + ), + ) + numpy.testing.assert_array_equal( + input_array.min(axis=1)._ndarray, + numpy.array( + [ + numpy.datetime64("1970-01-01"), + numpy.datetime64("1971-02-02"), + numpy.datetime64("1972-03-03"), + ], + dtype="datetime64[ns]", + ), + ) + + +@pytest.mark.skipif( + not hasattr(pandas_backports, "numpy_validate_median"), + reason="median not available with this version of pandas", +) +@pytest.mark.parametrize( + "values, expected", + [ + (["1970-01-01", "1900-01-01", "2000-01-01"], datetime.date(1970, 1, 1)), + ( + [ + None, + "1900-01-01", + pandas.NA if hasattr(pandas, "NA") else None, + pandas.NaT, + float("nan"), + ], + datetime.date(1900, 1, 1), + ), + (["2222-02-01", "2222-02-03"], datetime.date(2222, 2, 2)), + ], +) +def test_date_median(values, expected): + series = pandas.Series(values, dtype="dbdate") + assert series.median() == expected + + +@pytest.mark.skipif( + not hasattr(pandas_backports, "numpy_validate_median"), + reason="median not available with this version of pandas", +) +def test_date_median_2d(): + input_array = db_dtypes.DateArray( + numpy.array( + [ + [ + numpy.datetime64("1970-01-01"), + numpy.datetime64("1980-02-02"), + numpy.datetime64("1990-03-03"), + ], + [ + numpy.datetime64("1971-02-02"), + numpy.datetime64("1981-03-03"), + numpy.datetime64("1991-04-04"), + ], + [ + numpy.datetime64("1972-03-03"), + numpy.datetime64("1982-04-04"), + numpy.datetime64("1992-05-05"), + ], + ], + dtype="datetime64[ns]", + ) + ) + pandas.testing.assert_extension_array_equal( + input_array.median(axis=0), + db_dtypes.DateArray( + numpy.array( + [ + numpy.datetime64("1971-02-02"), + numpy.datetime64("1981-03-03"), + numpy.datetime64("1991-04-04"), + ], + dtype="datetime64[ns]", + ) + ), + ) + pandas.testing.assert_extension_array_equal( + input_array.median(axis=1), + db_dtypes.DateArray( + numpy.array( + [ + numpy.datetime64("1980-02-02"), + numpy.datetime64("1981-03-03"), + numpy.datetime64("1982-04-04"), + ], + dtype="datetime64[ns]", + ) + ), + ) + + +@pytest.mark.parametrize( + ("search_term", "expected_index"), + ( + (datetime.date(1899, 12, 31), 0), + (datetime.date(1900, 1, 1), 0), + (datetime.date(1920, 2, 2), 1), + (datetime.date(1930, 3, 3), 1), + (datetime.date(1950, 5, 5), 2), + (datetime.date(1990, 9, 9), 3), + (datetime.date(2012, 12, 12), 3), + (datetime.date(2022, 3, 24), 4), + ), +) +def test_date_searchsorted(search_term, expected_index): + test_series = pandas.Series( + [ + datetime.date(1900, 1, 1), + datetime.date(1930, 3, 3), + datetime.date(1980, 8, 8), + datetime.date(2012, 12, 12), + ], + dtype="dbdate", + ) + got = test_series.searchsorted(search_term) + assert got == expected_index diff --git a/tests/unit/test_dtypes.py b/tests/unit/test_dtypes.py index aacbf0b..dc1613b 100644 --- a/tests/unit/test_dtypes.py +++ b/tests/unit/test_dtypes.py @@ -23,8 +23,8 @@ pandas_release = packaging.version.parse(pd.__version__).release SAMPLE_RAW_VALUES = dict( - dbdate=(datetime.date(2021, 2, 2), "2021-2-3", None), - dbtime=(datetime.time(1, 2, 2), "1:2:3.5", None), + dbdate=(datetime.date(2021, 2, 2), "2021-2-3", pd.NaT), + dbtime=(datetime.time(1, 2, 2), "1:2:3.5", pd.NaT), ) SAMPLE_VALUES = dict( dbdate=( @@ -90,7 +90,7 @@ def test_array_construction(dtype, factory_method): factory = getattr(factory, factory_method) if factory_method == "_from_sequence_of_strings": sample_raw_values = [ - str(v) if v is not None else v for v in sample_raw_values + str(v) if not pd.isna(v) else v for v in sample_raw_values ] a = factory(sample_raw_values) assert len(a) == 3 @@ -98,11 +98,11 @@ def test_array_construction(dtype, factory_method): assert a.shape == (3,) sample_values = SAMPLE_VALUES[dtype] assert a[0], a[1] == sample_values[:2] - assert a[2] is None + assert pd.isna(a[2]) and a[2] is pd.NaT # implementation details: assert a.nbytes == 24 - assert np.array_equal( + np.testing.assert_array_equal( a._ndarray == np.array(SAMPLE_DT_VALUES[dtype][:2] + ("NaT",), dtype="datetime64[us]"), [True, True, False], @@ -121,7 +121,7 @@ def test_time_series_construction(dtype): s = pd.Series(SAMPLE_RAW_VALUES[dtype], dtype=dtype) assert len(s) == 3 assert s[0], s[1] == sample_values[:2] - assert s[2] is None + assert s[2] is pd.NaT assert s.nbytes == 24 assert isinstance(s.array, _cls(dtype)) @@ -166,19 +166,15 @@ def test_timearray_comparisons( # Note that the right_obs comparisons work because # they're called on right_obs rather then left, because # TimeArrays only support comparisons with TimeArrays. - assert np.array_equal(comparisons[op](left, r), expected) - assert np.array_equal(complements[op](left, r), ~expected) + np.testing.assert_array_equal(comparisons[op](left, r), expected) + np.testing.assert_array_equal(complements[op](left, r), ~expected) - # Bad shape - for bad_shape in ([], [1, 2, 3]): + # Bad shape, but same type + for bad_shape in ([], sample_values[:3]): with pytest.raises( TypeError, match="Can't compare arrays with different shapes" ): - comparisons[op](left, np.array(bad_shape)) - with pytest.raises( - TypeError, match="Can't compare arrays with different shapes" - ): - complements[op](left, np.array(bad_shape)) + comparisons[op](left, _cls(dtype)._from_sequence(bad_shape)) # Bad items for bad_items in ( @@ -186,10 +182,10 @@ def test_timearray_comparisons( [1], # a single-element array gets broadcast ): if op == "==": - assert np.array_equal( + np.testing.assert_array_equal( comparisons[op](left, np.array(bad_items)), np.array([False, False]) ) - assert np.array_equal( + np.testing.assert_array_equal( complements[op](left, np.array(bad_items)), np.array([True, True]) ) else: @@ -204,7 +200,7 @@ def test_timearray_comparisons( def test___getitem___arrayindex(dtype): cls = _cls(dtype) sample_values = SAMPLE_VALUES[dtype] - assert np.array_equal( + np.testing.assert_array_equal( cls(sample_values)[[1, 3]], cls([sample_values[1], sample_values[3]]), ) @@ -215,21 +211,23 @@ def test_timearray_slicing(dtype): b = a[:] assert b is not a assert b.__class__ == a.__class__ - assert np.array_equal(b, a) + np.testing.assert_array_equal(b._ndarray, a._ndarray) sample_values = SAMPLE_VALUES[dtype] cls = _cls(dtype) - assert np.array_equal(a[:1], cls._from_sequence(sample_values[:1])) + np.testing.assert_array_equal( + a[:1]._ndarray, cls._from_sequence(sample_values[:1])._ndarray + ) # Assignment works: a[:1] = cls._from_sequence([sample_values[2]]) - assert np.array_equal( + np.testing.assert_array_equal( a[:2], cls._from_sequence([sample_values[2], sample_values[1]]) ) # Series also work: s = pd.Series(SAMPLE_RAW_VALUES[dtype], dtype=dtype) - assert np.array_equal(s[:1].array, cls._from_sequence([sample_values[0]])) + np.testing.assert_array_equal(s[:1].array, cls._from_sequence([sample_values[0]])) @for_date_and_time @@ -238,9 +236,13 @@ def test_item_assignment(dtype): sample_values = SAMPLE_VALUES[dtype] cls = _cls(dtype) a[0] = sample_values[2] - assert np.array_equal(a, cls._from_sequence([sample_values[2], sample_values[1]])) + np.testing.assert_array_equal( + a, cls._from_sequence([sample_values[2], sample_values[1]]) + ) a[1] = None - assert np.array_equal(a, cls._from_sequence([sample_values[2], None])) + np.testing.assert_array_equal( + a._ndarray, cls._from_sequence([sample_values[2], None])._ndarray + ) @for_date_and_time @@ -249,9 +251,9 @@ def test_array_assignment(dtype): cls = _cls(dtype) sample_values = SAMPLE_VALUES[dtype] a[a.isna()] = sample_values[3] - assert np.array_equal(a, cls([sample_values[i] for i in (0, 1, 3)])) + np.testing.assert_array_equal(a, cls([sample_values[i] for i in (0, 1, 3)])) a[[0, 2]] = sample_values[2] - assert np.array_equal(a, cls([sample_values[i] for i in (2, 1, 2)])) + np.testing.assert_array_equal(a, cls([sample_values[i] for i in (2, 1, 2)])) @for_date_and_time @@ -270,7 +272,7 @@ def test_copy(dtype): b = a.copy() assert b is not a assert b._ndarray is not a._ndarray - assert np.array_equal(b, a) + np.testing.assert_array_equal(b, a) @for_date_and_time @@ -280,7 +282,7 @@ def test_from_ndarray_copy(dtype): a = cls._from_sequence(sample_values) b = cls(a._ndarray, copy=True) assert b._ndarray is not a._ndarray - assert np.array_equal(b, a) + np.testing.assert_array_equal(b, a) @for_date_and_time @@ -310,7 +312,7 @@ def test__validate_scalar_invalid(dtype): [ (False, None), (True, None), - (True, pd._libs.NaT if pd else None), + (True, pd.NaT if pd else None), (True, np.NaN if pd else None), (True, 42), ], @@ -326,7 +328,7 @@ def test_take(dtype, allow_fill, fill_value): else datetime.time(0, 42, 42, 424242) ) else: - expected_fill = None + expected_fill = pd.NaT b = a.take([1, -1, 3], allow_fill=True, fill_value=fill_value) expect = [sample_values[1], expected_fill, sample_values[3]] else: @@ -370,7 +372,7 @@ def test__concat_same_type_not_same_type(dtype): @for_date_and_time def test_dropna(dtype): - assert np.array_equal(_make_one(dtype).dropna(), _make_one(dtype)[:2]) + np.testing.assert_array_equal(_make_one(dtype).dropna(), _make_one(dtype)[:2]) @pytest.mark.parametrize( @@ -398,14 +400,18 @@ def test_fillna(dtype, value, meth, limit, expect): elif value is not None: value = sample_values[value] expect = cls([None if i is None else sample_values[i] for i in expect]) - assert np.array_equal(a.fillna(value, meth, limit), expect) + np.testing.assert_array_equal( + a.fillna(value, meth, limit)._ndarray, expect._ndarray + ) @for_date_and_time def test_unique(dtype): cls = _cls(dtype) sample_values = SAMPLE_VALUES[dtype] - assert np.array_equal(cls(sample_values * 3).unique(), cls(sample_values),) + np.testing.assert_array_equal( + cls(sample_values * 3).unique(), cls(sample_values), + ) @for_date_and_time @@ -421,7 +427,7 @@ def test_astype_copy(dtype): b = a.astype(a.dtype, copy=True) assert b is not a assert b.__class__ is a.__class__ - assert np.array_equal(b, a) + np.testing.assert_array_equal(b._ndarray, a._ndarray) @pytest.mark.parametrize( @@ -452,7 +458,7 @@ def test_asdatetime(dtype, same): b = a.astype(dt, copy=copy) assert b is not a._ndarray - assert np.array_equal(b[:2], a._ndarray[:2]) + np.testing.assert_array_equal(b[:2], a._ndarray[:2]) assert pd.isna(b[2]) and str(b[2]) == "NaT" @@ -482,7 +488,7 @@ def test_astimedelta(dtype): a = _cls("dbtime")([t, None]) b = a.astype(dtype) - np.array_equal(b[:1], expect) + np.testing.assert_array_equal(b[:1], expect) assert pd.isna(b[1]) and str(b[1]) == "NaT" @@ -523,7 +529,7 @@ def test_min_max_median(dtype): a = cls(data) assert a.min() == sample_values[0] assert a.max() == sample_values[-1] - if pandas_release >= (1, 2): + if pandas_release >= (1, 3): assert ( a.median() == datetime.time(1, 2, 4) if dtype == "dbtime" @@ -531,26 +537,26 @@ def test_min_max_median(dtype): ) empty = cls([]) - assert empty.min() is None - assert empty.max() is None - if pandas_release >= (1, 2): - assert empty.median() is None + assert empty.min() is pd.NaT + assert empty.max() is pd.NaT + if pandas_release >= (1, 3): + assert empty.median() is pd.NaT empty = cls([None]) - assert empty.min() is None - assert empty.max() is None - assert empty.min(skipna=False) is None - assert empty.max(skipna=False) is None - if pandas_release >= (1, 2): + assert empty.min() is pd.NaT + assert empty.max() is pd.NaT + assert empty.min(skipna=False) is pd.NaT + assert empty.max(skipna=False) is pd.NaT + if pandas_release >= (1, 3): with pytest.warns(RuntimeWarning, match="empty slice"): # It's weird that we get the warning here, and not # below. :/ - assert empty.median() is None - assert empty.median(skipna=False) is None + assert empty.median() is pd.NaT + assert empty.median(skipna=False) is pd.NaT a = _make_one(dtype) assert a.min() == sample_values[0] assert a.max() == sample_values[1] - if pandas_release >= (1, 2): + if pandas_release >= (1, 3): assert ( a.median() == datetime.time(1, 2, 2, 750000) if dtype == "dbtime" @@ -563,14 +569,14 @@ def test_date_add(): times = _cls("dbtime")(SAMPLE_VALUES["dbtime"]) expect = dates.astype("datetime64") + times.astype("timedelta64") - assert np.array_equal(dates + times, expect) - assert np.array_equal(times + dates, expect) + np.testing.assert_array_equal(dates + times, expect) + np.testing.assert_array_equal(times + dates, expect) do = pd.DateOffset(days=1) expect = dates.astype("object") + do - assert np.array_equal(dates + do, expect) + np.testing.assert_array_equal(dates + do, expect) if pandas_release >= (1, 1): - assert np.array_equal(do + dates, expect) + np.testing.assert_array_equal(do + dates, expect) with pytest.raises(TypeError): dates + times.astype("timedelta64") @@ -587,8 +593,8 @@ def test_date_add(): do = pd.Series([pd.DateOffset(days=i) for i in range(4)]) expect = dates.astype("object") + do - assert np.array_equal(dates + do, expect) - assert np.array_equal(do + dates, expect) + np.testing.assert_array_equal(dates + do, expect) + np.testing.assert_array_equal(do + dates, expect) def test_date_sub(): @@ -602,11 +608,11 @@ def test_date_sub(): ) ) expect = dates.astype("datetime64") - dates2.astype("datetime64") - assert np.array_equal(dates - dates2, expect) + np.testing.assert_array_equal(dates - dates2, expect) do = pd.DateOffset(days=1) expect = dates.astype("object") - do - assert np.array_equal(dates - do, expect) + np.testing.assert_array_equal(dates - do, expect) with pytest.raises(TypeError): dates - 42 @@ -620,4 +626,4 @@ def test_date_sub(): do = pd.Series([pd.DateOffset(days=i) for i in range(4)]) expect = dates.astype("object") - do - assert np.array_equal(dates - do, expect) + np.testing.assert_array_equal(dates - do, expect) diff --git a/tests/unit/test_time.py b/tests/unit/test_time.py index ba45949..bdfc48b 100644 --- a/tests/unit/test_time.py +++ b/tests/unit/test_time.py @@ -14,11 +14,38 @@ import datetime +import numpy import pandas import pytest # To register the types. import db_dtypes # noqa +from db_dtypes import pandas_backports + + +def test_box_func(): + input_array = db_dtypes.TimeArray([]) + input_datetime = datetime.datetime(1970, 1, 1, 1, 2, 3, 456789) + input_np = numpy.datetime64(input_datetime) + + boxed_value = input_array._box_func(input_np) + assert boxed_value.hour == 1 + assert boxed_value.minute == 2 + assert boxed_value.second == 3 + assert boxed_value.microsecond == 456789 + + input_delta = input_datetime - datetime.datetime(1970, 1, 1) + input_nanoseconds = ( + 1_000 * input_delta.microseconds + + 1_000_000_000 * input_delta.seconds + + 1_000_000_000 * 60 * 60 * 24 * input_delta.days + ) + + boxed_value = input_array._box_func(input_nanoseconds) + assert boxed_value.hour == 1 + assert boxed_value.minute == 2 + assert boxed_value.second == 3 + assert boxed_value.microsecond == 456789 @pytest.mark.parametrize( @@ -46,8 +73,14 @@ # Fractional seconds can cause rounding problems if cast to float. See: # https://github.com/googleapis/python-db-dtypes-pandas/issues/18 ("0:0:59.876543", datetime.time(0, 0, 59, 876543)), + ( + numpy.datetime64("1970-01-01 00:00:59.876543"), + datetime.time(0, 0, 59, 876543), + ), ("01:01:01.010101", datetime.time(1, 1, 1, 10101)), + (pandas.Timestamp("1970-01-01 01:01:01.010101"), datetime.time(1, 1, 1, 10101)), ("09:09:09.090909", datetime.time(9, 9, 9, 90909)), + (datetime.time(9, 9, 9, 90909), datetime.time(9, 9, 9, 90909)), ("11:11:11.111111", datetime.time(11, 11, 11, 111111)), ("19:16:23.987654", datetime.time(19, 16, 23, 987654)), # Microsecond precision @@ -82,3 +115,32 @@ def test_time_parsing(value, expected): def test_time_parsing_errors(value, error): with pytest.raises(ValueError, match=error): pandas.Series([value], dtype="dbtime") + + +@pytest.mark.skipif( + not hasattr(pandas_backports, "numpy_validate_median"), + reason="median not available with this version of pandas", +) +@pytest.mark.parametrize( + "values, expected", + [ + ( + ["00:00:00", "12:34:56.789101", "23:59:59.999999"], + datetime.time(12, 34, 56, 789101), + ), + ( + [ + None, + "06:30:00", + pandas.NA if hasattr(pandas, "NA") else None, + pandas.NaT, + float("nan"), + ], + datetime.time(6, 30), + ), + (["2:22:21.222222", "2:22:23.222222"], datetime.time(2, 22, 22, 222222)), + ], +) +def test_date_median(values, expected): + series = pandas.Series(values, dtype="dbtime") + assert series.median() == expected