diff --git a/CHANGELOG.md b/CHANGELOG.md
index c39c5c47..5c4d72a6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,6 +14,23 @@ Older versions of this project were distributed as [pybigquery][0].
 
 [2]: https://pypi.org/project/pybigquery/#history
 
+## [1.1.0](https://www.github.com/googleapis/python-bigquery-sqlalchemy/compare/v1.0.0...v1.1.0) (2021-08-25)
+
+
+### Features
+
+* Add geography support ([#228](https://www.github.com/googleapis/python-bigquery-sqlalchemy/issues/228)) ([da7a403](https://www.github.com/googleapis/python-bigquery-sqlalchemy/commit/da7a40309de6ca8063d6dcf6678de96a463344e6))
+* Handle passing of arrays to IN statements more efficiently in SQLAlchemy 1.4 and higher ([#253](https://www.github.com/googleapis/python-bigquery-sqlalchemy/issues/253)) ([7692704](https://www.github.com/googleapis/python-bigquery-sqlalchemy/commit/76927044aa4d2be9d0f2ec47e917b28b97c18425))
+
+
+### Bug Fixes
+
+* dialect attribute wasn't provided ([#291](https://www.github.com/googleapis/python-bigquery-sqlalchemy/issues/291)) ([2cf05a0](https://www.github.com/googleapis/python-bigquery-sqlalchemy/commit/2cf05a0f37e32344b29ba2e92d709f7e51b20916))
+* distinct doesn't work as a column wrapper ([#275](https://www.github.com/googleapis/python-bigquery-sqlalchemy/issues/275)) ([ad5baf8](https://www.github.com/googleapis/python-bigquery-sqlalchemy/commit/ad5baf8a5351b9cdac4eda243e4042aeb551b937))
+* in-operator literal binds not handled properly ([#285](https://www.github.com/googleapis/python-bigquery-sqlalchemy/issues/285)) ([e06bf74](https://www.github.com/googleapis/python-bigquery-sqlalchemy/commit/e06bf74310fa27d5bc775e13beed4ab3a520e1aa))
+* supports_multivalues_insert dialect option was misspelled ([#278](https://www.github.com/googleapis/python-bigquery-sqlalchemy/issues/278)) ([ec36a12](https://www.github.com/googleapis/python-bigquery-sqlalchemy/commit/ec36a120c1607d9769105e873550bb727c504c93))
+* unnest failed in some cases (table references failed when there were no other references to the referenced tables in a query) ([#290](https://www.github.com/googleapis/python-bigquery-sqlalchemy/issues/290)) ([9b5b002](https://www.github.com/googleapis/python-bigquery-sqlalchemy/commit/9b5b0025ec0b65177c0df02013ac387b3d3de472))
+
 ## [1.0.0](https://www.github.com/googleapis/python-bigquery-sqlalchemy/compare/v1.0.0-a1...v1.0.0) (2021-08-17)
 
diff --git a/docs/alembic.rst b/docs/alembic.rst
index 2f1e03ad..e83953a0 100644
--- a/docs/alembic.rst
+++ b/docs/alembic.rst
@@ -1,5 +1,5 @@
 Alembic support
----------------
+^^^^^^^^^^^^^^^
 
 `Alembic `_ is a lightweight database migration
 tool for usage with the SQLAlchemy Database Toolkit for
diff --git a/docs/geography.rst b/docs/geography.rst
new file mode 100644
index 00000000..aef79749
--- /dev/null
+++ b/docs/geography.rst
@@ -0,0 +1,87 @@
+Working with Geographic data
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+BigQuery provides a `GEOGRAPHY data type
+`_
+for `working with geographic data
+`_, including:
+
+- Points,
+- Linestrings,
+- Polygons, and
+- Collections of points, linestrings, and polygons.
+
+Geographic data uses the `WGS84
+`_ coordinate system.
+
+To define a geography column, use the `GEOGRAPHY` data type imported
+from the `sqlalchemy_bigquery` module:
+
+.. literalinclude:: samples/snippets/geography.py
+   :language: python
+   :dedent: 4
+   :start-after: [START bigquery_sqlalchemy_create_table_with_geography]
+   :end-before: [END bigquery_sqlalchemy_create_table_with_geography]
+
+BigQuery has a variety of `SQL geographic functions
+`_
+for working with geographic data. Among these are functions for
+converting between SQL geometry objects and `standard text (WKT) and
+binary (WKB) representations
+`_.
+
+Geography data is typically represented in Python as text strings in
+WKT format or as `WKB` objects, which contain binary data in WKB
+format. Querying geographic data returns `WKB` objects, and `WKB`
+objects may be used in queries. When calling spatial functions that
+expect geographic arguments, text arguments are automatically coerced
+to geography.
+
+Inserting data
+~~~~~~~~~~~~~~
+
+When inserting geography data, you can pass WKT strings, `WKT` objects,
+or `WKB` objects:
+
+.. literalinclude:: samples/snippets/geography.py
+   :language: python
+   :dedent: 4
+   :start-after: [START bigquery_sqlalchemy_insert_geography]
+   :end-before: [END bigquery_sqlalchemy_insert_geography]
+
+Note that in the `lake3` example, we got a `WKB` object by creating a
+`WKT` object and getting its `wkb` property. Normally, we'd get `WKB`
+objects as results of previous queries.
+
+Queries
+~~~~~~~
+
+When performing spatial queries where geography objects are expected,
+you can pass `WKB` or `WKT` objects:
+
+.. literalinclude:: samples/snippets/geography.py
+   :language: python
+   :dedent: 4
+   :start-after: [START bigquery_sqlalchemy_query_geography_wkb]
+   :end-before: [END bigquery_sqlalchemy_query_geography_wkb]
+
+In this example, we passed the `geog` attribute of `lake2`, which is a
+`WKB` object.
+
+Or you can pass strings in WKT format:
+
+.. literalinclude:: samples/snippets/geography.py
+   :language: python
+   :dedent: 4
+   :start-after: [START bigquery_sqlalchemy_query_geography_text]
+   :end-before: [END bigquery_sqlalchemy_query_geography_text]
+
+Installing geography support
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To get geography support, you need to install `sqlalchemy-bigquery`
+with the `geography` extra, or separately install `GeoAlchemy2` and
+`shapely`.
+
+.. code-block:: console
+
+   pip install 'sqlalchemy-bigquery[geography]'
diff --git a/docs/index.rst b/docs/index.rst
index eef073c6..4fe42891 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -3,7 +3,9 @@
    :maxdepth: 2
 
    README
+   geography
    alembic
+   reference
 
 Changelog
 ---------
diff --git a/docs/reference.rst b/docs/reference.rst
new file mode 100644
index 00000000..9f8cabef
--- /dev/null
+++ b/docs/reference.rst
@@ -0,0 +1,12 @@
+API Reference
+^^^^^^^^^^^^^
+
+Geography
+~~~~~~~~~
+
+.. autoclass:: sqlalchemy_bigquery.geography.GEOGRAPHY
+   :exclude-members: bind_expression, ElementType, bind_processor
+
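+For a quick sense of how the wrapper types relate, here is a minimal,
+illustrative sketch (the variable names are hypothetical; `WKT.wkb` and
+`WKB.wkt` are the conversion properties defined by this module):
+
+.. code-block:: python
+
+   from sqlalchemy_bigquery import WKT
+
+   point = WKT("POINT(0 0)")   # a WKT (text) wrapper
+   binary = point.wkb          # the same shape as a WKB (binary) object,
+                               # like those returned from queries
+   text = binary.wkt           # and back to a WKT wrapper
+
+.. 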
automodule:: sqlalchemy_bigquery.geography + :members: WKB, WKT + :exclude-members: GEOGRAPHY diff --git a/docs/samples b/docs/samples new file mode 120000 index 00000000..e804737e --- /dev/null +++ b/docs/samples @@ -0,0 +1 @@ +../samples \ No newline at end of file diff --git a/noxfile.py b/noxfile.py index 7c2097ab..823c72c2 100644 --- a/noxfile.py +++ b/noxfile.py @@ -82,22 +82,6 @@ def lint_setup_py(session): session.run("python", "setup.py", "check", "--restructuredtext", "--strict") -def install_alembic_for_python_38(session, constraints_path): - """ - install alembic for Python 3.8 unit and system tests - - We do not require alembic and most tests should run without it, however - - - We run some unit tests (Python 3.8) to cover the alembic - registration that happens when alembic is installed. - - - We have a system test that demonstrates working with alembic and - proves that the things we think should work do work. :) - """ - if session.python == "3.8": - session.install("alembic", "-c", constraints_path) - - def default(session): # Install all test dependencies, then install this package in-place. @@ -114,8 +98,13 @@ def default(session): constraints_path, ) - install_alembic_for_python_38(session, constraints_path) - session.install("-e", ".", "-c", constraints_path) + if session.python == "3.8": + extras = "[tests,alembic]" + elif session.python == "3.9": + extras = "[tests,geography]" + else: + extras = "[tests]" + session.install("-e", f".{extras}", "-c", constraints_path) # Run py.test against the unit tests. session.run( @@ -167,8 +156,13 @@ def system(session): # Install all test dependencies, then install this package into the # virtualenv's dist-packages. session.install("mock", "pytest", "google-cloud-testutils", "-c", constraints_path) - install_alembic_for_python_38(session, constraints_path) - session.install("-e", ".", "-c", constraints_path) + if session.python == "3.8": + extras = "[tests,alembic]" + elif session.python == "3.9": + extras = "[tests,geography]" + else: + extras = "[tests]" + session.install("-e", f".{extras}", "-c", constraints_path) # Run py.test against the system tests. 
if system_test_exists: @@ -216,7 +210,13 @@ def compliance(session): "-c", constraints_path, ) - session.install("-e", ".", "-c", constraints_path) + if session.python == "3.8": + extras = "[tests,alembic]" + elif session.python == "3.9": + extras = "[tests,geography]" + else: + extras = "[tests]" + session.install("-e", f".{extras}", "-c", constraints_path) session.run( "py.test", @@ -251,7 +251,9 @@ def docs(session): """Build the docs for this library.""" session.install("-e", ".") - session.install("sphinx==4.0.1", "alabaster", "recommonmark") + session.install( + "sphinx==4.0.1", "alabaster", "geoalchemy2", "shapely", "recommonmark" + ) shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) session.run( @@ -274,7 +276,12 @@ def docfx(session): session.install("-e", ".") session.install( - "sphinx==4.0.1", "alabaster", "recommonmark", "gcp-sphinx-docfx-yaml" + "sphinx==4.0.1", + "alabaster", + "geoalchemy2", + "shapely", + "recommonmark", + "gcp-sphinx-docfx-yaml", ) shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) diff --git a/owlbot.py b/owlbot.py index e63929a0..b6cfe7ff 100644 --- a/owlbot.py +++ b/owlbot.py @@ -18,7 +18,7 @@ import synthtool as s from synthtool import gcp - +from synthtool.languages import python REPO_ROOT = pathlib.Path(__file__).parent.absolute() @@ -27,10 +27,19 @@ # ---------------------------------------------------------------------------- # Add templated files # ---------------------------------------------------------------------------- +extras = ["tests"] +extras_by_python = { + "3.8": ["tests", "alembic"], + "3.9": ["tests", "geography"], +} templated_files = common.py_library( unit_test_python_versions=["3.6", "3.7", "3.8", "3.9"], system_test_python_versions=["3.8", "3.9"], - cov_level=100 + cov_level=100, + unit_test_extras=extras, + unit_test_extras_by_python=extras_by_python, + system_test_extras=extras, + system_test_extras_by_python=extras_by_python, ) s.move(templated_files, excludes=[ # sqlalchemy-bigquery was originally licensed MIT @@ -77,37 +86,6 @@ def place_before(path, text, *before_text, escape=None): "nox.options.stop_on_first_error = True", ) -install_alembic_for_python_38 = ''' -def install_alembic_for_python_38(session, constraints_path): - """ - install alembic for Python 3.8 unit and system tests - - We do not require alembic and most tests should run without it, however - - - We run some unit tests (Python 3.8) to cover the alembic - registration that happens when alembic is installed. - - - We have a system test that demonstrates working with alembic and - proves that the things we think should work do work. 
:) - """ - if session.python == "3.8": - session.install("alembic", "-c", constraints_path) - - -''' - -place_before( - "noxfile.py", - "def default", - install_alembic_for_python_38, - ) - -place_before( - "noxfile.py", - ' session.install("-e", ".", ', - " install_alembic_for_python_38(session, constraints_path)", - escape='(') - old_sessions = ''' "unit", "system", @@ -125,6 +103,9 @@ def install_alembic_for_python_38(session, constraints_path): s.replace( ["noxfile.py"], old_sessions, new_sessions) +# Maybe we can get rid of this when we don't need pytest-rerunfailures, +# which we won't need when BQ retries itself: +# https://github.com/googleapis/python-bigquery/pull/837 compliance = ''' @nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS) def compliance(session): @@ -153,7 +134,13 @@ def compliance(session): "-c", constraints_path, ) - session.install("-e", ".", "-c", constraints_path) + if session.python == "3.8": + extras = "[tests,alembic]" + elif session.python == "3.9": + extras = "[tests,geography]" + else: + extras = "[tests]" + session.install("-e", f".{extras}", "-c", constraints_path) session.run( "py.test", @@ -180,6 +167,7 @@ def compliance(session): escape="()", ) +s.replace(["noxfile.py"], '"alabaster"', '"alabaster", "geoalchemy2", "shapely"') @@ -201,6 +189,12 @@ def compliance(session): """ ) +# ---------------------------------------------------------------------------- +# Samples templates +# ---------------------------------------------------------------------------- + +python.py_samples(skip_readmes=True) + # ---------------------------------------------------------------------------- # Final cleanup # ---------------------------------------------------------------------------- diff --git a/samples/__init__.py b/samples/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/samples/pytest.ini b/samples/pytest.ini new file mode 100644 index 00000000..e69de29b diff --git a/samples/snippets/__init__.py b/samples/snippets/__init__.py new file mode 100644 index 00000000..fa3a9cd6 --- /dev/null +++ b/samples/snippets/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) 2021 The sqlalchemy-bigquery Authors +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of +# this software and associated documentation files (the "Software"), to deal in +# the Software without restriction, including without limitation the rights to +# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +# the Software, and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +__version__ = "1.0.0-a1" diff --git a/samples/snippets/conftest.py b/samples/snippets/conftest.py new file mode 100644 index 00000000..dc78bc4e --- /dev/null +++ b/samples/snippets/conftest.py @@ -0,0 +1,48 @@ +# Copyright (c) 2021 The sqlalchemy-bigquery Authors +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of +# this software and associated documentation files (the "Software"), to deal in +# the Software without restriction, including without limitation the rights to +# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +# the Software, and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +""" +SQLAlchemy dialect for Google BigQuery +""" + +from google.cloud import bigquery +import pytest +import sqlalchemy +import test_utils.prefixer + +prefixer = test_utils.prefixer.Prefixer("python-bigquery-sqlalchemy", "tests/system") + + +@pytest.fixture(scope="session") +def client(): + return bigquery.Client() + + +@pytest.fixture(scope="session") +def dataset_id(client: bigquery.Client): + project_id = client.project + dataset_id = prefixer.create_prefix() + dataset = bigquery.Dataset(f"{project_id}.{dataset_id}") + dataset = client.create_dataset(dataset) + yield dataset_id + client.delete_dataset(dataset_id, delete_contents=True) + + +@pytest.fixture(scope="session") +def engine(dataset_id): + return sqlalchemy.create_engine(f"bigquery:///{dataset_id}") diff --git a/samples/snippets/geography.py b/samples/snippets/geography.py new file mode 100644 index 00000000..d6adc115 --- /dev/null +++ b/samples/snippets/geography.py @@ -0,0 +1,66 @@ +# Copyright (c) 2021 The sqlalchemy-bigquery Authors +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of +# this software and associated documentation files (the "Software"), to deal in +# the Software without restriction, including without limitation the rights to +# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +# the Software, and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
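+
+# Note: the `engine` argument below is a SQLAlchemy engine bound to a
+# BigQuery dataset; conftest.py above builds it with
+# sqlalchemy.create_engine(f"bigquery:///{dataset_id}").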
+ + +def example(engine): + # [START bigquery_sqlalchemy_create_table_with_geography] + from sqlalchemy.ext.declarative import declarative_base + from sqlalchemy import Column, String + from sqlalchemy_bigquery import GEOGRAPHY + + Base = declarative_base() + + class Lake(Base): + __tablename__ = "lakes" + + name = Column(String, primary_key=True) + geog = Column(GEOGRAPHY) + + # [END bigquery_sqlalchemy_create_table_with_geography] + Lake.__table__.create(engine) + + # [START bigquery_sqlalchemy_insert_geography] + from sqlalchemy.orm import sessionmaker + from sqlalchemy_bigquery import WKT + + Session = sessionmaker(bind=engine) + session = Session() + + lake = Lake(name="Majeur", geog="POLYGON((0 0,1 0,1 1,0 1,0 0))") + lake2 = Lake(name="Garde", geog=WKT("POLYGON((1 0,3 0,3 2,1 2,1 0))")) + b = WKT("POLYGON((3 0,6 0,6 3,3 3,3 0))").wkb + lake3 = Lake(name="Orta", geog=b) + + session.add_all((lake, lake2, lake3)) + session.commit() + # [END bigquery_sqlalchemy_insert_geography] + + # [START bigquery_sqlalchemy_query_geography_wkb] + from sqlalchemy import func + + lakes_touching_lake2 = list( + session.query(Lake).filter(func.ST_Touches(Lake.geog, lake2.geog)) + ) + # [END bigquery_sqlalchemy_query_geography_wkb] + # [START bigquery_sqlalchemy_query_geography_text] + lakes_containing = list( + session.query(Lake).filter(func.ST_Contains(Lake.geog, "POINT(4 1)")) + ) + # [END bigquery_sqlalchemy_query_geography_text] + return lakes_touching_lake2, lakes_containing diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py new file mode 100644 index 00000000..b008613f --- /dev/null +++ b/samples/snippets/noxfile.py @@ -0,0 +1,266 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import os +from pathlib import Path +import sys +from typing import Callable, Dict, List, Optional + +import nox + + +# WARNING - WARNING - WARNING - WARNING - WARNING +# WARNING - WARNING - WARNING - WARNING - WARNING +# DO NOT EDIT THIS FILE EVER! +# WARNING - WARNING - WARNING - WARNING - WARNING +# WARNING - WARNING - WARNING - WARNING - WARNING + +BLACK_VERSION = "black==19.10b0" + +# Copy `noxfile_config.py` to your directory and modify it instead. + +# `TEST_CONFIG` dict is a configuration hook that allows users to +# modify the test configurations. The values here should be in sync +# with `noxfile_config.py`. Users will copy `noxfile_config.py` into +# their directory and modify it. + +TEST_CONFIG = { + # You can opt out from the test for specific Python versions. + "ignored_versions": [], + # Old samples are opted out of enforcing Python type hints + # All new samples should feature them + "enforce_type_hints": False, + # An envvar key for determining the project id to use. Change it + # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a + # build specific Cloud project. You can also use your own string + # to use your own Cloud project. 
+ "gcloud_project_env": "GOOGLE_CLOUD_PROJECT", + # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT', + # If you need to use a specific version of pip, + # change pip_version_override to the string representation + # of the version number, for example, "20.2.4" + "pip_version_override": None, + # A dictionary you want to inject into your test. Don't put any + # secrets here. These values will override predefined values. + "envs": {}, +} + + +try: + # Ensure we can import noxfile_config in the project's directory. + sys.path.append(".") + from noxfile_config import TEST_CONFIG_OVERRIDE +except ImportError as e: + print("No user noxfile_config found: detail: {}".format(e)) + TEST_CONFIG_OVERRIDE = {} + +# Update the TEST_CONFIG with the user supplied values. +TEST_CONFIG.update(TEST_CONFIG_OVERRIDE) + + +def get_pytest_env_vars() -> Dict[str, str]: + """Returns a dict for pytest invocation.""" + ret = {} + + # Override the GCLOUD_PROJECT and the alias. + env_key = TEST_CONFIG["gcloud_project_env"] + # This should error out if not set. + ret["GOOGLE_CLOUD_PROJECT"] = os.environ[env_key] + + # Apply user supplied envs. + ret.update(TEST_CONFIG["envs"]) + return ret + + +# DO NOT EDIT - automatically generated. +# All versions used to test samples. +ALL_VERSIONS = ["3.6", "3.7", "3.8", "3.9"] + +# Any default versions that should be ignored. +IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] + +TESTED_VERSIONS = sorted([v for v in ALL_VERSIONS if v not in IGNORED_VERSIONS]) + +INSTALL_LIBRARY_FROM_SOURCE = os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False) in ( + "True", + "true", +) +# +# Style Checks +# + + +def _determine_local_import_names(start_dir: str) -> List[str]: + """Determines all import names that should be considered "local". + + This is used when running the linter to insure that import order is + properly checked. + """ + file_ext_pairs = [os.path.splitext(path) for path in os.listdir(start_dir)] + return [ + basename + for basename, extension in file_ext_pairs + if extension == ".py" + or os.path.isdir(os.path.join(start_dir, basename)) + and basename not in ("__pycache__") + ] + + +# Linting with flake8. 
+# +# We ignore the following rules: +# E203: whitespace before ‘:’ +# E266: too many leading ‘#’ for block comment +# E501: line too long +# I202: Additional newline in a section of imports +# +# We also need to specify the rules which are ignored by default: +# ['E226', 'W504', 'E126', 'E123', 'W503', 'E24', 'E704', 'E121'] +FLAKE8_COMMON_ARGS = [ + "--show-source", + "--builtin=gettext", + "--max-complexity=20", + "--import-order-style=google", + "--exclude=.nox,.cache,env,lib,generated_pb2,*_pb2.py,*_pb2_grpc.py", + "--ignore=E121,E123,E126,E203,E226,E24,E266,E501,E704,W503,W504,I202", + "--max-line-length=88", +] + + +@nox.session +def lint(session: nox.sessions.Session) -> None: + if not TEST_CONFIG["enforce_type_hints"]: + session.install("flake8", "flake8-import-order") + else: + session.install("flake8", "flake8-import-order", "flake8-annotations") + + local_names = _determine_local_import_names(".") + args = FLAKE8_COMMON_ARGS + [ + "--application-import-names", + ",".join(local_names), + ".", + ] + session.run("flake8", *args) + + +# +# Black +# + + +@nox.session +def blacken(session: nox.sessions.Session) -> None: + session.install(BLACK_VERSION) + python_files = [path for path in os.listdir(".") if path.endswith(".py")] + + session.run("black", *python_files) + + +# +# Sample Tests +# + + +PYTEST_COMMON_ARGS = ["--junitxml=sponge_log.xml"] + + +def _session_tests( + session: nox.sessions.Session, post_install: Callable = None +) -> None: + if TEST_CONFIG["pip_version_override"]: + pip_version = TEST_CONFIG["pip_version_override"] + session.install(f"pip=={pip_version}") + """Runs py.test for a particular project.""" + if os.path.exists("requirements.txt"): + if os.path.exists("constraints.txt"): + session.install("-r", "requirements.txt", "-c", "constraints.txt") + else: + session.install("-r", "requirements.txt") + + if os.path.exists("requirements-test.txt"): + if os.path.exists("constraints-test.txt"): + session.install("-r", "requirements-test.txt", "-c", "constraints-test.txt") + else: + session.install("-r", "requirements-test.txt") + + if INSTALL_LIBRARY_FROM_SOURCE: + session.install("-e", _get_repo_root()) + + if post_install: + post_install(session) + + session.run( + "pytest", + *(PYTEST_COMMON_ARGS + session.posargs), + # Pytest will return 5 when no tests are collected. This can happen + # on travis where slow and flaky tests are excluded. + # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html + success_codes=[0, 5], + env=get_pytest_env_vars(), + ) + + +@nox.session(python=ALL_VERSIONS) +def py(session: nox.sessions.Session) -> None: + """Runs py.test for a sample using the specified version of Python.""" + if session.python in TESTED_VERSIONS: + _session_tests(session) + else: + session.skip( + "SKIPPED: {} tests are disabled for this sample.".format(session.python) + ) + + +# +# Readmegen +# + + +def _get_repo_root() -> Optional[str]: + """ Returns the root folder of the project. """ + # Get root of this repository. Assume we don't have directories nested deeper than 10 items. 
+ p = Path(os.getcwd()) + for i in range(10): + if p is None: + break + if Path(p / ".git").exists(): + return str(p) + # .git is not available in repos cloned via Cloud Build + # setup.py is always in the library's root, so use that instead + # https://github.com/googleapis/synthtool/issues/792 + if Path(p / "setup.py").exists(): + return str(p) + p = p.parent + raise Exception("Unable to detect repository root.") + + +GENERATED_READMES = sorted([x for x in Path(".").rglob("*.rst.in")]) + + +@nox.session +@nox.parametrize("path", GENERATED_READMES) +def readmegen(session: nox.sessions.Session, path: str) -> None: + """(Re-)generates the readme for a sample.""" + session.install("jinja2", "pyyaml") + dir_ = os.path.dirname(path) + + if os.path.exists(os.path.join(dir_, "requirements.txt")): + session.install("-r", os.path.join(dir_, "requirements.txt")) + + in_file = os.path.join(dir_, "README.rst.in") + session.run( + "python", _get_repo_root() + "/scripts/readme-gen/readme_gen.py", in_file + ) diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt new file mode 100644 index 00000000..d51249fe --- /dev/null +++ b/samples/snippets/requirements-test.txt @@ -0,0 +1,12 @@ +attrs==21.2.0 +google-cloud-testutils==1.0.0 +importlib-metadata==4.6.4 +iniconfig==1.1.1 +packaging==21.0 +pluggy==0.13.1 +py==1.10.0 +pyparsing==2.4.7 +pytest==6.2.4 +toml==0.10.2 +typing-extensions==3.10.0.0 +zipp==3.5.0 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt new file mode 100644 index 00000000..50a070fb --- /dev/null +++ b/samples/snippets/requirements.txt @@ -0,0 +1,65 @@ +aiocontextvars==0.2.2 +attrs==21.2.0 +cachetools==4.2.2 +certifi==2021.5.30 +cffi==1.14.6 +charset-normalizer==2.0.4 +click==8.0.1 +click-plugins==1.1.1 +cligj==0.7.2 +contextvars==2.4 +dataclasses==0.6; python_version < '3.7' +Deprecated==1.2.12 +Fiona==1.8.20 +future==0.18.2 +GeoAlchemy2==0.9.3 +geopandas==0.9.0 +google-api-core==2.0.0 +google-auth==2.0.1 +google-cloud-bigquery==2.24.1 +google-cloud-bigquery-storage==2.6.3 +google-cloud-core==2.0.0 +google-crc32c==1.1.2 +google-resumable-media==2.0.0 +googleapis-common-protos==1.53.0 +greenlet==1.1.1 +grpcio==1.39.0 +idna==3.2 +immutables==0.16 +importlib-metadata==4.6.4 +libcst==0.3.20 +munch==2.5.0 +mypy-extensions==0.4.3 +numpy==1.19.5; python_version < '3.7' +numpy==1.21.2; python_version >= '3.7' +opentelemetry-api==1.4.1 +opentelemetry-instrumentation==0.23b2 +opentelemetry-sdk==1.4.1 +opentelemetry-semantic-conventions==0.23b2 +packaging==21.0 +pandas==1.1.5; python_version < '3.7' +pandas==1.3.2; python_version >= '3.7' +proto-plus==1.19.0 +protobuf==3.17.3 +pyarrow==5.0.0 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pycparser==2.20 +pyparsing==2.4.7 +pyproj==3.0.1; python_version < '3.7' +pyproj==3.1.0; python_version >= '3.7' +python-dateutil==2.8.2 +pytz==2021.1 +PyYAML==5.4.1 +requests==2.26.0 +rsa==4.7.2 +Shapely==1.7.1 +six==1.16.0 +SQLAlchemy==1.4.23 +sqlalchemy-bigquery==1.0.0 +tqdm==4.62.2 +typing-extensions==3.10.0.0 +typing-inspect==0.7.1 +urllib3==1.26.6 +wrapt==1.12.1 +zipp==3.5.0 diff --git a/samples/snippets/test_geography.py b/samples/snippets/test_geography.py new file mode 100644 index 00000000..7a570b81 --- /dev/null +++ b/samples/snippets/test_geography.py @@ -0,0 +1,27 @@ +# Copyright (c) 2021 The sqlalchemy-bigquery Authors +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of +# this software and associated documentation files (the "Software"), to deal in +# the 
Software without restriction, including without limitation the rights to +# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +# the Software, and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + +def test_geography(engine): + from . import geography + + lakes_touching_lake2, lakes_containing = geography.example(engine) + + assert sorted(lake.name for lake in lakes_touching_lake2) == ["Majeur", "Orta"] + assert [lake.name for lake in lakes_containing] == ["Orta"] diff --git a/setup.py b/setup.py index 437c0df0..f70c3a0d 100644 --- a/setup.py +++ b/setup.py @@ -19,6 +19,7 @@ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. import io +import itertools import os import re from setuptools import setup @@ -44,6 +45,13 @@ def readme(): return f.read() +extras = dict( + geography=["GeoAlchemy2", "shapely"], + alembic=["alembic"], + tests=["packaging", "pytz"], +) +extras["all"] = set(itertools.chain.from_iterable(extras.values())) + setup( name=name, version=version, @@ -75,12 +83,13 @@ def readme(): # Until this issue is closed # https://github.com/googleapis/google-cloud-python/issues/10566 "google-auth>=1.25.0,<3.0.0dev", # Work around pip wack. 
- "google-cloud-bigquery>=2.19.0", + "google-cloud-bigquery>=2.24.1", "sqlalchemy>=1.2.0,<1.5.0dev", "future", ], + extras_require=extras, python_requires=">=3.6, <3.10", - tests_require=["pytz"], + tests_require=["packaging", "pytz"], entry_points={ "sqlalchemy.dialects": ["bigquery = sqlalchemy_bigquery:BigQueryDialect"] }, diff --git a/sqlalchemy_bigquery/__init__.py b/sqlalchemy_bigquery/__init__.py index fb08e576..f0defda1 100644 --- a/sqlalchemy_bigquery/__init__.py +++ b/sqlalchemy_bigquery/__init__.py @@ -20,47 +20,55 @@ SQLAlchemy dialect for Google BigQuery """ -from .version import __version__ +from .version import __version__ # noqa -from .base import BigQueryDialect +from .base import BigQueryDialect, dialect # noqa from .base import ( - STRING, + ARRAY, + BIGNUMERIC, BOOL, BOOLEAN, + BYTES, + DATE, + DATETIME, + FLOAT, + FLOAT64, INT64, INTEGER, - FLOAT64, - FLOAT, - TIMESTAMP, - DATETIME, - DATE, - BYTES, - TIME, - RECORD, NUMERIC, - BIGNUMERIC, + RECORD, + STRING, + TIME, + TIMESTAMP, ) __all__ = [ - "__version__", + "ARRAY", + "BIGNUMERIC", "BigQueryDialect", - "STRING", "BOOL", "BOOLEAN", + "BYTES", + "DATE", + "DATETIME", + "FLOAT", + "FLOAT64", "INT64", "INTEGER", - "FLOAT64", - "FLOAT", - "TIMESTAMP", - "DATETIME", - "DATE", - "BYTES", - "TIME", - "RECORD", "NUMERIC", - "BIGNUMERIC", + "RECORD", + "STRING", + "TIME", + "TIMESTAMP", ] +try: + from .geography import GEOGRAPHY, WKB, WKT # noqa +except ImportError: + pass +else: + __all__.extend(["GEOGRAPHY", "WKB", "WKT"]) + try: import pybigquery # noqa except ImportError: diff --git a/sqlalchemy_bigquery/base.py b/sqlalchemy_bigquery/base.py index db7336f6..e4f86e7b 100644 --- a/sqlalchemy_bigquery/base.py +++ b/sqlalchemy_bigquery/base.py @@ -55,8 +55,15 @@ from .parse_url import parse_url from sqlalchemy_bigquery import _helpers +try: + from .geography import GEOGRAPHY +except ImportError: + pass + FIELD_ILLEGAL_CHARACTERS = re.compile(r"[^\w]+") +TABLE_VALUED_ALIAS_ALIASES = "bigquery_table_valued_alias_aliases" + def assert_(cond, message="Assertion failed"): # pragma: NO COVER if not cond: @@ -109,38 +116,46 @@ def format_label(self, label, name=None): _type_map = { - "STRING": types.String, - "BOOL": types.Boolean, + "ARRAY": types.ARRAY, + "BIGNUMERIC": types.Numeric, "BOOLEAN": types.Boolean, - "INT64": types.Integer, - "INTEGER": types.Integer, + "BOOL": types.Boolean, + "BYTES": types.BINARY, + "DATETIME": types.DATETIME, + "DATE": types.DATE, "FLOAT64": types.Float, "FLOAT": types.Float, + "INT64": types.Integer, + "INTEGER": types.Integer, + "NUMERIC": types.Numeric, + "RECORD": types.JSON, + "STRING": types.String, "TIMESTAMP": types.TIMESTAMP, - "DATETIME": types.DATETIME, - "DATE": types.DATE, - "BYTES": types.BINARY, "TIME": types.TIME, - "RECORD": types.JSON, - "NUMERIC": types.Numeric, - "BIGNUMERIC": types.Numeric, } -STRING = _type_map["STRING"] -BOOL = _type_map["BOOL"] +# By convention, dialect-provided types are spelled with all upper case. 
+ARRAY = _type_map["ARRAY"]
+BIGNUMERIC = _type_map["NUMERIC"]
 BOOLEAN = _type_map["BOOLEAN"]
-INT64 = _type_map["INT64"]
-INTEGER = _type_map["INTEGER"]
+BOOL = _type_map["BOOL"]
+BYTES = _type_map["BYTES"]
+DATETIME = _type_map["DATETIME"]
+DATE = _type_map["DATE"]
 FLOAT64 = _type_map["FLOAT64"]
 FLOAT = _type_map["FLOAT"]
+INT64 = _type_map["INT64"]
+INTEGER = _type_map["INTEGER"]
+NUMERIC = _type_map["NUMERIC"]
+RECORD = _type_map["RECORD"]
+STRING = _type_map["STRING"]
 TIMESTAMP = _type_map["TIMESTAMP"]
-DATETIME = _type_map["DATETIME"]
-DATE = _type_map["DATE"]
-BYTES = _type_map["BYTES"]
 TIME = _type_map["TIME"]
-RECORD = _type_map["RECORD"]
-NUMERIC = _type_map["NUMERIC"]
-BIGNUMERIC = _type_map["NUMERIC"]
+
+try:
+    _type_map["GEOGRAPHY"] = GEOGRAPHY
+except NameError:
+    pass
 
 
 class BigQueryExecutionContext(DefaultExecutionContext):
@@ -235,8 +250,63 @@ def visit_insert(self, insert_stmt, asfrom=False, **kw):
             insert_stmt, asfrom=False, **kw
         )
 
+    def visit_table_valued_alias(self, element, **kw):
+        # When using table-valued functions, like UNNEST, BigQuery requires a
+        # FROM for any table referenced in the function, including expressions
+        # in function arguments.
+        #
+        # For example, given SQLAlchemy code:
+        #
+        #     print(
+        #         select([func.unnest(foo.c.objects).alias('foo_objects').column])
+        #         .compile(engine))
+        #
+        # Left to its own devices, SQLAlchemy would output:
+        #
+        #     SELECT `foo_objects`
+        #     FROM unnest(`foo`.`objects`) AS `foo_objects`
+        #
+        # But BigQuery doesn't understand the `foo` reference unless
+        # we add a reference to `foo` in the FROM:
+        #
+        #     SELECT foo_objects
+        #     FROM `foo`, UNNEST(`foo`.`objects`) as foo_objects
+        #
+        # This is tricky because:
+        # 1. We have to find the table references.
+        # 2. We can't know practically if there's already a FROM for a table.
+        #
+        # We leverage visit_column to find a table reference. Whenever we find
+        # one, we create an alias for it, so as not to conflict with an existing
+        # reference if one is present.
+        #
+        # This requires communicating between this function and visit_column.
+        # We do this by sticking a dictionary in the keyword arguments.
+        # This dictionary:
+        # a. Tells visit_column that it's in a table-valued alias expression, and
+        # b. Gives it a place to record the aliases it creates.
+        #
+        # This function creates aliases in the FROM list for any aliases recorded
+        # by visit_column.
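+        #
+        # Because the referenced table is aliased to avoid clashing with any
+        # existing FROM entry, the emitted SQL ends up shaped roughly like
+        # this (an illustrative sketch, not verbatim compiler output):
+        #
+        #     SELECT `foo_objects`
+        #     FROM `foo` `anon_1`, unnest(`anon_1`.`objects`) AS `foo_objects`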
+
+        kw[TABLE_VALUED_ALIAS_ALIASES] = {}
+        ret = super().visit_table_valued_alias(element, **kw)
+        aliases = kw.pop(TABLE_VALUED_ALIAS_ALIASES)
+        if aliases:
+            aliases = ", ".join(
+                f"{self.preparer.quote(tablename)} {self.preparer.quote(alias)}"
+                for tablename, alias in aliases.items()
+            )
+            ret = f"{aliases}, {ret}"
+        return ret
+
     def visit_column(
-        self, column, add_to_result_map=None, include_table=True, **kwargs
+        self,
+        column,
+        add_to_result_map=None,
+        include_table=True,
+        result_map_targets=(),
+        **kwargs,
     ):
         name = orig_name = column.name
         if name is None:
@@ -247,7 +317,12 @@ def visit_column(
             name = self._truncated_identifier("colident", name)
 
         if add_to_result_map is not None:
-            add_to_result_map(name, orig_name, (column, name, column.key), column.type)
+            targets = (column, name, column.key) + result_map_targets
+            if getattr(column, "_tq_label", None):
+                # _tq_label was added in SQLAlchemy 1.4
+                targets += (column._tq_label,)
+
+            add_to_result_map(name, orig_name, targets, column.type)
 
         if is_literal:
             name = self.escape_literal_column(name)
@@ -260,6 +335,14 @@ def visit_column(
             tablename = table.name
             if isinstance(tablename, elements._truncated_label):
                 tablename = self._truncated_identifier("alias", tablename)
+            elif TABLE_VALUED_ALIAS_ALIASES in kwargs:
+                aliases = kwargs[TABLE_VALUED_ALIAS_ALIASES]
+                if tablename not in aliases:
+                    aliases[tablename] = self.anon_map[
+                        f"{TABLE_VALUED_ALIAS_ALIASES} {tablename}"
+                    ]
+                tablename = aliases[tablename]
+
             return self.preparer.quote(tablename) + "." + name
 
     def visit_label(self, *args, within_group_by=False, **kwargs):
@@ -400,6 +483,39 @@ def visit_bindparam(
         skip_bind_expression=False,
         **kwargs,
     ):
+        type_ = bindparam.type
+        unnest = False
+        if (
+            bindparam.expanding
+            and not isinstance(type_, NullType)
+            and not literal_binds
+        ):
+            # Normally, when performing an IN operation, like:
+            #
+            #     foo IN (some_sequence)
+            #
+            # SQLAlchemy passes `foo` as a parameter and unpacks
+            # `some_sequence` and passes each element as a parameter.
+            # This mechanism is referred to as "expanding". It's
+            # inefficient and can't handle large arrays. (It's also
+            # very complicated, but that's not the issue we care about
+            # here. :) ) BigQuery lets us use arrays directly in this
+            # context; we just need to call UNNEST on an array when
+            # it's used in IN.
+            #
+            # So, if we get an `expanding` flag, and if we have a known type
+            # (and don't have literal binds, which are implemented in-line
+            # in the SQL), we turn off expanding and we set an unnest flag
+            # so that we add an UNNEST() call (below).
+            #
+            # The NullType/known-type check has to do with some extreme
+            # edge cases having to do with empty in-lists that get special
+            # hijinks from SQLAlchemy that we don't want to disturb. :)
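+            #
+            # With the unnest flag set, the parameter rendered below gets
+            # wrapped in a call, so the compiled condition is shaped roughly
+            # like this (an illustrative sketch; the parameter name and type
+            # are hypothetical):
+            #
+            #     foo IN UNNEST(%(foo_1:INT64)s)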
+            if getattr(bindparam, "expand_op", None) is not None:
+                assert bindparam.expand_op.__name__.endswith("in_op")  # in_op or not_in_op
+                bindparam.expanding = False
+                unnest = True
+
         param = super(BigQueryCompiler, self).visit_bindparam(
             bindparam,
             within_columns_clause,
@@ -408,8 +524,7 @@ def visit_bindparam(
             **kwargs,
         )
 
-        type_ = bindparam.type
-        if isinstance(type_, NullType):
+        if literal_binds or isinstance(type_, NullType):
             return param
 
         if (
@@ -444,6 +559,9 @@ def visit_bindparam(
             assert_(type_ is None)
             param = f"%({name}:{bq_type})s"
 
+        if unnest:
+            param = f"UNNEST({param})"
+
         return param
 
 
@@ -479,7 +597,7 @@ def visit_BINARY(self, type_, **kw):
             return f"BYTES({type_.length})"
         return "BYTES"
 
-    visit_VARBINARY = visit_BINARY
+    visit_VARBINARY = visit_BLOB = visit_BINARY
 
     def visit_NUMERIC(self, type_, **kw):
         if (type_.precision is not None) and isinstance(
@@ -631,7 +749,7 @@ class BigQueryDialect(DefaultDialect):
     supports_pk_autoincrement = False
     supports_default_values = False
     supports_empty_insert = False
-    supports_multiline_insert = True
+    supports_multivalues_insert = True
     supports_unicode_statements = True
     supports_unicode_binds = True
     supports_native_decimal = True
@@ -974,6 +1092,8 @@ def get_view_definition(self, connection, view_name, schema=None, **kw):
         return view.view_query
 
 
+dialect = BigQueryDialect
+
 try:
     import alembic  # noqa
 except ImportError:
diff --git a/sqlalchemy_bigquery/geography.py b/sqlalchemy_bigquery/geography.py
new file mode 100644
index 00000000..9a10c236
--- /dev/null
+++ b/sqlalchemy_bigquery/geography.py
@@ -0,0 +1,242 @@
+# Copyright (c) 2021 The PyBigQuery Authors
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of
+# this software and associated documentation files (the "Software"), to deal in
+# the Software without restriction, including without limitation the rights to
+# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so,
+# subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+import geoalchemy2
+from geoalchemy2.shape import to_shape
+import geoalchemy2.functions
+from shapely import wkb, wkt
+import sqlalchemy.ext.compiler
+from sqlalchemy.sql.elements import BindParameter
+
+SRID = 4326  # WGS84, https://spatialreference.org/ref/epsg/wgs-84/
+
+
+class WKB(geoalchemy2.WKBElement):
+    """
+    Well-Known-Binary data wrapper.
+
+    WKB objects hold geographic data in a binary format known as
+    "Well-Known Binary",
+    .
+    These objects are returned from queries and can be used in inserts
+    and queries.
+
+    The WKB class is a subclass of the GeoAlchemy2 WKBElement class,
+    customized for working with BigQuery.
+    """
+
+    geom_from_extended_version = "ST_GeogFromWKB"
+
+    def __init__(self, data):
+        super().__init__(data, SRID, True)
+
+    @property
+    def wkt(self):
+        """
+        Return the WKB object as a WKT object.
+ """ + return WKT(to_shape(self).wkt) + + +class WKT(geoalchemy2.WKTElement): + """ + Well-Known-Text data wrapper. + + WKT objects hold geographic data in a text format known as + "Well-Known Text", + . + + You generally shouldn't need to create WKT objects directly, as + text arguments to geographic functions and inserts to GEOGRAPHY + columns are automatically coerced to geographic data. + + The WKT class is a subclass of the Geoalchemy2 WKTElement class + customized for working with BigQuery. + """ + + geom_from_extended_version = "ST_GeogFromText" + + def __init__(self, data): + super().__init__(data, SRID, True) + + @property + def wkb(self): + """ + Return the WKT object as a WKB object. + """ + return WKB(wkb.dumps(wkt.loads(self.data))) + + +class GEOGRAPHY(geoalchemy2.Geography): + """ + GEOGRAPHY type + + Use this to define BigQuery GEOGRAPHY columns:: + + class Lake(Base): + __tablename__ = 'lakes' + + name = Column(String) + geog = column(GEOGRAPHY) + + + """ + + def __init__(self): + super().__init__( + geometry_type=None, spatial_index=False, srid=SRID, + ) + self.extended = True + + # Un-inherit the bind function that adds an ST_GeogFromText. + # It's unnecessary and causes BigQuery to error. + # + # Some things to note about this: + # + # 1. bind_expression can't always know the value. When multiple + # rows are being inserted, the values may be different in each + # row. As a consequence, we have to treat all the values as WKT. + # + # 2. This applies equally to explicitly converting with + # st_geogfromtext, or implicitly with the geography parameter + # conversion. + # + # 3. We handle different types using bind_processor, below. + # + bind_expression = sqlalchemy.sql.type_api.TypeEngine.bind_expression + + def bind_processor(self, dialect): + """ + SqlAlchemy plugin that controls how values are converted to parameters + + When we bind values, we always bind as text. We have to do + this because when we decide how to bind, we don't always know + what the values will be. + + This is not a user-facing method. + """ + + def process(bindvalue): + if isinstance(bindvalue, WKT): + return bindvalue.data + elif isinstance(bindvalue, WKB): + return bindvalue.wkt.data + else: + return bindvalue + + return process + + @staticmethod + def ElementType(data, srid=SRID, extended=True): + """ + Plugin for the Geoalchemy2 framework for constructing WKB objects. + + The framework wants a callable, which it assumes is a class + (this the name), for constructing a geographic element. + + We don't want `WKB` to accept extra arguments that it checks + and ignores, so we do that in this wrapper. + + This is not a user-facing method. + """ + if srid != SRID: + raise AssertionError("Bad srid", srid) + if not extended: + raise AssertionError("Extended must be True.") + return WKB(data) + + +@sqlalchemy.ext.compiler.compiles(geoalchemy2.functions.GenericFunction, "bigquery") +def _fixup_st_arguments(element, compiler, **kw): + """ + Compiler-plugin for the BigQuery that overrides how geographic functions are handled + + Geographic function (ST_...) get turned into + `geoalchemy2.functions.GenericFunction` objects by + Geoalchemy2. The code here overrides how they're handeled. + + We want arguments passed to have the GEOGRAPHY type associated + with them, when appropriate, where "when appropriate" is + determined by the `function documentation + `_.. + + This is not a user-facing function. 
+ """ + argument_types = _argument_types.get(element.name.lower()) + if argument_types: + for argument_type, argument in zip(argument_types, element.clauses.clauses): + if isinstance(argument, BindParameter) and ( + argument.type is not argument_type + or not isinstance(argument.type, argument_type) + ): + argument.type = argument_type() + + return compiler.visit_function(element, **kw) + + +_argument_types = dict( + st_area=(GEOGRAPHY,), + st_asbinary=(GEOGRAPHY,), + st_asgeojson=(GEOGRAPHY,), + st_astext=(GEOGRAPHY,), + st_boundary=(GEOGRAPHY,), + st_centroid=(GEOGRAPHY,), + st_centroid_agg=(GEOGRAPHY,), + st_closestpoint=(GEOGRAPHY, GEOGRAPHY,), + st_clusterdbscan=(GEOGRAPHY,), + st_contains=(GEOGRAPHY, GEOGRAPHY,), + st_convexhull=(GEOGRAPHY,), + st_coveredby=(GEOGRAPHY, GEOGRAPHY,), + st_covers=(GEOGRAPHY, GEOGRAPHY,), + st_difference=(GEOGRAPHY, GEOGRAPHY,), + st_dimension=(GEOGRAPHY,), + st_disjoint=(GEOGRAPHY, GEOGRAPHY,), + st_distance=(GEOGRAPHY, GEOGRAPHY,), + st_dump=(GEOGRAPHY,), + st_dwithin=(GEOGRAPHY, GEOGRAPHY,), + st_endpoint=(GEOGRAPHY,), + st_equals=(GEOGRAPHY, GEOGRAPHY,), + st_exteriorring=(GEOGRAPHY,), + st_geohash=(GEOGRAPHY,), + st_intersection=(GEOGRAPHY, GEOGRAPHY,), + st_intersects=(GEOGRAPHY, GEOGRAPHY,), + st_intersectsbox=(GEOGRAPHY,), + st_iscollection=(GEOGRAPHY,), + st_isempty=(GEOGRAPHY,), + st_length=(GEOGRAPHY,), + st_makeline=(GEOGRAPHY, GEOGRAPHY,), + st_makepolygon=(GEOGRAPHY, GEOGRAPHY,), + st_makepolygonoriented=(GEOGRAPHY,), + st_maxdistance=(GEOGRAPHY, GEOGRAPHY,), + st_npoints=(GEOGRAPHY,), + st_numpoints=(GEOGRAPHY,), + st_perimeter=(GEOGRAPHY,), + st_pointn=(GEOGRAPHY,), + st_simplify=(GEOGRAPHY,), + st_snaptogrid=(GEOGRAPHY,), + st_startpoint=(GEOGRAPHY,), + st_touches=(GEOGRAPHY, GEOGRAPHY,), + st_union=(GEOGRAPHY, GEOGRAPHY,), + st_union_agg=(GEOGRAPHY,), + st_within=(GEOGRAPHY, GEOGRAPHY,), + st_x=(GEOGRAPHY,), + st_y=(GEOGRAPHY,), +) + +__all__ = ["GEOGRAPHY", "WKB", "WKT"] diff --git a/sqlalchemy_bigquery/version.py b/sqlalchemy_bigquery/version.py index 7ad8fe34..ef8460f5 100644 --- a/sqlalchemy_bigquery/version.py +++ b/sqlalchemy_bigquery/version.py @@ -17,4 +17,4 @@ # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-__version__ = "1.0.0" +__version__ = "1.1.0" diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index 1785edd0..e5ed0b2a 100644 --- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -6,5 +6,5 @@ # e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", sqlalchemy==1.2.0 google-auth==1.25.0 -google-cloud-bigquery==2.19.0 +google-cloud-bigquery==2.24.1 google-api-core==1.30.0 diff --git a/tests/sqlalchemy_dialect_compliance/test_dialect_compliance.py b/tests/sqlalchemy_dialect_compliance/test_dialect_compliance.py index c126c4f7..156e6167 100644 --- a/tests/sqlalchemy_dialect_compliance/test_dialect_compliance.py +++ b/tests/sqlalchemy_dialect_compliance/test_dialect_compliance.py @@ -19,6 +19,7 @@ import datetime import mock +import packaging.version import pytest import pytz import sqlalchemy @@ -41,7 +42,7 @@ ) -if sqlalchemy.__version__ < "1.4": +if packaging.version.parse(sqlalchemy.__version__) < packaging.version.parse("1.4"): from sqlalchemy.testing.suite import LimitOffsetTest as _LimitOffsetTest class LimitOffsetTest(_LimitOffsetTest): diff --git a/tests/system/conftest.py b/tests/system/conftest.py index 3b3bda8e..d9db14ab 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -21,6 +21,7 @@ from typing import List import pytest +import sqlalchemy from google.cloud import bigquery import test_utils.prefixer @@ -122,6 +123,14 @@ def bigquery_regional_dataset(bigquery_client, bigquery_schema): bigquery_client.delete_dataset(dataset_id, delete_contents=True) +@pytest.fixture(autouse=True) +def cleanup_extra_tables(bigquery_client, bigquery_dataset): + common = "sample", "sample_one_row", "sample_view", "sample_dml_empty" + for table in bigquery_client.list_tables(bigquery_dataset): + if table.table_id not in common: + bigquery_client.delete_table(table) + + @pytest.fixture(scope="session", autouse=True) def cleanup_datasets(bigquery_client: bigquery.Client): for dataset in bigquery_client.list_datasets(): @@ -129,3 +138,8 @@ def cleanup_datasets(bigquery_client: bigquery.Client): bigquery_client.delete_dataset( dataset, delete_contents=True, not_found_ok=True ) + + +@pytest.fixture +def metadata(): + return sqlalchemy.MetaData() diff --git a/tests/system/test_alembic.py b/tests/system/test_alembic.py index db9ceb4f..81c686d1 100644 --- a/tests/system/test_alembic.py +++ b/tests/system/test_alembic.py @@ -22,13 +22,10 @@ import pytest from sqlalchemy import Column, DateTime, Integer, String -try: - import alembic # noqa -except ImportError: - alembic = None - import google.api_core.exceptions +alembic = pytest.importorskip("alembic") + @pytest.fixture def alembic_table(bigquery_dataset, bigquery_client): @@ -62,7 +59,6 @@ def get_table(table_name, data="table"): yield get_table -@pytest.mark.skipif(alembic is None, reason="Alembic isn't installed.") def test_alembic_scenario(alembic_table): """ Exercise all of the operations we support. 
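A side note on the dialect-compliance change above: comparing
`sqlalchemy.__version__` as a plain string is lexicographic, which
misorders two-digit version components; that is why the test now parses
versions with `packaging`. A minimal sketch of the difference
(illustrative, not part of the patch):

```python
import packaging.version

# Plain string comparison is lexicographic:
assert "1.10" < "1.4"  # true as strings -- wrong as versions

# packaging.version compares release segments numerically:
assert packaging.version.parse("1.10") > packaging.version.parse("1.4")
```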
diff --git a/tests/system/test_geography.py b/tests/system/test_geography.py new file mode 100644 index 00000000..18bcc7d4 --- /dev/null +++ b/tests/system/test_geography.py @@ -0,0 +1,304 @@ +# Copyright (c) 2021 The PyBigQuery Authors +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of +# this software and associated documentation files (the "Software"), to deal in +# the Software without restriction, including without limitation the rights to +# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +# the Software, and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +import pytest + +geoalchemy2 = pytest.importorskip("geoalchemy2") + + +def test_geoalchemy2_core(bigquery_dataset): + """Make sure GeoAlchemy 2 Core Tutorial works as adapted to only having geography + + https://geoalchemy-2.readthedocs.io/en/latest/core_tutorial.html + + Note: + + - Bigquery doesn't have ST_BUFFER + """ + + # Connect to the DB + + from sqlalchemy import create_engine + + engine = create_engine(f"bigquery:///{bigquery_dataset}") + + # Create the Table + + from sqlalchemy import Table, Column, String, MetaData + from sqlalchemy_bigquery import GEOGRAPHY + + metadata = MetaData() + lake_table = Table( + "lake", metadata, Column("name", String), Column("geog", GEOGRAPHY) + ) + + lake_table.create(engine) + + # Insertions + + conn = engine.connect() + + conn.execute( + lake_table.insert().values( + name="Majeur", geog="POLYGON((0 0,1 0,1 1,0 1,0 0))", + ) + ) + + conn.execute( + lake_table.insert(), + [ + {"name": "Garde", "geog": "POLYGON((1 0,3 0,3 2,1 2,1 0))"}, + {"name": "Orta", "geog": "POLYGON((3 0,6 0,6 3,3 3,3 0))"}, + ], + ) + + # Selections + + from sqlalchemy.sql import select + + assert sorted( + (r.name, r.geog.desc[:4]) for r in conn.execute(select([lake_table])) + ) == [("Garde", "0103"), ("Majeur", "0103"), ("Orta", "0103")] + + # Spatial query + + from sqlalchemy import func + + [[result]] = conn.execute( + select([lake_table.c.name], func.ST_Contains(lake_table.c.geog, "POINT(4 1)")) + ) + assert result == "Orta" + + assert sorted( + (r.name, int(r.area)) + for r in conn.execute( + select([lake_table.c.name, lake_table.c.geog.ST_AREA().label("area")]) + ) + ) == [("Garde", 49452374328), ("Majeur", 12364036567), ("Orta", 111253664228)] + + # Extra: Make sure we can save a retrieved value back: + + [[geog]] = conn.execute(select([lake_table.c.geog], lake_table.c.name == "Garde")) + conn.execute(lake_table.insert().values(name="test", geog=geog)) + assert ( + int( + list( + conn.execute( + select([lake_table.c.geog.st_area()], lake_table.c.name == "test") + ) + )[0][0] + ) + == 49452374328 + ) + + # and, while we're at it, that we can insert WKTs, although we + # normally wouldn't want to. 
+ from sqlalchemy_bigquery import WKT + + conn.execute( + lake_table.insert().values( + name="test2", geog=WKT("POLYGON((1 0,3 0,3 2,1 2,1 0))"), + ) + ) + assert ( + int( + list( + conn.execute( + select([lake_table.c.geog.st_area()], lake_table.c.name == "test2") + ) + )[0][0] + ) + == 49452374328 + ) + + +def test_geoalchemy2_orm(bigquery_dataset): + """Make sure GeoAlchemy 2 ORM Tutorial works as adapted to only having geometry + + https://geoalchemy-2.readthedocs.io/en/latest/orm_tutorial.html + """ + + # Connect to the DB + + from sqlalchemy import create_engine + + engine = create_engine(f"bigquery:///{bigquery_dataset}") + + # Declare a Mapping + + from sqlalchemy.ext.declarative import declarative_base + from sqlalchemy import Column, Integer, String + from sqlalchemy_bigquery import GEOGRAPHY + + Base = declarative_base() + + class Lake(Base): + __tablename__ = "lake" + # The ORM insists on an id, but bigquery doesn't auto-assign + # ids, so we'll have to provide them below. + id = Column(Integer, primary_key=True) + name = Column(String) + geog = Column(GEOGRAPHY) + + # Create the Table in the Database + + Lake.__table__.create(engine) + + # Create an Instance of the Mapped Class + + lake = Lake(id=1, name="Majeur", geog="POLYGON((0 0,1 0,1 1,0 1,0 0))") + + # Create a Session + + from sqlalchemy.orm import sessionmaker + + Session = sessionmaker(bind=engine) + + session = Session() + + # Add New Objects + + session.add(lake) + session.commit() + + our_lake = session.query(Lake).filter_by(name="Majeur").first() + assert our_lake.name == "Majeur" + + from geoalchemy2 import WKBElement + + assert isinstance(our_lake.geog, WKBElement) + + session.add_all( + [ + Lake(id=2, name="Garde", geog="POLYGON((1 0,3 0,3 2,1 2,1 0))"), + Lake(id=3, name="Orta", geog="POLYGON((3 0,6 0,6 3,3 3,3 0))"), + ] + ) + + session.commit() + + # Query + + query = session.query(Lake).order_by(Lake.name) + assert [lake.name for lake in query] == ["Garde", "Majeur", "Orta"] + + assert [lake.name for lake in session.query(Lake).order_by(Lake.name).all()] == [ + "Garde", + "Majeur", + "Orta", + ] + + # Make Spatial Queries + + from sqlalchemy import func + + query = session.query(Lake).filter(func.ST_Contains(Lake.geog, "POINT(4 1)")) + + assert [lake.name for lake in query] == ["Orta"] + + query = ( + session.query(Lake) + .filter(Lake.geog.ST_Intersects("LINESTRING(2 1,4 1)")) + .order_by(Lake.name) + ) + assert [lake.name for lake in query] == ["Garde", "Orta"] + + lake = session.query(Lake).filter_by(name="Garde").one() + assert session.scalar(lake.geog.ST_Intersects("LINESTRING(2 1,4 1)")) + + # Use Other Spatial Functions + query = session.query(Lake.name, func.ST_Area(Lake.geog).label("area")).order_by( + Lake.name + ) + assert [(name, int(area)) for name, area in query] == [ + ("Garde", 49452374328), + ("Majeur", 12364036567), + ("Orta", 111253664228), + ] + + query = session.query(Lake.name, Lake.geog.ST_Area().label("area")).order_by( + Lake.name + ) + assert [(name, int(area)) for name, area in query] == [ + ("Garde", 49452374328), + ("Majeur", 12364036567), + ("Orta", 111253664228), + ] + + +def test_geoalchemy2_orm_w_relationship(bigquery_dataset): + from sqlalchemy import create_engine + + engine = create_engine(f"bigquery:///{bigquery_dataset}") + + from sqlalchemy import Column, Integer, String + from sqlalchemy_bigquery import GEOGRAPHY + + from sqlalchemy.ext.declarative import declarative_base + + Base = declarative_base() + + class Treasure(Base): + __tablename__ = "treasure" + id = 
+        id = Column(Integer, primary_key=True)
+        geog = Column(GEOGRAPHY)
+
+    Treasure.__table__.create(engine)
+
+    from sqlalchemy.orm import relationship, backref
+
+    class Lake(Base):
+        __tablename__ = "lake"
+        id = Column(Integer, primary_key=True)
+        name = Column(String)
+        geog = Column(GEOGRAPHY)
+        treasures = relationship(
+            "Treasure",
+            primaryjoin="func.ST_Contains(foreign(Lake.geog), Treasure.geog).as_comparison(1, 2)",
+            backref=backref("lake_rel", uselist=False),
+            viewonly=True,
+            uselist=True,
+        )
+
+    Lake.__table__.create(engine)
+
+    from sqlalchemy.orm import sessionmaker
+
+    Session = sessionmaker(bind=engine)
+
+    session = Session()
+
+    session.add_all(
+        [
+            Treasure(id=21, geog="Point(1.5 1)"),
+            Treasure(id=22, geog="Point(2.5 1.5)"),
+            Treasure(id=31, geog="Point(4.5 2)"),
+            Treasure(id=42, geog="Point(5.5 1.5)"),
+            Lake(id=2, name="Garde", geog="POLYGON((1 0,3 0,3 2,1 2,1 0))"),
+            Lake(id=3, name="Orta", geog="POLYGON((3 0,6 0,6 3,3 3,3 0))"),
+        ]
+    )
+
+    session.commit()
+
+    lakes = session.query(Lake).order_by(Lake.name).all()
+    assert [(lake.id, sorted(t.id for t in lake.treasures)) for lake in lakes] == [
+        (2, [21, 22]),
+        (3, [31, 42]),
+    ]
diff --git a/tests/system/test_sqlalchemy_bigquery.py b/tests/system/test_sqlalchemy_bigquery.py
index 0fe878b2..d8622020 100644
--- a/tests/system/test_sqlalchemy_bigquery.py
+++ b/tests/system/test_sqlalchemy_bigquery.py
@@ -28,13 +28,13 @@
 from sqlalchemy.sql import expression, select, literal_column
 from sqlalchemy.exc import NoSuchTableError
 from sqlalchemy.orm import sessionmaker
+import packaging.version
 from pytz import timezone
 import pytest
 import sqlalchemy
 
 import datetime
 import decimal
-
 ONE_ROW_CONTENTS_EXPANDED = [
     588,
     datetime.datetime(2013, 10, 10, 11, 27, 16, tzinfo=timezone("UTC")),
@@ -234,7 +234,7 @@ def test_engine_with_dataset(engine_using_test_dataset, bigquery_dataset):
     table_one_row = Table(
         "sample_one_row", MetaData(bind=engine_using_test_dataset), autoload=True
     )
-    rows = table_one_row.select().execute().fetchall()
+    rows = table_one_row.select(use_labels=True).execute().fetchall()
     assert list(rows[0]) == ONE_ROW_CONTENTS_EXPANDED
 
     table_one_row = Table(
@@ -242,7 +242,7 @@ def test_engine_with_dataset(engine_using_test_dataset, bigquery_dataset):
         MetaData(bind=engine_using_test_dataset),
         autoload=True,
     )
-    rows = table_one_row.select().execute().fetchall()
+    rows = table_one_row.select(use_labels=True).execute().fetchall()
     # verify that we are pulling from the specifically-named dataset,
     # instead of pulling from the default dataset of the engine (which
    # does not have this table at all)
@@ -279,7 +279,12 @@ def test_reflect_select(table, table_using_test_dataset):
     assert isinstance(table.c["nested_record.record.name"].type, types.String)
     assert isinstance(table.c.array.type, types.ARRAY)
 
-    rows = table.select().execute().fetchall()
+    # Force unique column labels using `use_labels` below to deal
+    # with BQ sometimes complaining about duplicate column names
+    # as if a destination table were specified, even though no
+    # destination table actually is.  When this test was written,
+    # `use_labels` was forced by the dialect.
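+    # (With `use_labels`, columns are rendered as `SELECT t.col AS t_col,
+    # ...`, so every result column gets a distinct name.)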
+    rows = table.select(use_labels=True).execute().fetchall()
     assert len(rows) == 1000
 
 
@@ -296,7 +301,7 @@ def test_record_content_from_raw_queries(engine, bigquery_dataset):
 
 
 def test_content_from_reflect(engine, table_one_row):
-    rows = table_one_row.select().execute().fetchall()
+    rows = table_one_row.select(use_labels=True).execute().fetchall()
     assert list(rows[0]) == ONE_ROW_CONTENTS_EXPANDED
 
 
@@ -500,21 +505,21 @@ def test_querying_wildcard_tables(engine):
 
 
 def test_dml(engine, session, table_dml):
     # test insert
     engine.execute(table_dml.insert(ONE_ROW_CONTENTS_DML))
-    result = table_dml.select().execute().fetchall()
+    result = table_dml.select(use_labels=True).execute().fetchall()
     assert len(result) == 1
 
     # test update
     session.query(table_dml).filter(table_dml.c.string == "test").update(
         {"string": "updated_row"}, synchronize_session=False
     )
-    updated_result = table_dml.select().execute().fetchone()
+    updated_result = table_dml.select(use_labels=True).execute().fetchone()
     assert updated_result[table_dml.c.string] == "updated_row"
 
     # test delete
     session.query(table_dml).filter(table_dml.c.string == "updated_row").delete(
         synchronize_session=False
     )
-    result = table_dml.select().execute().fetchall()
+    result = table_dml.select(use_labels=True).execute().fetchall()
     assert len(result) == 0
 
 
@@ -686,3 +691,86 @@ def test_has_table(engine, engine_using_test_dataset, bigquery_dataset):
 
     assert engine_using_test_dataset.has_table(f"{bigquery_dataset}.sample") is True
     assert engine_using_test_dataset.has_table("sample_alt") is False
+
+
+def test_distinct_188(engine, bigquery_dataset):
+    from sqlalchemy.ext.declarative import declarative_base
+    from sqlalchemy import Column, Integer
+    from sqlalchemy.orm import sessionmaker
+
+    Base = declarative_base()
+
+    class MyTable(Base):
+        __tablename__ = f"{bigquery_dataset}.test_distinct_188"
+        id = Column(Integer, primary_key=True)
+        my_column = Column(Integer)
+
+    MyTable.__table__.create(engine)
+
+    Session = sessionmaker(bind=engine)
+    db = Session()
+    db.add_all([MyTable(id=i, my_column=i % 2) for i in range(9)])
+    db.commit()
+
+    expected = [(0,), (1,)]
+
+    assert sorted(db.query(MyTable.my_column).distinct().all()) == expected
+    assert (
+        sorted(
+            db.query(
+                sqlalchemy.distinct(MyTable.my_column).label("just_a_random_label")
+            ).all()
+        )
+        == expected
+    )
+
+    assert sorted(db.query(sqlalchemy.distinct(MyTable.my_column)).all()) == expected
+
+
+@pytest.mark.skipif(
+    packaging.version.parse(sqlalchemy.__version__) < packaging.version.parse("1.4"),
+    reason="requires sqlalchemy 1.4 or higher",
+)
+def test_huge_in():
+    engine = sqlalchemy.create_engine("bigquery://")
+    conn = engine.connect()
+    try:
+        assert list(
+            conn.execute(
+                sqlalchemy.select([sqlalchemy.literal(-1).in_(list(range(99999)))])
+            )
+        ) == [(False,)]
+    except Exception:
+        error = True
+    else:
+        error = False
+
+    assert not error, "execution failed"
+
+
+@pytest.mark.skipif(
+    packaging.version.parse(sqlalchemy.__version__) < packaging.version.parse("1.4"),
+    reason="unnest (and other table-valued-function) support requires sqlalchemy 1.4 or higher",
+)
+def test_unnest(engine, bigquery_dataset):
+    from sqlalchemy import select, func, String
+    from sqlalchemy_bigquery import ARRAY
+
+    conn = engine.connect()
+    metadata = MetaData()
+    table = Table(
+        f"{bigquery_dataset}.test_unnest", metadata, Column("objects", ARRAY(String)),
+    )
+    metadata.create_all(engine)
+    conn.execute(
+        table.insert(), [dict(objects=["a", "b", "c"]), dict(objects=["x", "y"])]
+    )
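+    # The expected SQL below joins the base table with the UNNEST
+    # table-valued function (a comma join), which appears to be how
+    # BigQuery flattens an ARRAY column into rows.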
+    query = select([func.unnest(table.c.objects).alias("foo_objects").column])
+    compiled = str(query.compile(engine))
+    assert " ".join(compiled.strip().split()) == (
+        f"SELECT `foo_objects`"
+        f" FROM"
+        f" `{bigquery_dataset}.test_unnest` `{bigquery_dataset}.test_unnest_1`,"
+        f" unnest(`{bigquery_dataset}.test_unnest_1`.`objects`) AS `foo_objects`"
+    )
+    assert sorted(r[0] for r in conn.execute(query)) == ["a", "b", "c", "x", "y"]
diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py
index 1c78b12d..886e9aee 100644
--- a/tests/unit/conftest.py
+++ b/tests/unit/conftest.py
@@ -21,20 +21,24 @@
 import mock
 import sqlite3
 
+import packaging.version
 import pytest
 import sqlalchemy
 
 import fauxdbi
 
-sqlalchemy_version_info = tuple(map(int, sqlalchemy.__version__.split(".")))
+sqlalchemy_version = packaging.version.parse(sqlalchemy.__version__)
 
 sqlalchemy_1_3_or_higher = pytest.mark.skipif(
-    sqlalchemy_version_info < (1, 3), reason="requires sqlalchemy 1.3 or higher"
+    sqlalchemy_version < packaging.version.parse("1.3"),
+    reason="requires sqlalchemy 1.3 or higher",
 )
 sqlalchemy_1_4_or_higher = pytest.mark.skipif(
-    sqlalchemy_version_info < (1, 4), reason="requires sqlalchemy 1.4 or higher"
+    sqlalchemy_version < packaging.version.parse("1.4"),
+    reason="requires sqlalchemy 1.4 or higher",
 )
 sqlalchemy_before_1_4 = pytest.mark.skipif(
-    sqlalchemy_version_info >= (1, 4), reason="requires sqlalchemy 1.3 or lower"
+    sqlalchemy_version >= packaging.version.parse("1.4"),
+    reason="requires sqlalchemy 1.3 or lower",
 )
@@ -71,6 +75,17 @@ def ex(sql, *args, **kw):
         conn.close()
 
 
+@pytest.fixture()
+def last_query(faux_conn):
+    def last_query(sql, params=None, offset=1):
+        actual_sql, actual_params = faux_conn.test_data["execute"][-offset]
+        assert actual_sql == sql
+        if params is not None:
+            assert actual_params == params
+
+    return last_query
+
+
 @pytest.fixture()
 def metadata():
     return sqlalchemy.MetaData()
diff --git a/tests/unit/fauxdbi.py b/tests/unit/fauxdbi.py
index 56c44e0f..631996af 100644
--- a/tests/unit/fauxdbi.py
+++ b/tests/unit/fauxdbi.py
@@ -261,11 +261,20 @@ def __handle_problematic_literal_inserts(
         else:
             return operation
 
-    __handle_unnest = substitute_string_re_method(
-        r"UNNEST\(\[ ([^\]]+)? \]\)",  # UNNEST([ ... ])
-        flags=re.IGNORECASE,
-        repl=r"(\1)",
+    @substitute_re_method(
+        r"""
+        UNNEST\(
+        (
+            \[ (?P<exp>[^\]]+)? \]  # UNNEST([ ... ])
+            |
+            ([?])                   # UNNEST(?)
+        )
+        \)
+        """,
+        flags=re.IGNORECASE | re.VERBOSE,
     )
+    def __handle_unnest(self, m):
+        return "(" + (m.group("exp") or "?") + ")"
 
     def __handle_true_false(self, operation):
         # Older sqlite versions, like those used on the CI servers
diff --git a/tests/unit/test_compliance.py b/tests/unit/test_compliance.py
index 7cae1825..9ba27cf6 100644
--- a/tests/unit/test_compliance.py
+++ b/tests/unit/test_compliance.py
@@ -200,3 +200,19 @@ def test_group_by_composed(faux_conn):
         select([sqlalchemy.func.count(table.c.id), expr]).group_by(expr).order_by(expr)
     )
     assert_result(faux_conn, stmt, [(1, 3), (1, 5), (1, 7)])
+
+
+def test_cast_type_decorator(faux_conn, last_query):
+    # Partial dup of:
+    # sqlalchemy.testing.suite.test_types.CastTypeDecoratorTest.test_special_type
+    # That test fails without code that's otherwise not covered by the unit tests.
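+    # (The code in question applies a type's `bind_expression` to bound
+    # parameters; the `StringAsInt` decorator below exercises it via CAST.)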
+
+    class StringAsInt(sqlalchemy.TypeDecorator):
+        impl = sqlalchemy.String(50)
+
+        def bind_expression(self, col):
+            return sqlalchemy.cast(col, String(50))
+
+    t = setup_table(faux_conn, "t", Column("x", StringAsInt()))
+    faux_conn.execute(t.insert(), [{"x": x} for x in [1, 2, 3]])
+    last_query("INSERT INTO `t` (`x`) VALUES (CAST(%(x:STRING)s AS STRING))", {"x": 3})
diff --git a/tests/unit/test_geography.py b/tests/unit/test_geography.py
new file mode 100644
index 00000000..3ee2cce6
--- /dev/null
+++ b/tests/unit/test_geography.py
@@ -0,0 +1,179 @@
+# Copyright (c) 2021 The PyBigQuery Authors
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of
+# this software and associated documentation files (the "Software"), to deal in
+# the Software without restriction, including without limitation the rights to
+# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so,
+# subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+import pytest
+
+from conftest import setup_table
+
+geoalchemy2 = pytest.importorskip("geoalchemy2")
+
+
+def test_geoalchemy2_core(faux_conn, last_query):
+    """Make sure the GeoAlchemy 2 Core Tutorial works, as adapted to only having geography
+    """
+    conn = faux_conn
+
+    # Create the Table
+
+    from sqlalchemy import Column, String
+    from sqlalchemy_bigquery import GEOGRAPHY
+
+    lake_table = setup_table(
+        conn, "lake", Column("name", String), Column("geog", GEOGRAPHY)
+    )
+
+    # Insertions
+
+    conn.execute(
+        lake_table.insert().values(
+            name="Majeur", geog="POLYGON((0 0,1 0,1 1,0 1,0 0))",
+        )
+    )
+
+    last_query(
+        "INSERT INTO `lake` (`name`, `geog`)"
+        " VALUES (%(name:STRING)s, %(geog:geography)s)",
+        ({"geog": "POLYGON((0 0,1 0,1 1,0 1,0 0))", "name": "Majeur"}),
+    )
+
+    conn.execute(
+        lake_table.insert(),
+        [
+            {"name": "Garde", "geog": "POLYGON((1 0,3 0,3 2,1 2,1 0))"},
+            {"name": "Orta", "geog": "POLYGON((3 0,6 0,6 3,3 3,3 0))"},
+        ],
+    )
+    last_query(
+        "INSERT INTO `lake` (`name`, `geog`)"
+        " VALUES (%(name:STRING)s, %(geog:geography)s)",
+        {"name": "Garde", "geog": "POLYGON((1 0,3 0,3 2,1 2,1 0))"},
+        offset=2,
+    )
+    last_query(
+        "INSERT INTO `lake` (`name`, `geog`)"
+        " VALUES (%(name:STRING)s, %(geog:geography)s)",
+        {"name": "Orta", "geog": "POLYGON((3 0,6 0,6 3,3 3,3 0))"},
+    )
+
+    # Selections
+
+    from sqlalchemy.sql import select
+
+    try:
+        conn.execute(select([lake_table]))
+    except Exception:
+        pass  # sqlite had no special functions :)
+    last_query(
+        "SELECT `lake`.`name`, ST_AsBinary(`lake`.`geog`) AS `geog` \n" "FROM `lake`"
+    )
+
+    # Spatial query
+
+    from sqlalchemy import func
+
+    try:
+        conn.execute(
+            select(
+                [lake_table.c.name], func.ST_Contains(lake_table.c.geog, "POINT(4 1)")
+            )
+        )
+    except Exception:
+        pass  # sqlite had no special functions :)
+    last_query(
+        "SELECT `lake`.`name` \n"
+        "FROM `lake` \n"
"WHERE ST_Contains(`lake`.`geog`, %(ST_Contains_1:geography)s)", + {"ST_Contains_1": "POINT(4 1)"}, + ) + + try: + conn.execute( + select([lake_table.c.name, lake_table.c.geog.ST_AREA().label("area")]) + ) + except Exception: + pass # sqlite had no special functions :) + last_query("SELECT `lake`.`name`, ST_Area(`lake`.`geog`) AS `area` \nFROM `lake`") + + # Extra: Make sure we can save a retrieved value back: + + from sqlalchemy_bigquery import WKB, WKT + + geog = WKT("point(0 0)").wkb + assert isinstance(geog, WKB) + assert geog.data == ( + b"\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00" + b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + ) + conn.execute(lake_table.insert().values(name="test", geog=geog)) + last_query( + "INSERT INTO `lake` (`name`, `geog`)" + " VALUES (%(name:STRING)s, %(geog:geography)s)", + {"name": "test", "geog": "POINT (0 0)"}, + ) + + # and, while we're at it, that we can insert WKTs, although we + # normally wouldn't want to. + + conn.execute( + lake_table.insert().values( + name="test2", geog=WKT("POLYGON((1 0,3 0,3 2,1 2,1 0))"), + ) + ) + last_query( + "INSERT INTO `lake` (`name`, `geog`)" + " VALUES (%(name:STRING)s, %(geog:geography)s)", + {"name": "test2", "geog": "POLYGON((1 0,3 0,3 2,1 2,1 0))"}, + ) + + +def test_GEOGRAPHY_ElementType_bad_srid(): + from sqlalchemy_bigquery import GEOGRAPHY + + with pytest.raises(AssertionError, match="Bad srid"): + GEOGRAPHY.ElementType("data", srid=-1) + + +def test_GEOGRAPHY_ElementType_bad_extended(): + from sqlalchemy_bigquery import GEOGRAPHY + + with pytest.raises(AssertionError, match="Extended must be True."): + GEOGRAPHY.ElementType("data", extended=False) + + +def test_GEOGRAPHY_ElementType(): + from sqlalchemy_bigquery import GEOGRAPHY, WKB + + data = GEOGRAPHY.ElementType("data") + assert isinstance(data, WKB) + assert (data.data, data.srid, data.extended) == ("data", 4326, True) + + +def test_calling_st_functions_that_dont_take_geographies(faux_conn, last_query): + from sqlalchemy import select, func + + try: + faux_conn.execute(select([func.ST_GEOGFROMTEXT("point(0 0)")])) + except Exception: + pass # sqlite had no special functions :) + + last_query( + "SELECT ST_AsBinary(ST_GeogFromText(%(ST_GeogFromText_2:STRING)s))" + " AS `ST_GeogFromText_1`", + dict(ST_GeogFromText_2="point(0 0)"), + ) diff --git a/tests/unit/test_select.py b/tests/unit/test_select.py index 5d49ae68..474fc9d9 100644 --- a/tests/unit/test_select.py +++ b/tests/unit/test_select.py @@ -20,6 +20,7 @@ import datetime from decimal import Decimal +import packaging.version import pytest import sqlalchemy @@ -27,6 +28,7 @@ from conftest import ( setup_table, + sqlalchemy_version, sqlalchemy_1_3_or_higher, sqlalchemy_1_4_or_higher, sqlalchemy_before_1_4, @@ -213,18 +215,6 @@ def test_disable_quote(faux_conn): assert faux_conn.test_data["execute"][-1][0] == ("SELECT `t`.foo \nFROM `t`") -def _normalize_in_params(query, params): - # We have to normalize parameter names, because they - # change with sqlalchemy versions. 
-    newnames = sorted(
-        ((p, f"p_{i}") for i, p in enumerate(sorted(params))), key=lambda i: -len(i[0])
-    )
-    for old, new in newnames:
-        query = query.replace(old, new)
-
-    return query, {new: params[old] for old, new in newnames}
-
-
 @sqlalchemy_before_1_4
 def test_select_in_lit_13(faux_conn):
     [[isin]] = faux_conn.execute(
@@ -239,63 +229,74 @@
 @sqlalchemy_1_4_or_higher
-def test_select_in_lit(faux_conn):
-    [[isin]] = faux_conn.execute(
-        sqlalchemy.select([sqlalchemy.literal(1).in_([1, 2, 3])])
-    )
-    assert isin
-    assert _normalize_in_params(*faux_conn.test_data["execute"][-1]) == (
-        "SELECT %(p_0:INT64)s IN "
-        "UNNEST([ %(p_1:INT64)s, %(p_2:INT64)s, %(p_3:INT64)s ]) AS `anon_1`",
-        {"p_1": 1, "p_2": 2, "p_3": 3, "p_0": 1},
+def test_select_in_lit(faux_conn, last_query):
+    faux_conn.execute(sqlalchemy.select([sqlalchemy.literal(1).in_([1, 2, 3])]))
+    last_query(
+        "SELECT %(param_1:INT64)s IN UNNEST(%(param_2:INT64)s) AS `anon_1`",
+        {"param_1": 1, "param_2": [1, 2, 3]},
     )
 
 
-def test_select_in_param(faux_conn):
+def test_select_in_param(faux_conn, last_query):
     [[isin]] = faux_conn.execute(
         sqlalchemy.select(
             [sqlalchemy.literal(1).in_(sqlalchemy.bindparam("q", expanding=True))]
         ),
         dict(q=[1, 2, 3]),
     )
-    assert isin
-    assert faux_conn.test_data["execute"][-1] == (
-        "SELECT %(param_1:INT64)s IN UNNEST("
-        "[ %(q_1:INT64)s, %(q_2:INT64)s, %(q_3:INT64)s ]"
-        ") AS `anon_1`",
-        {"param_1": 1, "q_1": 1, "q_2": 2, "q_3": 3},
-    )
+    if sqlalchemy_version >= packaging.version.parse("1.4"):
+        last_query(
+            "SELECT %(param_1:INT64)s IN UNNEST(%(q:INT64)s) AS `anon_1`",
+            {"param_1": 1, "q": [1, 2, 3]},
+        )
+    else:
+        assert isin
+        last_query(
+            "SELECT %(param_1:INT64)s IN UNNEST("
+            "[ %(q_1:INT64)s, %(q_2:INT64)s, %(q_3:INT64)s ]"
+            ") AS `anon_1`",
+            {"param_1": 1, "q_1": 1, "q_2": 2, "q_3": 3},
+        )
 
 
-def test_select_in_param1(faux_conn):
+def test_select_in_param1(faux_conn, last_query):
     [[isin]] = faux_conn.execute(
         sqlalchemy.select(
             [sqlalchemy.literal(1).in_(sqlalchemy.bindparam("q", expanding=True))]
         ),
         dict(q=[1]),
     )
-    assert isin
-    assert faux_conn.test_data["execute"][-1] == (
-        "SELECT %(param_1:INT64)s IN UNNEST(" "[ %(q_1:INT64)s ]" ") AS `anon_1`",
-        {"param_1": 1, "q_1": 1},
-    )
+    if sqlalchemy_version >= packaging.version.parse("1.4"):
+        last_query(
+            "SELECT %(param_1:INT64)s IN UNNEST(%(q:INT64)s) AS `anon_1`",
+            {"param_1": 1, "q": [1]},
+        )
+    else:
+        assert isin
+        last_query(
+            "SELECT %(param_1:INT64)s IN UNNEST(" "[ %(q_1:INT64)s ]" ") AS `anon_1`",
+            {"param_1": 1, "q_1": 1},
+        )
 
 
 @sqlalchemy_1_3_or_higher
-def test_select_in_param_empty(faux_conn):
+def test_select_in_param_empty(faux_conn, last_query):
     [[isin]] = faux_conn.execute(
         sqlalchemy.select(
             [sqlalchemy.literal(1).in_(sqlalchemy.bindparam("q", expanding=True))]
         ),
         dict(q=[]),
     )
-    assert not isin
-    assert faux_conn.test_data["execute"][-1] == (
-        "SELECT %(param_1:INT64)s IN(NULL) AND (1 != 1) AS `anon_1`"
-        if sqlalchemy.__version__ >= "1.4"
-        else "SELECT %(param_1:INT64)s IN UNNEST([ ]) AS `anon_1`",
-        {"param_1": 1},
-    )
+    if sqlalchemy_version >= packaging.version.parse("1.4"):
+        last_query(
+            "SELECT %(param_1:INT64)s IN UNNEST(%(q:INT64)s) AS `anon_1`",
+            {"param_1": 1, "q": []},
+        )
+    else:
+        assert not isin
+        last_query(
+            "SELECT %(param_1:INT64)s IN UNNEST([ ]) AS `anon_1`", {"param_1": 1}
+        )
 
 
 @sqlalchemy_before_1_4
@@ -312,47 +313,120 @@
 @sqlalchemy_1_4_or_higher
-def test_select_notin_lit(faux_conn):
-    [[isnotin]] = faux_conn.execute(
-        sqlalchemy.select([sqlalchemy.literal(0).notin_([1, 2, 3])])
-    )
-    assert isnotin
-
-    assert _normalize_in_params(*faux_conn.test_data["execute"][-1]) == (
-        "SELECT (%(p_0:INT64)s NOT IN "
-        "UNNEST([ %(p_1:INT64)s, %(p_2:INT64)s, %(p_3:INT64)s ])) AS `anon_1`",
-        {"p_0": 0, "p_1": 1, "p_2": 2, "p_3": 3},
+def test_select_notin_lit(faux_conn, last_query):
+    faux_conn.execute(sqlalchemy.select([sqlalchemy.literal(0).notin_([1, 2, 3])]))
+    last_query(
+        "SELECT (%(param_1:INT64)s NOT IN UNNEST(%(param_2:INT64)s)) AS `anon_1`",
+        {"param_1": 0, "param_2": [1, 2, 3]},
     )
 
 
-def test_select_notin_param(faux_conn):
+def test_select_notin_param(faux_conn, last_query):
     [[isnotin]] = faux_conn.execute(
         sqlalchemy.select(
             [sqlalchemy.literal(1).notin_(sqlalchemy.bindparam("q", expanding=True))]
         ),
         dict(q=[1, 2, 3]),
     )
-    assert not isnotin
-    assert faux_conn.test_data["execute"][-1] == (
-        "SELECT (%(param_1:INT64)s NOT IN UNNEST("
-        "[ %(q_1:INT64)s, %(q_2:INT64)s, %(q_3:INT64)s ]"
-        ")) AS `anon_1`",
-        {"param_1": 1, "q_1": 1, "q_2": 2, "q_3": 3},
-    )
+    if sqlalchemy_version >= packaging.version.parse("1.4"):
+        last_query(
+            "SELECT (%(param_1:INT64)s NOT IN UNNEST(%(q:INT64)s)) AS `anon_1`",
+            {"param_1": 1, "q": [1, 2, 3]},
+        )
+    else:
+        assert not isnotin
+        last_query(
+            "SELECT (%(param_1:INT64)s NOT IN UNNEST("
+            "[ %(q_1:INT64)s, %(q_2:INT64)s, %(q_3:INT64)s ]"
+            ")) AS `anon_1`",
+            {"param_1": 1, "q_1": 1, "q_2": 2, "q_3": 3},
+        )
 
 
 @sqlalchemy_1_3_or_higher
-def test_select_notin_param_empty(faux_conn):
+def test_select_notin_param_empty(faux_conn, last_query):
     [[isnotin]] = faux_conn.execute(
         sqlalchemy.select(
             [sqlalchemy.literal(1).notin_(sqlalchemy.bindparam("q", expanding=True))]
         ),
         dict(q=[]),
     )
-    assert isnotin
-    assert faux_conn.test_data["execute"][-1] == (
-        "SELECT (%(param_1:INT64)s NOT IN(NULL) OR (1 = 1)) AS `anon_1`"
-        if sqlalchemy.__version__ >= "1.4"
-        else "SELECT (%(param_1:INT64)s NOT IN UNNEST([ ])) AS `anon_1`",
-        {"param_1": 1},
-    )
+    if sqlalchemy_version >= packaging.version.parse("1.4"):
+        last_query(
+            "SELECT (%(param_1:INT64)s NOT IN UNNEST(%(q:INT64)s)) AS `anon_1`",
+            {"param_1": 1, "q": []},
+        )
+    else:
+        assert isnotin
+        last_query(
+            "SELECT (%(param_1:INT64)s NOT IN UNNEST([ ])) AS `anon_1`", {"param_1": 1}
+        )
+
+
+def test_literal_binds_kwarg_with_an_IN_operator_252(faux_conn):
+    table = setup_table(
+        faux_conn,
+        "test",
+        sqlalchemy.Column("val", sqlalchemy.Integer),
+        initial_data=[dict(val=i) for i in range(3)],
+    )
+    q = sqlalchemy.select([table.c.val]).where(table.c.val.in_([2]))
+
+    def nstr(q):
+        return " ".join(str(q).strip().split())
+
+    assert (
+        nstr(q.compile(faux_conn.engine, compile_kwargs={"literal_binds": True}))
+        == "SELECT `test`.`val` FROM `test` WHERE `test`.`val` IN (2)"
+    )
+
+
+@sqlalchemy_1_4_or_higher
+@pytest.mark.parametrize("alias", [True, False])
+def test_unnest(faux_conn, alias):
+    from sqlalchemy import String
+    from sqlalchemy_bigquery import ARRAY
+
+    table = setup_table(faux_conn, "t", sqlalchemy.Column("objects", ARRAY(String)))
+    fcall = sqlalchemy.func.unnest(table.c.objects)
+    if alias:
+        query = fcall.alias("foo_objects").column
+    else:
+        query = fcall.column_valued("foo_objects")
+    compiled = str(sqlalchemy.select(query).compile(faux_conn.engine))
+    assert " ".join(compiled.strip().split()) == (
+        "SELECT `foo_objects` FROM `t` `t_1`, unnest(`t_1`.`objects`) AS `foo_objects`"
+    )
+
+
+@sqlalchemy_1_4_or_higher
+@pytest.mark.parametrize("alias", [True, False])
+def test_table_valued_alias_w_multiple_references_to_the_same_table(faux_conn, alias):
+    from sqlalchemy import String
+    from sqlalchemy_bigquery import ARRAY
+
+    table = setup_table(faux_conn, "t", sqlalchemy.Column("objects", ARRAY(String)))
+    fcall = sqlalchemy.func.foo(table.c.objects, table.c.objects)
+    if alias:
+        query = fcall.alias("foo_objects").column
+    else:
+        query = fcall.column_valued("foo_objects")
+    compiled = str(sqlalchemy.select(query).compile(faux_conn.engine))
+    assert " ".join(compiled.strip().split()) == (
+        "SELECT `foo_objects` "
+        "FROM `t` `t_1`, foo(`t_1`.`objects`, `t_1`.`objects`) AS `foo_objects`"
+    )
+
+
+@sqlalchemy_1_4_or_higher
+@pytest.mark.parametrize("alias", [True, False])
+def test_unnest_w_no_table_references(faux_conn, alias):
+    fcall = sqlalchemy.func.unnest([1, 2, 3])
+    if alias:
+        query = fcall.alias().column
+    else:
+        query = fcall.column_valued()
+    compiled = str(sqlalchemy.select(query).compile(faux_conn.engine))
+    assert " ".join(compiled.strip().split()) == (
+        "SELECT `anon_1` FROM unnest(%(unnest_1)s) AS `anon_1`"
+    )
diff --git a/tests/unit/test_sqlalchemy_bigquery.py b/tests/unit/test_sqlalchemy_bigquery.py
index 78b5b3cd..a4c81367 100644
--- a/tests/unit/test_sqlalchemy_bigquery.py
+++ b/tests/unit/test_sqlalchemy_bigquery.py
@@ -13,6 +13,8 @@
 import pytest
 import sqlalchemy
 
+from conftest import setup_table
+
 
 @pytest.fixture
 def mock_bigquery_client():
@@ -158,3 +160,21 @@ def test__remove_type_from_empty_in(inp, outp):
     r = BigQueryExecutionContext._BigQueryExecutionContext__remove_type_from_empty_in
 
     assert r(None, inp) == outp
+
+
+def test_multi_value_insert(faux_conn, last_query):
+    table = setup_table(faux_conn, "t", sqlalchemy.Column("id", sqlalchemy.Integer))
+    faux_conn.execute(table.insert().values([dict(id=i) for i in range(3)]))
+
+    last_query(
+        "INSERT INTO `t` (`id`) VALUES"
+        " (%(id_m0:INT64)s), (%(id_m1:INT64)s), (%(id_m2:INT64)s)",
+        {"id_m0": 0, "id_m1": 1, "id_m2": 2},
+    )
+
+
+def test_follow_dialect_attribute_convention():
+    import sqlalchemy_bigquery.base
+
+    assert sqlalchemy_bigquery.dialect is sqlalchemy_bigquery.BigQueryDialect
+    assert sqlalchemy_bigquery.base.dialect is sqlalchemy_bigquery.BigQueryDialect
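+
+    # (SQLAlchemy can resolve a dialect entry point to a module, in which
+    # case it reads the module's `dialect` attribute; exposing it on both
+    # the package and its `base` module keeps either path working.)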