Skip to content

refactor: consolidate BigQuery client creation and set user-agent #100

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Apr 1, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions pybigquery/_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Copyright 2021 The PyBigQuery Authors
#
# Use of this source code is governed by an MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT.

from google.api_core import client_info
import google.auth
from google.cloud import bigquery
from google.oauth2 import service_account
import sqlalchemy


# Template for the user-agent string attached to the BigQuery client; the
# placeholder is filled with the installed SQLAlchemy version.
USER_AGENT_TEMPLATE = "sqlalchemy/{}"
# OAuth scopes requested for every credential used to build a client.
# NOTE(review): the Drive scope is presumably needed for Drive-backed
# external tables — confirm before removing.
SCOPES = (
"https://www.googleapis.com/auth/bigquery",
"https://www.googleapis.com/auth/cloud-platform",
"https://www.googleapis.com/auth/drive",
)


def google_client_info():
    """Return client info whose user agent names the SQLAlchemy version."""
    return client_info.ClientInfo(
        user_agent=USER_AGENT_TEMPLATE.format(sqlalchemy.__version__)
    )


def create_bigquery_client(
    credentials_info=None,
    credentials_path=None,
    default_query_job_config=None,
    location=None,
    project_id=None,
):
    """Create a ``bigquery.Client`` whose user agent identifies SQLAlchemy.

    Credentials are resolved in this order: a service-account key file
    (``credentials_path``), then in-memory service-account key contents
    (``credentials_info``), then ``google.auth.default``. In every case the
    credentials are requested with ``SCOPES``.

    Args:
        credentials_info: Service-account key contents, passed to
            ``service_account.Credentials.from_service_account_info``.
        credentials_path: Path to a service-account key file, passed to
            ``service_account.Credentials.from_service_account_file``.
        default_query_job_config: Forwarded unchanged to ``bigquery.Client``.
        location: Forwarded unchanged to ``bigquery.Client``.
        project_id: Project for the client. When ``None``, the project
            associated with the resolved credentials is used instead.

    Returns:
        The configured ``bigquery.Client``.
    """
    if credentials_path or credentials_info:
        # A key file takes precedence over in-memory key contents.
        if credentials_path:
            credentials = service_account.Credentials.from_service_account_file(
                credentials_path
            )
        else:
            credentials = service_account.Credentials.from_service_account_info(
                credentials_info
            )
        credentials = credentials.with_scopes(SCOPES)
        # Service-account credentials expose their project as ``project_id``;
        # reading ``.project`` on them raises AttributeError. The ``project``
        # lookup is kept only for credential-like stand-ins that define it.
        default_project = getattr(credentials, "project_id", None)
        if default_project is None:
            default_project = getattr(credentials, "project", None)
    else:
        credentials, default_project = google.auth.default(scopes=SCOPES)

    # An explicitly requested project always wins over the credentials' own.
    if project_id is None:
        project_id = default_project

    # NOTE: no availability check is done for ``client_info``. Per review,
    # the minimum google-cloud-bigquery version was raised to 1.12.0 instead
    # of adding a feature-detection hook here.
    return bigquery.Client(
        client_info=google_client_info(),
        project=project_id,
        credentials=credentials,
        location=location,
        default_query_job_config=default_query_job_config,
    )
13 changes: 6 additions & 7 deletions pybigquery/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,18 @@
from __future__ import absolute_import
from __future__ import unicode_literals

from google.cloud.bigquery import Client, QueryJobConfig
from google.cloud.bigquery import QueryJobConfig

from pybigquery import _helpers


class ApiClient(object):
def __init__(self, credentials_path=None, location=None):
    """Store the connection settings and build the BigQuery client.

    Args:
        credentials_path: Optional path to a service-account key file,
            forwarded to ``_helpers.create_bigquery_client``.
        location: Optional location, forwarded to
            ``_helpers.create_bigquery_client``.
    """
    self.credentials_path = credentials_path
    self.location = location
    # Build the client exactly once, through the shared helper. Constructing
    # one directly via ``Client`` first was redundant: it was overwritten
    # straight away by the helper's client.
    self.client = _helpers.create_bigquery_client(
        credentials_path=credentials_path, location=location
    )

def dry_run_query(self, query):
job_config = QueryJobConfig()
Expand Down
65 changes: 11 additions & 54 deletions pybigquery/sqlalchemy_bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,9 @@
import operator

from google import auth
from google.cloud import bigquery
from google.cloud.bigquery import dbapi
from google.cloud.bigquery.schema import SchemaField
from google.cloud.bigquery.table import TableReference
from google.oauth2 import service_account
from google.api_core.exceptions import NotFound
from sqlalchemy.exc import NoSuchTableError
from sqlalchemy import types, util
Expand All @@ -46,6 +44,7 @@
import re

from .parse_url import parse_url
from pybigquery import _helpers

FIELD_ILLEGAL_CHARACTERS = re.compile(r"[^\w]+")

Expand Down Expand Up @@ -342,30 +341,6 @@ def _add_default_dataset_to_job_config(job_config, project_id, dataset_id):

job_config.default_dataset = "{}.{}".format(project_id, dataset_id)

def _create_client_from_credentials(
    self, credentials, default_query_job_config, project_id
):
    """Build a BigQuery client from explicit credentials.

    Uses the credentials' own ``project_id`` when ``project_id`` is ``None``,
    scopes the credentials for BigQuery, Cloud Platform and Drive, and hands
    ``default_query_job_config`` to ``_add_default_dataset_to_job_config``
    with the chosen project and ``self.dataset_id`` before creating the
    client.
    """
    target_project = credentials.project_id if project_id is None else project_id

    scoped_credentials = credentials.with_scopes(
        (
            "https://www.googleapis.com/auth/bigquery",
            "https://www.googleapis.com/auth/cloud-platform",
            "https://www.googleapis.com/auth/drive",
        )
    )

    self._add_default_dataset_to_job_config(
        default_query_job_config, target_project, self.dataset_id
    )

    client_kwargs = {
        "project": target_project,
        "credentials": scoped_credentials,
        "location": self.location,
        "default_query_job_config": default_query_job_config,
    }
    return bigquery.Client(**client_kwargs)

def create_connect_args(self, url):
(
project_id,
Expand All @@ -380,34 +355,16 @@ def create_connect_args(self, url):
self.location = location or self.location
self.credentials_path = credentials_path or self.credentials_path
self.dataset_id = dataset_id

if self.credentials_path:
credentials = service_account.Credentials.from_service_account_file(
self.credentials_path
)
client = self._create_client_from_credentials(
credentials, default_query_job_config, project_id
)

elif self.credentials_info:
credentials = service_account.Credentials.from_service_account_info(
self.credentials_info
)
client = self._create_client_from_credentials(
credentials, default_query_job_config, project_id
)

else:
self._add_default_dataset_to_job_config(
default_query_job_config, project_id, dataset_id
)

client = bigquery.Client(
project=project_id,
location=self.location,
default_query_job_config=default_query_job_config,
)

self._add_default_dataset_to_job_config(
default_query_job_config, project_id, dataset_id
)
client = _helpers.create_bigquery_client(
credentials_path=self.credentials_path,
credentials_info=self.credentials_info,
project_id=project_id,
location=self.location,
default_query_job_config=default_query_job_config,
)
return ([client], {})

def _json_deserializer(self, row):
Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ def readme():
platforms="Posix; MacOS X; Windows",
install_requires=[
"sqlalchemy>=1.1.9,<1.4.0dev",
"google-cloud-bigquery>=1.6.0",
"google-auth>=1.2.0,<2.0dev",
"google-cloud-bigquery>=1.12.0",
"future",
],
python_requires=">=3.6, <3.10",
Expand Down
3 changes: 2 additions & 1 deletion testing/constraints-3.6.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@
#
# e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev",
sqlalchemy==1.1.9
google-cloud-bigquery==1.6.0
google-auth==1.2.0
google-cloud-bigquery==1.12.0
138 changes: 138 additions & 0 deletions tests/unit/test_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
# Copyright 2021 The PyBigQuery Authors
#
# Use of this source code is governed by an MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT.

from unittest import mock

import google.auth
import google.auth.credentials
from google.oauth2 import service_account
import pytest


class AnonymousCredentialsWithProject(google.auth.credentials.AnonymousCredentials):
    """Fake credentials to trick isinstance.

    Stands in for service-account credentials in tests: it carries a project
    and returns itself from ``with_scopes`` so no real scoping happens.
    """

    def __init__(self, project):
        super().__init__()
        self.project = project
        # Real service-account credentials name this attribute ``project_id``;
        # expose it as well so the fake matches what production code reads.
        self.project_id = project

    def with_scopes(self, scopes):
        """Return this same object; the requested scopes are ignored."""
        return self


@pytest.fixture(scope="session")
def module_under_test():
    """Provide ``pybigquery._helpers``, imported when the fixture is set up."""
    import pybigquery._helpers as helpers_module

    return helpers_module


def test_create_bigquery_client_with_credentials_path(monkeypatch, module_under_test):
    """A key file's project becomes the client project when none is given."""
    fake_credentials = AnonymousCredentialsWithProject("service-account-project")
    credentials_cls = mock.create_autospec(service_account.Credentials)
    credentials_cls.from_service_account_file.return_value = fake_credentials
    monkeypatch.setattr(service_account, "Credentials", credentials_cls)

    client = module_under_test.create_bigquery_client(
        credentials_path="path/to/key.json"
    )

    assert client.project == "service-account-project"


def test_create_bigquery_client_with_credentials_path_respects_project(
    monkeypatch, module_under_test
):
    """Test that project_id is used, even when there is a default project.

    https://github.com/googleapis/python-bigquery-sqlalchemy/issues/48
    """
    fake_credentials = AnonymousCredentialsWithProject("service-account-project")
    credentials_cls = mock.create_autospec(service_account.Credentials)
    credentials_cls.from_service_account_file.return_value = fake_credentials
    monkeypatch.setattr(service_account, "Credentials", credentials_cls)

    client_kwargs = {
        "credentials_path": "path/to/key.json",
        "project_id": "connection-url-project",
    }
    client = module_under_test.create_bigquery_client(**client_kwargs)

    assert client.project == "connection-url-project"


def test_create_bigquery_client_with_credentials_info(monkeypatch, module_under_test):
    """In-memory key contents supply the client project when none is given."""
    fake_credentials = AnonymousCredentialsWithProject("service-account-project")
    credentials_cls = mock.create_autospec(service_account.Credentials)
    credentials_cls.from_service_account_info.return_value = fake_credentials
    monkeypatch.setattr(service_account, "Credentials", credentials_cls)

    key_contents = {
        "type": "service_account",
        "project_id": "service-account-project",
    }
    client = module_under_test.create_bigquery_client(credentials_info=key_contents)

    assert client.project == "service-account-project"


def test_create_bigquery_client_with_credentials_info_respects_project(
    monkeypatch, module_under_test
):
    """Test that project_id is used, even when there is a default project.

    https://github.com/googleapis/python-bigquery-sqlalchemy/issues/48
    """
    fake_credentials = AnonymousCredentialsWithProject("service-account-project")
    credentials_cls = mock.create_autospec(service_account.Credentials)
    credentials_cls.from_service_account_info.return_value = fake_credentials
    monkeypatch.setattr(service_account, "Credentials", credentials_cls)

    key_contents = {
        "type": "service_account",
        "project_id": "service-account-project",
    }
    client = module_under_test.create_bigquery_client(
        credentials_info=key_contents,
        project_id="connection-url-project",
    )

    assert client.project == "connection-url-project"


def test_create_bigquery_client_with_default_credentials(
    monkeypatch, module_under_test
):
    """The project from ``google.auth.default`` is used when none is given."""
    monkeypatch.setattr(
        google.auth,
        "default",
        lambda *args, **kwargs: (
            google.auth.credentials.AnonymousCredentials(),
            "default-project",
        ),
    )

    client = module_under_test.create_bigquery_client()

    assert client.project == "default-project"


def test_create_bigquery_client_with_default_credentials_respects_project(
    monkeypatch, module_under_test
):
    """Test that project_id is used, even when there is a default project.

    https://github.com/googleapis/python-bigquery-sqlalchemy/issues/48
    """
    monkeypatch.setattr(
        google.auth,
        "default",
        lambda *args, **kwargs: (
            google.auth.credentials.AnonymousCredentials(),
            "default-project",
        ),
    )

    client = module_under_test.create_bigquery_client(
        project_id="connection-url-project"
    )

    assert client.project == "connection-url-project"