Skip to content

Commit bb8c80e

Browse files
authored
Add magics tutorial with BigQuery Storage API integration. (GoogleCloudPlatform#2087)
* Add magics tutorial with BigQuery Storage API integration. This is a notebooks tutorial, modeled after the Jupyter notebook example code for BigQuery. Use some caution when running these tests, as they run some large-ish (5 GB processed) queries and download about 500 MB worth of data. This is intentional, as the BigQuery Storage API is most useful for downloading large results.
* Update deps.
* Don't run big queries on Travis.
1 parent 3d1f403 commit bb8c80e

File tree

2 files changed

+149
-0
lines changed

2 files changed

+149
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
# Copyright 2019 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
17+
import IPython
18+
from IPython.terminal import interactiveshell
19+
from IPython.testing import tools
20+
import pytest
21+
22+
# Ignore semicolon lint warning because semicolons are used in notebooks
23+
# flake8: noqa E703
24+
25+
26+
@pytest.fixture(scope="session")
def ipython():
    """Provide one terminal IPython shell for the whole test session.

    The shell is configured from IPython's default testing config with
    ``simple_prompt`` switched on, and is obtained through ``instance()``.
    """
    shell_config = tools.default_config()
    shell_config.TerminalInteractiveShell.simple_prompt = True
    return interactiveshell.TerminalInteractiveShell.instance(config=shell_config)
32+
33+
34+
@pytest.fixture()
def ipython_interactive(request, ipython):
    """Yield the session shell with IPython's builtin hooks activated.

    The hooks stay active while the requesting test runs; the
    ``builtin_trap`` context is exited once the test has finished.
    ``request`` is accepted but not used by the body.
    """
    shell = ipython
    with shell.builtin_trap:
        yield shell
42+
43+
44+
def _strip_region_tags(sample_text):
    """Remove blank lines and region tags from sample text.

    Args:
        sample_text: Multi-line sample, typically a cell magic wrapped in
            ``# [START ...]`` / ``# [END ...]`` region-tag comments.

    Returns:
        The remaining lines joined with newlines. Lines that are empty or
        contain only whitespace are dropped, as is every line containing
        the region-tag marker ``"# ["``. Kept lines are returned unmodified.
    """
    magic_lines = [
        line
        for line in sample_text.split("\n")
        # strip() makes whitespace-only lines count as blank; such lines
        # appear when the triple-quoted sample is indented inside a function
        # (for example the line that holds the closing quotes).
        if line.strip() and "# [" not in line
    ]
    return "\n".join(magic_lines)
50+
51+
52+
def test_jupyter_small_query(ipython):
    """Run the small-query fallback sample as a ``%%bigquery`` cell.

    Passes when the cell runs without error and leaves a ``stackoverflow``
    variable in the shell's user namespace.
    """
    shell = IPython.get_ipython()
    shell.extension_manager.load_extension("google.cloud.bigquery")

    # Include a small query to demonstrate that it falls back to the
    # tabledata.list API when the BQ Storage API cannot be used.
    sample = """
    # [START bigquerystorage_jupyter_tutorial_fallback]
    %%bigquery stackoverflow --use_bqstorage_api
    SELECT
        CONCAT(
            'https://stackoverflow.com/questions/',
            CAST(id as STRING)) as url,
        view_count
    FROM `bigquery-public-data.stackoverflow.posts_questions`
    WHERE tags like '%google-bigquery%'
    ORDER BY view_count DESC
    LIMIT 10
    # [END bigquerystorage_jupyter_tutorial_fallback]
    """

    cell_source = _strip_region_tags(sample)
    outcome = shell.run_cell(cell_source)
    outcome.raise_error()  # Throws an exception if the cell failed.
    assert "stackoverflow" in shell.user_ns  # verify that variable exists
76+
77+
78+
@pytest.mark.skipif(
    "TRAVIS" in os.environ, reason="Not running long-running queries on Travis"
)
def test_jupyter_tutorial(ipython):
    """Run the tutorial samples: two large ``%%bigquery`` cells plus context setup.

    Skipped when the ``TRAVIS`` environment variable is present. Each cell
    must run without error and leave its destination variable
    (``nodejs_deps``, then ``java_deps``) in the shell's user namespace.
    """
    shell = IPython.get_ipython()
    shell.extension_manager.load_extension("google.cloud.bigquery")

    # This code sample intentionally queries a lot of data to demonstrate the
    # speed-up of using the BigQuery Storage API to download the results.
    sample = """
    # [START bigquerystorage_jupyter_tutorial_query]
    %%bigquery nodejs_deps --use_bqstorage_api
    SELECT
        dependency_name,
        dependency_platform,
        project_name,
        project_id,
        version_number,
        version_id,
        dependency_kind,
        optional_dependency,
        dependency_requirements,
        dependency_project_id
    FROM
        `bigquery-public-data.libraries_io.dependencies`
    WHERE
        LOWER(dependency_platform) = 'npm'
    LIMIT 2500000
    # [END bigquerystorage_jupyter_tutorial_query]
    """
    npm_outcome = shell.run_cell(_strip_region_tags(sample))
    npm_outcome.raise_error()  # Throws an exception if the cell failed.

    assert "nodejs_deps" in shell.user_ns  # verify that variable exists
    nodejs_deps = shell.user_ns["nodejs_deps"]

    # [START bigquerystorage_jupyter_tutorial_results]
    nodejs_deps.head()
    # [END bigquerystorage_jupyter_tutorial_results]

    # [START bigquerystorage_jupyter_tutorial_context]
    import google.cloud.bigquery.magics

    google.cloud.bigquery.magics.context.use_bqstorage_api = True
    # [END bigquerystorage_jupyter_tutorial_context]

    # No --use_bqstorage_api flag on this cell; the context attribute set
    # above is what is in effect when it runs.
    # NOTE(review): the sample below reuses the region tag of the first
    # query -- confirm that sharing one tag between both cells is intended.
    sample = """
    # [START bigquerystorage_jupyter_tutorial_query]
    %%bigquery java_deps
    SELECT
        dependency_name,
        dependency_platform,
        project_name,
        project_id,
        version_number,
        version_id,
        dependency_kind,
        optional_dependency,
        dependency_requirements,
        dependency_project_id
    FROM
        `bigquery-public-data.libraries_io.dependencies`
    WHERE
        LOWER(dependency_platform) = 'maven'
    LIMIT 2500000
    # [END bigquerystorage_jupyter_tutorial_query]
    """
    maven_outcome = shell.run_cell(_strip_region_tags(sample))
    maven_outcome.raise_error()  # Throws an exception if the cell failed.

    assert "java_deps" in shell.user_ns  # verify that variable exists

bigquery_storage/to_dataframe/requirements.txt

+1
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@ google-auth==1.6.2
22
google-cloud-bigquery-storage==0.3.0
33
google-cloud-bigquery==1.11.1
44
fastavro==0.21.17
5+
ipython==7.2.0
56
pandas==0.24.0

0 commit comments

Comments
 (0)