Commit eb8bb45

Remove temporary dataset from bqstorage pandas tutorial (GoogleCloudPlatform#2088)
* Remove temporary dataset from bqstorage pandas tutorial

  As of google-cloud-bigquery version 1.11.1, the `to_dataframe` method
  will fall back to the tabledata.list API when the BigQuery Storage API
  fails to read the query results.

* Remove unused imports
1 parent 2d4ab38 commit eb8bb45

File tree

2 files changed (+9, -52 lines)

bigquery_storage/to_dataframe/main_test.py (+7, -50)
@@ -12,8 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import uuid
-
 import pytest


@@ -72,48 +70,11 @@ def test_table_to_dataframe(capsys, clients):
     assert "country_name" in out


-@pytest.fixture
-def temporary_dataset(clients):
-    from google.cloud import bigquery
-
-    bqclient, _ = clients
-
-    # [START bigquerystorage_pandas_tutorial_all]
-    # [START bigquerystorage_pandas_tutorial_create_dataset]
-    # Set the dataset_id to the dataset used to store temporary results.
-    dataset_id = "query_results_dataset"
-    # [END bigquerystorage_pandas_tutorial_create_dataset]
-    # [END bigquerystorage_pandas_tutorial_all]
-
-    dataset_id = "bqstorage_to_dataset_{}".format(uuid.uuid4().hex)
-
-    # [START bigquerystorage_pandas_tutorial_all]
-    # [START bigquerystorage_pandas_tutorial_create_dataset]
-    dataset_ref = bqclient.dataset(dataset_id)
-    dataset = bigquery.Dataset(dataset_ref)
-
-    # Remove tables after 24 hours.
-    dataset.default_table_expiration_ms = 1000 * 60 * 60 * 24
-
-    bqclient.create_dataset(dataset)  # API request.
-    # [END bigquerystorage_pandas_tutorial_create_dataset]
-    # [END bigquerystorage_pandas_tutorial_all]
-    yield dataset_ref
-    # [START bigquerystorage_pandas_tutorial_cleanup]
-    bqclient.delete_dataset(dataset_ref, delete_contents=True)
-    # [END bigquerystorage_pandas_tutorial_cleanup]
-
-
-def test_query_to_dataframe(capsys, clients, temporary_dataset):
-    from google.cloud import bigquery
-
+def test_query_to_dataframe(capsys, clients):
     bqclient, bqstorageclient = clients
-    dataset_ref = temporary_dataset

     # [START bigquerystorage_pandas_tutorial_all]
     # [START bigquerystorage_pandas_tutorial_read_query_results]
-    import uuid
-
     # Download query results.
     query_string = """
 SELECT
@@ -125,19 +86,15 @@ def test_query_to_dataframe(capsys, clients, temporary_dataset):
 WHERE tags like '%google-bigquery%'
 ORDER BY view_count DESC
 """
-    # Use a random table name to avoid overwriting existing tables.
-    table_id = "queryresults_" + uuid.uuid4().hex
-    table = dataset_ref.table(table_id)
-    query_config = bigquery.QueryJobConfig(
-        # Due to a known issue in the BigQuery Storage API, small query result
-        # sets cannot be downloaded. To workaround this issue, write results to
-        # a destination table.
-        destination=table
-    )

     dataframe = (
-        bqclient.query(query_string, job_config=query_config)
+        bqclient.query(query_string)
         .result()
+
+        # Note: The BigQuery Storage API cannot be used to download small query
+        # results, but as of google-cloud-bigquery version 1.11.1, the
+        # to_dataframe method will fallback to the tabledata.list API when the
+        # BigQuery Storage API fails to read the query results.
         .to_dataframe(bqstorage_client=bqstorageclient)
     )
     print(dataframe.head())
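
With the temporary dataset and destination table gone, the tutorial's query path collapses into a single fluent chain. Below is a minimal self-contained sketch of that pattern outside the test harness, assuming application default credentials and the pinned versions from requirements.txt below; the query is an abridged form of the tutorial's Stack Overflow query (the full SELECT list is elided in this page's diff):

import google.auth
from google.cloud import bigquery
from google.cloud import bigquery_storage_v1beta1

# Share one set of credentials between both clients so the user
# authenticates only once.
credentials, project_id = google.auth.default(
    scopes=["https://www.googleapis.com/auth/cloud-platform"]
)
bqclient = bigquery.Client(credentials=credentials, project=project_id)
bqstorageclient = bigquery_storage_v1beta1.BigQueryStorageClient(
    credentials=credentials
)

# Abridged form of the tutorial's query against a public dataset.
query_string = """
SELECT id, view_count
FROM `bigquery-public-data.stackoverflow.posts_questions`
WHERE tags like '%google-bigquery%'
ORDER BY view_count DESC
LIMIT 10
"""

# No destination table or temporary dataset is needed: when the BigQuery
# Storage API cannot read the (small) query results, to_dataframe falls
# back to the tabledata.list API (google-cloud-bigquery >= 1.11.1).
dataframe = (
    bqclient.query(query_string)
    .result()
    .to_dataframe(bqstorage_client=bqstorageclient)
)
print(dataframe.head())

The bqstorage_client argument is still passed so that large results keep using the faster Storage API; the tabledata.list fallback only engages when that read fails.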
bigquery_storage/to_dataframe/requirements.txt (+2, -2)

@@ -1,5 +1,5 @@
 google-auth==1.6.2
-google-cloud-bigquery-storage==0.2.0
-google-cloud-bigquery==1.8.1
+google-cloud-bigquery-storage==0.3.0
+google-cloud-bigquery==1.11.1
 fastavro==0.21.17
 pandas==0.24.0
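
These version bumps are what enable the simplification above: the tabledata.list fallback landed in google-cloud-bigquery 1.11.1. As a hedged sketch (assuming setuptools' pkg_resources is importable, as it typically is in environments of this era), a runtime guard for the minimum version could look like:

import pkg_resources

# The tabledata.list fallback in to_dataframe requires
# google-cloud-bigquery >= 1.11.1; older clients raise when the
# BigQuery Storage API cannot read small query results.
installed = pkg_resources.get_distribution("google-cloud-bigquery").version
if pkg_resources.parse_version(installed) < pkg_resources.parse_version("1.11.1"):
    raise RuntimeError(
        "google-cloud-bigquery {} is too old for the tabledata.list "
        "fallback; upgrade to 1.11.1 or later.".format(installed)
    )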
