Skip to content

Commit f615b6a

Browse files
authored
BigQuery Storage: Update to use faster Arrow data format. (GoogleCloudPlatform#2269)
1 parent 9855121 commit f615b6a

File tree

2 files changed

+17
-11
lines changed

2 files changed

+17
-11
lines changed

bigquery_storage/to_dataframe/main_test.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ def clients():
33 33
# Make clients.
34 34
bqclient = bigquery.Client(
35 35
credentials=credentials,
36-
project=your_project_id
36+
project=your_project_id,
37 37
)
38 38
bqstorageclient = bigquery_storage_v1beta1.BigQueryStorageClient(
39 39
credentials=credentials
@@ -90,11 +90,6 @@ def test_query_to_dataframe(capsys, clients):
90 90
dataframe = (
91 91
bqclient.query(query_string)
92 92
.result()
93-
94-
# Note: The BigQuery Storage API cannot be used to download small query
95-
# results, but as of google-cloud-bigquery version 1.11.1, the
96-
# to_dataframe method will fallback to the tabledata.list API when the
97-
# BigQuery Storage API fails to read the query results.
98 93
.to_dataframe(bqstorage_client=bqstorageclient)
99 94
)
100 95
print(dataframe.head())
@@ -126,7 +121,18 @@ def test_session_to_dataframe(capsys, clients):
126 121

127 122
parent = "projects/{}".format(your_project_id)
128 123
session = bqstorageclient.create_read_session(
129-
table, parent, read_options=read_options
124+
table,
125+
parent,
126+
read_options=read_options,
127+
# This API can also deliver data serialized in Apache Avro format.
128+
# This example leverages Apache Arrow.
129+
format_=bigquery_storage_v1beta1.enums.DataFormat.ARROW,
130+
# We use a LIQUID strategy in this example because we only read from a
131+
# single stream. Consider BALANCED if you're consuming multiple streams
132+
# concurrently and want more consistent stream sizes.
133+
sharding_strategy=(
134+
bigquery_storage_v1beta1.enums.ShardingStrategy.LIQUID
135+
),
130 136
)
131 137

132 138
# This example reads from only a single stream. Read from multiple streams
bigquery_storage/to_dataframe/requirements.txt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1 1
google-auth==1.6.2
2-
google-cloud-bigquery-storage==0.3.0
3-
google-cloud-bigquery==1.11.1
4-
fastavro==0.21.17
2+
google-cloud-bigquery-storage==0.6.0
3+
google-cloud-bigquery==1.17.0
4+
pyarrow==0.13.0
5 5
ipython==7.2.0
6-
pandas==0.24.0
6+
pandas==0.24.2

0 commit comments

Comments
 (0)