Skip to content
This repository was archived by the owner on Mar 17, 2020. It is now read-only.

Commit b4f6649

Browse files
authored
Samples for pandas-gbq migration guide (GoogleCloudPlatform#1811)
1 parent 9473909 commit b4f6649

File tree

3 files changed

+227
-0
lines changed

3 files changed

+227
-0
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# pandas-gbq Migration Guide
2+
3+
This directory contains samples used in the `pandas-gbq` to
4+
`google-cloud-bigquery` migration guide.
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
google-cloud-bigquery[pandas,pyarrow]==1.6.0
2+
pandas-gbq==0.7.0
Lines changed: 221 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,221 @@
1+
# Copyright 2018 Google Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
import time
17+
18+
import pytest
19+
20+
21+
@pytest.fixture
def temp_dataset():
    """Create a throwaway BigQuery dataset and drop it after the test."""
    from google.cloud import bigquery

    bq_client = bigquery.Client()
    # Millisecond-timestamp name so repeated runs don't collide.
    unique_id = "temp_dataset_{}".format(int(time.time() * 1000))
    ref = bigquery.DatasetReference(bq_client.project, unique_id)
    created = bq_client.create_dataset(bigquery.Dataset(ref))
    yield created
    # Teardown: remove the dataset along with any tables the test made.
    bq_client.delete_dataset(created, delete_contents=True)
31+
32+
33+
def test_client_library_query():
    """Docs sample: run a Standard SQL query with google-cloud-bigquery.

    The [START]/[END] markers delimit the snippet published in the
    pandas-gbq migration guide; code between them must stay intact.
    """
    # [START bigquery_migration_client_library_query]
    from google.cloud import bigquery

    client = bigquery.Client()
    sql = """
        SELECT name
        FROM `bigquery-public-data.usa_names.usa_1910_current`
        WHERE state = 'TX'
        LIMIT 100
    """

    # Run a Standard SQL query using the environment's default project
    df = client.query(sql).to_dataframe()

    # Run a Standard SQL query with the project set explicitly
    project_id = 'your-project-id'
    # [END bigquery_migration_client_library_query]
    assert len(df) > 0
    # Swap the doc placeholder for a real project before running the
    # explicit-project variant of the snippet.
    project_id = os.environ['GCLOUD_PROJECT']
    # [START bigquery_migration_client_library_query]
    df = client.query(sql, project=project_id).to_dataframe()
    # [END bigquery_migration_client_library_query]
    assert len(df) > 0
57+
58+
59+
def test_pandas_gbq_query():
    """Docs sample: run a Standard SQL query with pandas-gbq.

    Mirrors test_client_library_query for the migration guide's
    side-by-side comparison.
    """
    # [START bigquery_migration_pandas_gbq_query]
    import pandas

    sql = """
        SELECT name
        FROM `bigquery-public-data.usa_names.usa_1910_current`
        WHERE state = 'TX'
        LIMIT 100
    """

    # Run a Standard SQL query using the environment's default project
    df = pandas.read_gbq(sql, dialect='standard')

    # Run a Standard SQL query with the project set explicitly
    project_id = 'your-project-id'
    # [END bigquery_migration_pandas_gbq_query]
    assert len(df) > 0
    # Swap the doc placeholder for a real project before running the
    # explicit-project variant of the snippet.
    project_id = os.environ['GCLOUD_PROJECT']
    # [START bigquery_migration_pandas_gbq_query]
    df = pandas.read_gbq(sql, project_id=project_id, dialect='standard')
    # [END bigquery_migration_pandas_gbq_query]
    assert len(df) > 0
82+
83+
84+
def test_client_library_legacy_query():
    """Docs sample: run a legacy SQL query with google-cloud-bigquery."""
    # [START bigquery_migration_client_library_query_legacy]
    from google.cloud import bigquery

    client = bigquery.Client()
    sql = """
        SELECT name
        FROM [bigquery-public-data:usa_names.usa_1910_current]
        WHERE state = 'TX'
        LIMIT 100
    """
    query_config = bigquery.QueryJobConfig()
    query_config.use_legacy_sql = True

    # Run a legacy SQL query using the environment's default project
    df = client.query(sql, job_config=query_config).to_dataframe()
    # [END bigquery_migration_client_library_query_legacy]
    assert len(df) > 0
102+
103+
104+
def test_pandas_gbq_legacy_query():
    """Docs sample: run a legacy SQL query with pandas-gbq."""
    # [START bigquery_migration_pandas_gbq_query_legacy]
    import pandas

    sql = """
        SELECT name
        FROM [bigquery-public-data:usa_names.usa_1910_current]
        WHERE state = 'TX'
        LIMIT 100
    """

    df = pandas.read_gbq(sql, dialect='legacy')
    # [END bigquery_migration_pandas_gbq_query_legacy]
    assert len(df) > 0
118+
119+
120+
def test_client_library_query_with_parameters():
    """Docs sample: parameterized query with google-cloud-bigquery."""
    # [START bigquery_migration_client_library_query_parameters]
    from google.cloud import bigquery

    client = bigquery.Client()
    sql = """
        SELECT name
        FROM `bigquery-public-data.usa_names.usa_1910_current`
        WHERE state = @state
        LIMIT @limit
    """
    query_config = bigquery.QueryJobConfig()
    query_config.query_parameters = [
        bigquery.ScalarQueryParameter('state', 'STRING', 'TX'),
        bigquery.ScalarQueryParameter('limit', 'INTEGER', 100)
    ]
    df = client.query(sql, job_config=query_config).to_dataframe()
    # [END bigquery_migration_client_library_query_parameters]
    assert len(df) > 0
139+
140+
141+
def test_pandas_gbq_query_with_parameters():
    """Docs sample: parameterized query with pandas-gbq.

    Parameters are passed as a raw job-configuration dict
    (``parameterMode``/``queryParameters``) rather than typed objects.
    """
    # [START bigquery_migration_pandas_gbq_query_parameters]
    import pandas

    sql = """
        SELECT name
        FROM `bigquery-public-data.usa_names.usa_1910_current`
        WHERE state = @state
        LIMIT @limit
    """
    query_config = {
        'query': {
            'parameterMode': 'NAMED',
            'queryParameters': [
                {
                    'name': 'state',
                    'parameterType': {'type': 'STRING'},
                    'parameterValue': {'value': 'TX'}
                },
                {
                    'name': 'limit',
                    'parameterType': {'type': 'INTEGER'},
                    'parameterValue': {'value': 100}
                }
            ]
        }
    }
    df = pandas.read_gbq(sql, configuration=query_config)
    # [END bigquery_migration_pandas_gbq_query_parameters]
    assert len(df) > 0
171+
172+
173+
def test_client_library_upload_from_dataframe(temp_dataset):
    """Docs sample: load a pandas DataFrame into a BigQuery table.

    Uploads three rows via load_table_from_dataframe and verifies the
    resulting table's row count.
    """
    # [START bigquery_migration_client_library_upload_from_dataframe]
    from google.cloud import bigquery
    import pandas

    df = pandas.DataFrame(
        {
            'my_string': ['a', 'b', 'c'],
            'my_int64': [1, 2, 3],
            'my_float64': [4.0, 5.0, 6.0],
        }
    )
    client = bigquery.Client()
    dataset_ref = client.dataset('my_dataset')
    # [END bigquery_migration_client_library_upload_from_dataframe]
    # Point the snippet at the per-test dataset instead of the placeholder.
    dataset_ref = client.dataset(temp_dataset.dataset_id)
    # [START bigquery_migration_client_library_upload_from_dataframe]
    table_ref = dataset_ref.table('new_table')
    client.load_table_from_dataframe(df, table_ref).result()
    # [END bigquery_migration_client_library_upload_from_dataframe]
    # Reuse the snippet's client for verification; creating a second
    # bigquery.Client() here was redundant.
    table = client.get_table(table_ref)
    assert table.num_rows == 3
196+
197+
198+
def test_pandas_gbq_upload_from_dataframe(temp_dataset):
    """Docs sample: load a pandas DataFrame into BigQuery via to_gbq.

    Verification uses the client library since pandas-gbq has no
    table-metadata accessor shown here.
    """
    from google.cloud import bigquery
    # [START bigquery_migration_pandas_gbq_upload_from_dataframe]
    import pandas

    df = pandas.DataFrame(
        {
            'my_string': ['a', 'b', 'c'],
            'my_int64': [1, 2, 3],
            'my_float64': [4.0, 5.0, 6.0],
        }
    )
    full_table_id = 'my_dataset.new_table'
    project_id = 'my-project-id'
    # [END bigquery_migration_pandas_gbq_upload_from_dataframe]
    # Redirect the snippet's placeholders to the per-test dataset and the
    # project configured in the environment.
    table_id = 'new_table'
    full_table_id = '{}.{}'.format(temp_dataset.dataset_id, table_id)
    project_id = os.environ['GCLOUD_PROJECT']
    # [START bigquery_migration_pandas_gbq_upload_from_dataframe]
    df.to_gbq(full_table_id, project_id=project_id)
    # [END bigquery_migration_pandas_gbq_upload_from_dataframe]
    # Confirm all three rows arrived.
    client = bigquery.Client()
    table = client.get_table(temp_dataset.table(table_id))
    assert table.num_rows == 3

0 commit comments

Comments
 (0)