Skip to content

Commit 091fa2a

Browse files
authored
Merge branch 'master' into table-notebooks
2 parents da2cac4 + d707f24 commit 091fa2a

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+3102
-161
lines changed

bigquery_storage/to_dataframe/main_test.py

Lines changed: 7 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
import uuid
16-
1715
import pytest
1816

1917

@@ -72,48 +70,11 @@ def test_table_to_dataframe(capsys, clients):
7270
assert "country_name" in out
7371

7472

75-
@pytest.fixture
76-
def temporary_dataset(clients):
77-
from google.cloud import bigquery
78-
79-
bqclient, _ = clients
80-
81-
# [START bigquerystorage_pandas_tutorial_all]
82-
# [START bigquerystorage_pandas_tutorial_create_dataset]
83-
# Set the dataset_id to the dataset used to store temporary results.
84-
dataset_id = "query_results_dataset"
85-
# [END bigquerystorage_pandas_tutorial_create_dataset]
86-
# [END bigquerystorage_pandas_tutorial_all]
87-
88-
dataset_id = "bqstorage_to_dataset_{}".format(uuid.uuid4().hex)
89-
90-
# [START bigquerystorage_pandas_tutorial_all]
91-
# [START bigquerystorage_pandas_tutorial_create_dataset]
92-
dataset_ref = bqclient.dataset(dataset_id)
93-
dataset = bigquery.Dataset(dataset_ref)
94-
95-
# Remove tables after 24 hours.
96-
dataset.default_table_expiration_ms = 1000 * 60 * 60 * 24
97-
98-
bqclient.create_dataset(dataset) # API request.
99-
# [END bigquerystorage_pandas_tutorial_create_dataset]
100-
# [END bigquerystorage_pandas_tutorial_all]
101-
yield dataset_ref
102-
# [START bigquerystorage_pandas_tutorial_cleanup]
103-
bqclient.delete_dataset(dataset_ref, delete_contents=True)
104-
# [END bigquerystorage_pandas_tutorial_cleanup]
105-
106-
107-
def test_query_to_dataframe(capsys, clients, temporary_dataset):
108-
from google.cloud import bigquery
109-
73+
def test_query_to_dataframe(capsys, clients):
11074
bqclient, bqstorageclient = clients
111-
dataset_ref = temporary_dataset
11275

11376
# [START bigquerystorage_pandas_tutorial_all]
11477
# [START bigquerystorage_pandas_tutorial_read_query_results]
115-
import uuid
116-
11778
# Download query results.
11879
query_string = """
11980
SELECT
@@ -125,19 +86,15 @@ def test_query_to_dataframe(capsys, clients, temporary_dataset):
12586
WHERE tags like '%google-bigquery%'
12687
ORDER BY view_count DESC
12788
"""
128-
# Use a random table name to avoid overwriting existing tables.
129-
table_id = "queryresults_" + uuid.uuid4().hex
130-
table = dataset_ref.table(table_id)
131-
query_config = bigquery.QueryJobConfig(
132-
# Due to a known issue in the BigQuery Storage API, small query result
133-
# sets cannot be downloaded. To workaround this issue, write results to
134-
# a destination table.
135-
destination=table
136-
)
13789

13890
dataframe = (
139-
bqclient.query(query_string, job_config=query_config)
91+
bqclient.query(query_string)
14092
.result()
93+
94+
# Note: The BigQuery Storage API cannot be used to download small query
95+
# results, but as of google-cloud-bigquery version 1.11.1, the
96+
# to_dataframe method will fall back to the tabledata.list API when the
97+
# BigQuery Storage API fails to read the query results.
14198
.to_dataframe(bqstorage_client=bqstorageclient)
14299
)
143100
print(dataframe.head())
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
google-auth==1.6.2
2-
google-cloud-bigquery-storage==0.2.0
3-
google-cloud-bigquery==1.8.1
2+
google-cloud-bigquery-storage==0.3.0
3+
google-cloud-bigquery==1.11.1
44
fastavro==0.21.17
55
pandas==0.24.0

datalabeling/README.rst

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
.. This file is automatically generated. Do not edit this file directly.
2+
3+
Google Cloud Data Labeling Service Python Samples
4+
===============================================================================
5+
6+
.. image:: https://gstatic.com/cloudssh/images/open-btn.png
7+
:target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=datalabeling/README.rst
8+
9+
10+
This directory contains samples for Google Cloud Data Labeling Service. `Google Cloud Data Labeling Service`_ lets you request that human labelers label a collection of data that you plan to use to train a custom machine learning model.
11+
12+
13+
14+
15+
.. _Google Cloud Data Labeling Service: https://cloud.google.com/data-labeling/docs/
16+
17+
Setup
18+
-------------------------------------------------------------------------------
19+
20+
21+
Authentication
22+
++++++++++++++
23+
24+
This sample requires you to have authentication set up. Refer to the
25+
`Authentication Getting Started Guide`_ for instructions on setting up
26+
credentials for applications.
27+
28+
.. _Authentication Getting Started Guide:
29+
https://cloud.google.com/docs/authentication/getting-started
30+
31+
Install Dependencies
32+
++++++++++++++++++++
33+
34+
#. Clone python-docs-samples and change directory to the sample directory you want to use.
35+
36+
.. code-block:: bash
37+
38+
$ git clone https://github.com/GoogleCloudPlatform/python-docs-samples.git
39+
40+
#. Install `pip`_ and `virtualenv`_ if you do not already have them. You may want to refer to the `Python Development Environment Setup Guide`_ for Google Cloud Platform for instructions.
41+
42+
.. _Python Development Environment Setup Guide:
43+
https://cloud.google.com/python/setup
44+
45+
#. Create a virtualenv. Samples are compatible with Python 2.7 and 3.4+.
46+
47+
.. code-block:: bash
48+
49+
$ virtualenv env
50+
$ source env/bin/activate
51+
52+
#. Install the dependencies needed to run the samples.
53+
54+
.. code-block:: bash
55+
56+
$ pip install -r requirements.txt
57+
58+
.. _pip: https://pip.pypa.io/
59+
.. _virtualenv: https://virtualenv.pypa.io/
60+
61+
62+
63+
The client library
64+
-------------------------------------------------------------------------------
65+
66+
This sample uses the `Google Cloud Client Library for Python`_.
67+
You can read the documentation for more details on API usage and use GitHub
68+
to `browse the source`_ and `report issues`_.
69+
70+
.. _Google Cloud Client Library for Python:
71+
https://googlecloudplatform.github.io/google-cloud-python/
72+
.. _browse the source:
73+
https://github.com/GoogleCloudPlatform/google-cloud-python
74+
.. _report issues:
75+
https://github.com/GoogleCloudPlatform/google-cloud-python/issues
76+
77+
78+
.. _Google Cloud SDK: https://cloud.google.com/sdk/

datalabeling/README.rst.in

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# This file is used to generate README.rst
2+
3+
product:
4+
name: Google Cloud Data Labeling Service
5+
short_name: Cloud Data Labeling
6+
url: https://cloud.google.com/data-labeling/docs/
7+
description: >
8+
`Google Cloud Data Labeling Service`_ lets you request that human labelers
9+
label a collection of data that you plan to use to train a
10+
custom machine learning model.
11+
12+
setup:
13+
- auth
14+
- install_deps
15+
16+
cloud_client_library: true
17+
18+
folder: datalabeling
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
#!/usr/bin/env python
2+
3+
# Copyright 2019 Google LLC
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
import argparse
18+
19+
20+
# [START datalabeling_create_annotation_spec_set_beta]
21+
def create_annotation_spec_set(project_id):
    """Create a sample annotation spec set in a Google Cloud project.

    The set holds two placeholder annotation specs ("label_1" and
    "label_2"). Details of the created set are printed to stdout.

    Args:
        project_id: ID of the Google Cloud project that will own the set.

    Returns:
        The annotation spec set returned by the Data Labeling service.
    """
    from google.cloud import datalabeling_v1beta1 as datalabeling

    client = datalabeling.DataLabelingServiceClient()

    # Two placeholder labels; replace them with the labels you need.
    specs = [
        datalabeling.types.AnnotationSpec(
            display_name='label_{}'.format(index),
            description='label_description_{}'.format(index)
        )
        for index in (1, 2)
    ]

    spec_set = datalabeling.types.AnnotationSpecSet(
        display_name='YOUR_ANNOTATION_SPEC_SET_DISPLAY_NAME',
        description='YOUR_DESCRIPTION',
        annotation_specs=specs
    )

    parent = client.project_path(project_id)
    response = client.create_annotation_spec_set(parent, spec_set)

    # Per the API reference, the resource name has the form:
    # projects/{project_id}/annotationSpecSets/{annotation_spec_set_id}
    print('The annotation_spec_set resource name: {}'.format(response.name))
    print('Display name: {}'.format(response.display_name))
    print('Description: {}'.format(response.description))
    print('Annotation specs:')
    for created_spec in response.annotation_specs:
        print('\tDisplay name: {}'.format(created_spec.display_name))
        print('\tDescription: {}\n'.format(created_spec.description))

    return response
60+
# [END datalabeling_create_annotation_spec_set_beta]
61+
62+
63+
if __name__ == '__main__':
    # This file has no module docstring, so ``__doc__`` is None here. Pass
    # an explicit description so that ``--help`` says what the script does.
    parser = argparse.ArgumentParser(
        description=(
            'Creates a data labeling annotation spec set for the given '
            'Google Cloud project.'
        ),
        formatter_class=argparse.RawDescriptionHelpFormatter
    )

    parser.add_argument(
        '--project-id',
        help='Project ID. Required.',
        required=True
    )

    args = parser.parse_args()

    create_annotation_spec_set(args.project_id)
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
#!/usr/bin/env python
2+
3+
# Copyright 2019 Google, Inc
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
import os
18+
19+
import create_annotation_spec_set
20+
from google.cloud import datalabeling_v1beta1 as datalabeling
21+
import pytest
22+
23+
PROJECT_ID = os.getenv('GCLOUD_PROJECT')
24+
25+
26+
@pytest.mark.slow
def test_create_annotation_spec_set(capsys):
    """Check that the sample creates an annotation spec set and reports it.

    The created annotation spec set is deleted afterwards, whether or not
    the assertion passes, so a failing run does not leave it behind in the
    project.
    """
    response = create_annotation_spec_set.create_annotation_spec_set(
        PROJECT_ID)

    try:
        out, _ = capsys.readouterr()
        assert 'The annotation_spec_set resource name:' in out
    finally:
        # Delete the created annotation spec set even if the assertion
        # above failed.
        client = datalabeling.DataLabelingServiceClient()
        client.delete_annotation_spec_set(response.name)

0 commit comments

Comments
 (0)