From cc7696afc08a601979fd6d47367275fe88714428 Mon Sep 17 00:00:00 2001 From: nnegrey Date: Thu, 12 Dec 2019 15:52:09 -0700 Subject: [PATCH 01/11] automl: add base dataset samples for automl ga --- automl/cloud-client/delete_dataset.py | 35 +++++++ automl/cloud-client/delete_dataset_test.py | 49 ++++++++++ automl/cloud-client/export_dataset.py | 40 ++++++++ automl/cloud-client/export_dataset_test.py | 44 +++++++++ automl/cloud-client/get_dataset.py | 102 +++++++++++++++++++++ automl/cloud-client/get_dataset_test.py | 28 ++++++ automl/cloud-client/import_dataset.py | 42 +++++++++ automl/cloud-client/import_dataset_test.py | 63 +++++++++++++ automl/cloud-client/list_datasets.py | 102 +++++++++++++++++++++ automl/cloud-client/list_datasets_test.py | 29 ++++++ automl/cloud-client/requirements.txt | 2 + 11 files changed, 536 insertions(+) create mode 100644 automl/cloud-client/delete_dataset.py create mode 100644 automl/cloud-client/delete_dataset_test.py create mode 100644 automl/cloud-client/export_dataset.py create mode 100644 automl/cloud-client/export_dataset_test.py create mode 100644 automl/cloud-client/get_dataset.py create mode 100644 automl/cloud-client/get_dataset_test.py create mode 100644 automl/cloud-client/import_dataset.py create mode 100644 automl/cloud-client/import_dataset_test.py create mode 100644 automl/cloud-client/list_datasets.py create mode 100644 automl/cloud-client/list_datasets_test.py create mode 100644 automl/cloud-client/requirements.txt diff --git a/automl/cloud-client/delete_dataset.py b/automl/cloud-client/delete_dataset.py new file mode 100644 index 00000000000..c93f9d4f6c6 --- /dev/null +++ b/automl/cloud-client/delete_dataset.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python + +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def delete_dataset(project_id, dataset_id): + """Delete a dataset.""" + # [START automl_delete_dataset] + from google.cloud import automl + + # TODO(developer): Uncomment and set the following variables + # project_id = 'YOUR_PROJECT_ID' + # dataset_id = 'YOUR_DATASET_ID' + + client = automl.AutoMlClient() + # Get the full path of the dataset + dataset_full_id = client.dataset_path( + project_id, "us-central1", dataset_id + ) + response = client.delete_dataset(dataset_full_id) + + print(u"Dataset deleted. {}".format(response.result())) + # [END automl_delete_dataset] diff --git a/automl/cloud-client/delete_dataset_test.py b/automl/cloud-client/delete_dataset_test.py new file mode 100644 index 00000000000..f5d70764924 --- /dev/null +++ b/automl/cloud-client/delete_dataset_test.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python + +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import os + +import pytest + +import delete_dataset + +PROJECT_ID = os.environ["GCLOUD_PROJECT"] +BUCKET_ID = "{}-lcm".format(PROJECT_ID) + + +@pytest.fixture(scope="function") +def create_dataset(): + from google.cloud import automl + + client = automl.AutoMlClient() + project_location = client.location_path(PROJECT_ID, "us-central1") + display_name = "test_" + datetime.datetime.now().strftime("%Y%m%d%H%M%S") + metadata = automl.types.TextExtractionDatasetMetadata() + dataset = automl.types.Dataset( + display_name=display_name, text_extraction_dataset_metadata=metadata + ) + response = client.create_dataset(project_location, dataset) + dataset_id = response.result().name.split("/")[-1] + + yield dataset_id + + +def test_delete_dataset(capsys, create_dataset): + # delete dataset + delete_dataset.delete_dataset(PROJECT_ID, create_dataset) + out, _ = capsys.readouterr() + assert "Dataset deleted." in out diff --git a/automl/cloud-client/export_dataset.py b/automl/cloud-client/export_dataset.py new file mode 100644 index 00000000000..081ee75a934 --- /dev/null +++ b/automl/cloud-client/export_dataset.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python + +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def export_dataset(project_id, dataset_id, gcs_uri): + """Export a dataset.""" + # [START automl_export_dataset] + from google.cloud import automl + + # TODO(developer): Uncomment and set the following variables + # project_id = 'YOUR_PROJECT_ID' + # dataset_id = 'YOUR_DATASET_ID' + # gcs_uri = 'gs://BUCKET_ID/path_to_export/' + + client = automl.AutoMlClient() + + # Get the full path of the dataset + dataset_full_id = client.dataset_path( + project_id, "us-central1", dataset_id + ) + + gcs_destination = automl.types.GcsDestination(output_uri_prefix=gcs_uri) + output_config = automl.types.OutputConfig(gcs_destination=gcs_destination) + + response = client.export_data(dataset_full_id, output_config) + print(u"Dataset exported. {}".format(response.result())) + # [END automl_export_dataset] diff --git a/automl/cloud-client/export_dataset_test.py b/automl/cloud-client/export_dataset_test.py new file mode 100644 index 00000000000..841c6f7127d --- /dev/null +++ b/automl/cloud-client/export_dataset_test.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python + +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import pytest + +import export_dataset + +PROJECT_ID = os.environ["GCLOUD_PROJECT"] +BUCKET_ID = "{}-lcm".format(PROJECT_ID) +DATASET_ID = "TEN4058147884539838464" + + +@pytest.mark.slow +def test_export_dataset(capsys): + export_dataset.export_dataset( + PROJECT_ID, DATASET_ID, "gs://{}/TEST_EXPORT_OUTPUT/".format(BUCKET_ID) + ) + + out, _ = capsys.readouterr() + assert "Dataset exported" in out + + # Delete the created files + from google.cloud import storage + + storage_client = storage.Client() + bucket = storage_client.get_bucket(BUCKET_ID) + if len(list(bucket.list_blobs(prefix="TEST_EXPORT_OUTPUT"))) > 0: + for blob in bucket.list_blobs(prefix="TEST_EXPORT_OUTPUT"): + blob.delete() diff --git a/automl/cloud-client/get_dataset.py b/automl/cloud-client/get_dataset.py new file mode 100644 index 00000000000..89f84960fa4 --- /dev/null +++ b/automl/cloud-client/get_dataset.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python + +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +def get_dataset(project_id, dataset_id): + """Get a dataset.""" + # [START automl_language_entity_extraction_get_dataset] + # [START automl_language_sentiment_analysis_get_dataset] + # [START automl_language_text_classification_get_dataset] + # [START automl_translate_get_dataset] + # [START automl_vision_classification_get_dataset] + # [START automl_vision_object_detection_get_dataset] + from google.cloud import automl + + # TODO(developer): Uncomment and set the following variables + # project_id = 'YOUR_PROJECT_ID' + # dataset_id = 'YOUR_DATASET_ID' + + client = automl.AutoMlClient() + # Get the full path of the dataset + dataset_full_id = client.dataset_path( + project_id, "us-central1", dataset_id + ) + dataset = client.get_dataset(dataset_full_id) + + # Display the dataset information + print(u"Dataset name: {}".format(dataset.name)) + print(u"Dataset id: {}".format(dataset.name.split("/")[-1])) + print(u"Dataset display name: {}".format(dataset.display_name)) + print("Dataset create time:") + print(u"\tseconds: {}".format(dataset.create_time.seconds)) + print(u"\tnanos: {}".format(dataset.create_time.nanos)) + # [END automl_language_sentiment_analysis_get_dataset] + # [END automl_language_text_classification_get_dataset] + # [END automl_translate_get_dataset] + # [END automl_vision_classification_get_dataset] + # [END automl_vision_object_detection_get_dataset] + print( + "Text extraction dataset metadata: {}".format( + dataset.text_extraction_dataset_metadata + ) + ) + # [END automl_language_entity_extraction_get_dataset] + + # [START automl_language_sentiment_analysis_get_dataset] + print( + "Text sentiment dataset metadata: {}".format( + dataset.text_sentiment_dataset_metadata + ) + ) + # [END automl_language_sentiment_analysis_get_dataset] + + # [START automl_language_text_classification_get_dataset] + print( + "Text classification dataset metadata: {}".format( + dataset.text_classification_dataset_metadata + ) + ) + # [END automl_language_text_classification_get_dataset] + + # [START automl_translate_get_dataset] + print("Translation dataset metadata:") + print( + u"\tsource_language_code: {}".format( + dataset.translation_dataset_metadata.source_language_code + ) + ) + print( + u"\ttarget_language_code: {}".format( + dataset.translation_dataset_metadata.target_language_code + ) + ) + # [END automl_translate_get_dataset] + + # [START automl_vision_classification_get_dataset] + print( + "Image classification dataset metadata: {}".format( + dataset.image_classification_dataset_metadata + ) + ) + # [END automl_vision_classification_get_dataset] + + # [START automl_vision_object_detection_get_dataset] + print( + "Image object detection dataset metadata: {}".format( + dataset.image_object_detection_dataset_metadata + ) + ) + # [END automl_vision_object_detection_get_dataset] diff --git a/automl/cloud-client/get_dataset_test.py b/automl/cloud-client/get_dataset_test.py new file mode 100644 index 00000000000..5c1aa1c1bd6 --- /dev/null +++ b/automl/cloud-client/get_dataset_test.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python + +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import get_dataset + +PROJECT_ID = os.environ["GCLOUD_PROJECT"] +DATASET_ID = "TEN4058147884539838464" + + +def test_get_dataset(capsys): + get_dataset.get_dataset(PROJECT_ID, DATASET_ID) + out, _ = capsys.readouterr() + assert "Dataset name: " in out diff --git a/automl/cloud-client/import_dataset.py b/automl/cloud-client/import_dataset.py new file mode 100644 index 00000000000..c7285f7051a --- /dev/null +++ b/automl/cloud-client/import_dataset.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python + +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def import_dataset(project_id, dataset_id, path): + """Import a dataset.""" + # [START automl_import_data] + from google.cloud import automl + + # TODO(developer): Uncomment and set the following variables + # project_id = 'YOUR_PROJECT_ID' + # dataset_id = 'YOUR_DATASET_ID' + # path = 'gs://BUCKET_ID/path_to_training_data.csv' + + client = automl.AutoMlClient() + # Get the full path of the dataset. + dataset_full_id = client.dataset_path( + project_id, "us-central1", dataset_id + ) + # Get the multiple Google Cloud Storage URIs + input_uris = path.split(",") + gcs_source = automl.types.GcsSource(input_uris=input_uris) + input_config = automl.types.InputConfig(gcs_source=gcs_source) + # Import data from the input URI + response = client.import_data(dataset_full_id, input_config) + + print("Processing import...") + print(u"Data imported. {}".format(response.result())) + # [END automl_import_data] diff --git a/automl/cloud-client/import_dataset_test.py b/automl/cloud-client/import_dataset_test.py new file mode 100644 index 00000000000..e5958ae2c31 --- /dev/null +++ b/automl/cloud-client/import_dataset_test.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python + +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import datetime +import os + +import pytest + +import import_dataset + +PROJECT_ID = os.environ["GCLOUD_PROJECT"] +BUCKET_ID = "{}-lcm".format(PROJECT_ID) +DATASET_ID = "TEN4058147884539838464" + + +@pytest.fixture(scope="function") +def create_dataset(): + from google.cloud import automl + + client = automl.AutoMlClient() + project_location = client.location_path(PROJECT_ID, "us-central1") + display_name = "test_" + datetime.datetime.now().strftime("%Y%m%d%H%M%S") + metadata = automl.types.TextExtractionDatasetMetadata() + dataset = automl.types.Dataset( + display_name=display_name, text_extraction_dataset_metadata=metadata + ) + response = client.create_dataset(project_location, dataset) + dataset_id = response.result().name.split("/")[-1] + + yield dataset_id + + +@pytest.mark.slow +def test_import_dataset(capsys, create_dataset): + data = ( + "gs://cloud-samples-data/automl/language_entity_extraction/dataset.csv" + ) + import_dataset.import_dataset(PROJECT_ID, create_dataset, data) + out, _ = capsys.readouterr() + assert "Data imported." in out + + # delete created dataset + from google.cloud import automl + + client = automl.AutoMlClient() + dataset_full_id = client.dataset_path( + PROJECT_ID, "us-central1", create_dataset + ) + response = client.delete_dataset(dataset_full_id) + response.result() diff --git a/automl/cloud-client/list_datasets.py b/automl/cloud-client/list_datasets.py new file mode 100644 index 00000000000..be207669ca4 --- /dev/null +++ b/automl/cloud-client/list_datasets.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python + +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def list_datasets(project_id): + """List datasets.""" + # [START automl_language_entity_extraction_list_datasets] + # [START automl_language_sentiment_analysis_list_datasets] + # [START automl_language_text_classification_list_datasets] + # [START automl_translate_list_datasets] + # [START automl_vision_classification_list_datasets] + # [START automl_vision_object_detection_list_datasets] + from google.cloud import automl + + # TODO(developer): Uncomment and set the following variables + # project_id = 'YOUR_PROJECT_ID' + + client = automl.AutoMlClient() + # A resource that represents Google Cloud Platform location. + project_location = client.location_path(project_id, "us-central1") + + # List all the datasets available in the region. 
+ response = client.list_datasets(project_location, "") + + print("List of datasets:") + for dataset in response: + print(u"Dataset name: {}".format(dataset.name)) + print(u"Dataset id: {}".format(dataset.name.split("/")[-1])) + print(u"Dataset display name: {}".format(dataset.display_name)) + print("Dataset create time:") + print(u"\tseconds: {}".format(dataset.create_time.seconds)) + print(u"\tnanos: {}".format(dataset.create_time.nanos)) + # [END automl_language_sentiment_analysis_list_datasets] + # [END automl_language_text_classification_list_datasets] + # [END automl_translate_list_datasets] + # [END automl_vision_classification_list_datasets] + # [END automl_vision_object_detection_list_datasets] + print( + "Text extraction dataset metadata: {}".format( + dataset.text_extraction_dataset_metadata + ) + ) + # [END automl_language_entity_extraction_list_datasets] + + # [START automl_language_sentiment_analysis_list_datasets] + print( + "Text sentiment dataset metadata: {}".format( + dataset.text_sentiment_dataset_metadata + ) + ) + # [END automl_language_sentiment_analysis_list_datasets] + + # [START automl_language_text_classification_list_datasets] + print( + "Text classification dataset metadata: {}".format( + dataset.text_classification_dataset_metadata + ) + ) + # [END automl_language_text_classification_list_datasets] + + # [START automl_translate_list_datasets] + print("Translation dataset metadata:") + print( + u"\tsource_language_code: {}".format( + dataset.translation_dataset_metadata.source_language_code + ) + ) + print( + u"\ttarget_language_code: {}".format( + dataset.translation_dataset_metadata.target_language_code + ) + ) + # [END automl_translate_list_datasets] + + # [START automl_vision_classification_list_datasets] + print( + "Image classification dataset metadata: {}".format( + dataset.image_classification_dataset_metadata + ) + ) + # [END automl_vision_classification_list_datasets] + + # [START automl_vision_object_detection_list_datasets] + print( + "Image object detection dataset metadata: {}".format( + dataset.image_object_detection_dataset_metadata + ) + ) + # [END automl_vision_object_detection_list_datasets] diff --git a/automl/cloud-client/list_datasets_test.py b/automl/cloud-client/list_datasets_test.py new file mode 100644 index 00000000000..f580b30eb7f --- /dev/null +++ b/automl/cloud-client/list_datasets_test.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python + +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os + +import list_datasets + +PROJECT_ID = os.environ["GCLOUD_PROJECT"] +DATASET_ID = "TEN4058147884539838464" + + +def test_list_dataset(capsys): + # list datasets + list_datasets.list_datasets(PROJECT_ID) + out, _ = capsys.readouterr() + assert "Dataset id: {}".format(DATASET_ID) in out diff --git a/automl/cloud-client/requirements.txt b/automl/cloud-client/requirements.txt new file mode 100644 index 00000000000..d64d7c33cbc --- /dev/null +++ b/automl/cloud-client/requirements.txt @@ -0,0 +1,2 @@ +google-cloud-automl==0.8.0 +google-cloud-storage==1.20.0 \ No newline at end of file From b738a75cf3df6115d13a5bd0b353b36e33764fe2 Mon Sep 17 00:00:00 2001 From: nnegrey Date: Thu, 12 Dec 2019 16:22:13 -0700 Subject: [PATCH 02/11] Use a unique prefix --- automl/cloud-client/export_dataset_test.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/automl/cloud-client/export_dataset_test.py b/automl/cloud-client/export_dataset_test.py index 841c6f7127d..9767ec71197 100644 --- a/automl/cloud-client/export_dataset_test.py +++ b/automl/cloud-client/export_dataset_test.py @@ -14,6 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import datetime import os import pytest @@ -23,12 +24,15 @@ PROJECT_ID = os.environ["GCLOUD_PROJECT"] BUCKET_ID = "{}-lcm".format(PROJECT_ID) DATASET_ID = "TEN4058147884539838464" +PREFIX = "TEST_EXPORT_OUTPUT_" + datetime.datetime.now().strftime( + "%Y%m%d%H%M%S" +) @pytest.mark.slow def test_export_dataset(capsys): export_dataset.export_dataset( - PROJECT_ID, DATASET_ID, "gs://{}/TEST_EXPORT_OUTPUT/".format(BUCKET_ID) + PROJECT_ID, DATASET_ID, "gs://{}/{}/".format(BUCKET_ID, PREFIX) ) out, _ = capsys.readouterr() @@ -39,6 +43,6 @@ def test_export_dataset(capsys): storage_client = storage.Client() bucket = storage_client.get_bucket(BUCKET_ID) - if len(list(bucket.list_blobs(prefix="TEST_EXPORT_OUTPUT"))) > 0: - for blob in bucket.list_blobs(prefix="TEST_EXPORT_OUTPUT"): + if len(list(bucket.list_blobs(prefix=PREFIX))) > 0: + for blob in bucket.list_blobs(prefix=PREFIX): blob.delete() From 742a40c1e223a2c2cf53dc16befdab04119c9ba0 Mon Sep 17 00:00:00 2001 From: nnegrey Date: Tue, 17 Dec 2019 15:18:46 -0700 Subject: [PATCH 03/11] Move test imports to top / misc feedback cleanup --- automl/cloud-client/delete_dataset.py | 4 +--- automl/cloud-client/delete_dataset_test.py | 5 +---- automl/cloud-client/export_dataset.py | 6 ++---- automl/cloud-client/export_dataset_test.py | 5 +---- automl/cloud-client/get_dataset.py | 16 +++++++--------- automl/cloud-client/get_dataset_test.py | 2 -- automl/cloud-client/import_dataset.py | 6 ++---- automl/cloud-client/import_dataset_test.py | 7 +------ automl/cloud-client/list_datasets.py | 16 +++++++--------- automl/cloud-client/list_datasets_test.py | 2 -- 10 files changed, 22 insertions(+), 47 deletions(-) diff --git a/automl/cloud-client/delete_dataset.py b/automl/cloud-client/delete_dataset.py index c93f9d4f6c6..ff95726c4d5 100644 --- a/automl/cloud-client/delete_dataset.py +++ b/automl/cloud-client/delete_dataset.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python - # Copyright 2019 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -31,5 +29,5 @@ def delete_dataset(project_id, dataset_id): ) response = client.delete_dataset(dataset_full_id) - print(u"Dataset deleted. {}".format(response.result())) + print("Dataset deleted. 
{}".format(response.result())) # [END automl_delete_dataset] diff --git a/automl/cloud-client/delete_dataset_test.py b/automl/cloud-client/delete_dataset_test.py index f5d70764924..793ba536742 100644 --- a/automl/cloud-client/delete_dataset_test.py +++ b/automl/cloud-client/delete_dataset_test.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python - # Copyright 2019 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -17,6 +15,7 @@ import datetime import os +from google.cloud import automl import pytest import delete_dataset @@ -27,8 +26,6 @@ @pytest.fixture(scope="function") def create_dataset(): - from google.cloud import automl - client = automl.AutoMlClient() project_location = client.location_path(PROJECT_ID, "us-central1") display_name = "test_" + datetime.datetime.now().strftime("%Y%m%d%H%M%S") diff --git a/automl/cloud-client/export_dataset.py b/automl/cloud-client/export_dataset.py index 081ee75a934..cfecd7db6c5 100644 --- a/automl/cloud-client/export_dataset.py +++ b/automl/cloud-client/export_dataset.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python - # Copyright 2019 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -23,7 +21,7 @@ def export_dataset(project_id, dataset_id, gcs_uri): # TODO(developer): Uncomment and set the following variables # project_id = 'YOUR_PROJECT_ID' # dataset_id = 'YOUR_DATASET_ID' - # gcs_uri = 'gs://BUCKET_ID/path_to_export/' + # gcs_uri = 'gs://YOUR_BUCKET_ID/path/to/export/' client = automl.AutoMlClient() @@ -36,5 +34,5 @@ def export_dataset(project_id, dataset_id, gcs_uri): output_config = automl.types.OutputConfig(gcs_destination=gcs_destination) response = client.export_data(dataset_full_id, output_config) - print(u"Dataset exported. {}".format(response.result())) + print("Dataset exported. 
{}".format(response.result())) # [END automl_export_dataset] diff --git a/automl/cloud-client/export_dataset_test.py b/automl/cloud-client/export_dataset_test.py index 9767ec71197..32e2bfda90e 100644 --- a/automl/cloud-client/export_dataset_test.py +++ b/automl/cloud-client/export_dataset_test.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python - # Copyright 2019 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -17,6 +15,7 @@ import datetime import os +from google.cloud import storage import pytest import export_dataset @@ -39,8 +38,6 @@ def test_export_dataset(capsys): assert "Dataset exported" in out # Delete the created files - from google.cloud import storage - storage_client = storage.Client() bucket = storage_client.get_bucket(BUCKET_ID) if len(list(bucket.list_blobs(prefix=PREFIX))) > 0: diff --git a/automl/cloud-client/get_dataset.py b/automl/cloud-client/get_dataset.py index 89f84960fa4..b2ac379caee 100644 --- a/automl/cloud-client/get_dataset.py +++ b/automl/cloud-client/get_dataset.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python - # Copyright 2019 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -37,12 +35,12 @@ def get_dataset(project_id, dataset_id): dataset = client.get_dataset(dataset_full_id) # Display the dataset information - print(u"Dataset name: {}".format(dataset.name)) - print(u"Dataset id: {}".format(dataset.name.split("/")[-1])) - print(u"Dataset display name: {}".format(dataset.display_name)) + print("Dataset name: {}".format(dataset.name)) + print("Dataset id: {}".format(dataset.name.split("/")[-1])) + print("Dataset display name: {}".format(dataset.display_name)) print("Dataset create time:") - print(u"\tseconds: {}".format(dataset.create_time.seconds)) - print(u"\tnanos: {}".format(dataset.create_time.nanos)) + print("\tseconds: {}".format(dataset.create_time.seconds)) + print("\tnanos: {}".format(dataset.create_time.nanos)) # [END automl_language_sentiment_analysis_get_dataset] # [END automl_language_text_classification_get_dataset] # [END automl_translate_get_dataset] @@ -74,12 +72,12 @@ def get_dataset(project_id, dataset_id): # [START automl_translate_get_dataset] print("Translation dataset metadata:") print( - u"\tsource_language_code: {}".format( + "\tsource_language_code: {}".format( dataset.translation_dataset_metadata.source_language_code ) ) print( - u"\ttarget_language_code: {}".format( + "\ttarget_language_code: {}".format( dataset.translation_dataset_metadata.target_language_code ) ) diff --git a/automl/cloud-client/get_dataset_test.py b/automl/cloud-client/get_dataset_test.py index 5c1aa1c1bd6..7a4434bc599 100644 --- a/automl/cloud-client/get_dataset_test.py +++ b/automl/cloud-client/get_dataset_test.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python - # Copyright 2019 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/automl/cloud-client/import_dataset.py b/automl/cloud-client/import_dataset.py index c7285f7051a..f51df5188b2 100644 --- a/automl/cloud-client/import_dataset.py +++ b/automl/cloud-client/import_dataset.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python - # Copyright 2019 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -23,7 +21,7 @@ def import_dataset(project_id, dataset_id, path): # TODO(developer): Uncomment and set the following variables # project_id = 'YOUR_PROJECT_ID' # dataset_id = 'YOUR_DATASET_ID' - # path = 'gs://BUCKET_ID/path_to_training_data.csv' + # path = 'gs://YOUR_BUCKET_ID/path/to/data.csv' client = 
automl.AutoMlClient() # Get the full path of the dataset. @@ -38,5 +36,5 @@ def import_dataset(project_id, dataset_id, path): response = client.import_data(dataset_full_id, input_config) print("Processing import...") - print(u"Data imported. {}".format(response.result())) + print("Data imported. {}".format(response.result())) # [END automl_import_data] diff --git a/automl/cloud-client/import_dataset_test.py b/automl/cloud-client/import_dataset_test.py index e5958ae2c31..a0111a2d2ea 100644 --- a/automl/cloud-client/import_dataset_test.py +++ b/automl/cloud-client/import_dataset_test.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python - # Copyright 2019 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -17,6 +15,7 @@ import datetime import os +from google.cloud import automl import pytest import import_dataset @@ -28,8 +27,6 @@ @pytest.fixture(scope="function") def create_dataset(): - from google.cloud import automl - client = automl.AutoMlClient() project_location = client.location_path(PROJECT_ID, "us-central1") display_name = "test_" + datetime.datetime.now().strftime("%Y%m%d%H%M%S") @@ -53,8 +50,6 @@ def test_import_dataset(capsys, create_dataset): assert "Data imported." in out # delete created dataset - from google.cloud import automl - client = automl.AutoMlClient() dataset_full_id = client.dataset_path( PROJECT_ID, "us-central1", create_dataset diff --git a/automl/cloud-client/list_datasets.py b/automl/cloud-client/list_datasets.py index be207669ca4..a0ef9cbfa95 100644 --- a/automl/cloud-client/list_datasets.py +++ b/automl/cloud-client/list_datasets.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python - # Copyright 2019 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -37,12 +35,12 @@ def list_datasets(project_id): print("List of datasets:") for dataset in response: - print(u"Dataset name: {}".format(dataset.name)) - print(u"Dataset id: {}".format(dataset.name.split("/")[-1])) - print(u"Dataset display name: {}".format(dataset.display_name)) + print("Dataset name: {}".format(dataset.name)) + print("Dataset id: {}".format(dataset.name.split("/")[-1])) + print("Dataset display name: {}".format(dataset.display_name)) print("Dataset create time:") - print(u"\tseconds: {}".format(dataset.create_time.seconds)) - print(u"\tnanos: {}".format(dataset.create_time.nanos)) + print("\tseconds: {}".format(dataset.create_time.seconds)) + print("\tnanos: {}".format(dataset.create_time.nanos)) # [END automl_language_sentiment_analysis_list_datasets] # [END automl_language_text_classification_list_datasets] # [END automl_translate_list_datasets] @@ -74,12 +72,12 @@ def list_datasets(project_id): # [START automl_translate_list_datasets] print("Translation dataset metadata:") print( - u"\tsource_language_code: {}".format( + "\tsource_language_code: {}".format( dataset.translation_dataset_metadata.source_language_code ) ) print( - u"\ttarget_language_code: {}".format( + "\ttarget_language_code: {}".format( dataset.translation_dataset_metadata.target_language_code ) ) diff --git a/automl/cloud-client/list_datasets_test.py b/automl/cloud-client/list_datasets_test.py index f580b30eb7f..a0fc986597e 100644 --- a/automl/cloud-client/list_datasets_test.py +++ b/automl/cloud-client/list_datasets_test.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python - # Copyright 2019 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); From 2faddb168eb5a216149d589cd6f6e731f4b0d90b Mon Sep 17 00:00:00 2001 From: nnegrey Date: Tue, 17 Dec 2019 16:31:50 -0700 Subject: 
[PATCH 04/11] Update tests --- automl/cloud-client/export_dataset_test.py | 1 - automl/cloud-client/import_dataset_test.py | 6 +++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/automl/cloud-client/export_dataset_test.py b/automl/cloud-client/export_dataset_test.py index 32e2bfda90e..e275f59f4f4 100644 --- a/automl/cloud-client/export_dataset_test.py +++ b/automl/cloud-client/export_dataset_test.py @@ -28,7 +28,6 @@ ) -@pytest.mark.slow def test_export_dataset(capsys): export_dataset.export_dataset( PROJECT_ID, DATASET_ID, "gs://{}/{}/".format(BUCKET_ID, PREFIX) diff --git a/automl/cloud-client/import_dataset_test.py b/automl/cloud-client/import_dataset_test.py index a0111a2d2ea..82978af4bc4 100644 --- a/automl/cloud-client/import_dataset_test.py +++ b/automl/cloud-client/import_dataset_test.py @@ -22,7 +22,6 @@ PROJECT_ID = os.environ["GCLOUD_PROJECT"] BUCKET_ID = "{}-lcm".format(PROJECT_ID) -DATASET_ID = "TEN4058147884539838464" @pytest.fixture(scope="function") @@ -45,14 +44,15 @@ def test_import_dataset(capsys, create_dataset): data = ( "gs://cloud-samples-data/automl/language_entity_extraction/dataset.csv" ) - import_dataset.import_dataset(PROJECT_ID, create_dataset, data) + dataset_id = create_dataset + import_dataset.import_dataset(PROJECT_ID, dataset_id, data) out, _ = capsys.readouterr() assert "Data imported." in out # delete created dataset client = automl.AutoMlClient() dataset_full_id = client.dataset_path( - PROJECT_ID, "us-central1", create_dataset + PROJECT_ID, "us-central1", dataset_id ) response = client.delete_dataset(dataset_full_id) response.result() From bd937829861b470db7662595083a71c1cff1d759 Mon Sep 17 00:00:00 2001 From: nnegrey Date: Wed, 18 Dec 2019 09:30:03 -0700 Subject: [PATCH 05/11] Use centralized testing project for automl resources --- automl/cloud-client/delete_dataset_test.py | 2 +- automl/cloud-client/export_dataset_test.py | 5 ++--- automl/cloud-client/get_dataset_test.py | 4 ++-- automl/cloud-client/import_dataset_test.py | 2 +- automl/cloud-client/list_datasets_test.py | 4 ++-- 5 files changed, 8 insertions(+), 9 deletions(-) diff --git a/automl/cloud-client/delete_dataset_test.py b/automl/cloud-client/delete_dataset_test.py index 793ba536742..333eb3d9430 100644 --- a/automl/cloud-client/delete_dataset_test.py +++ b/automl/cloud-client/delete_dataset_test.py @@ -20,7 +20,7 @@ import delete_dataset -PROJECT_ID = os.environ["GCLOUD_PROJECT"] +PROJECT_ID = 'cdpe-automl-tests' BUCKET_ID = "{}-lcm".format(PROJECT_ID) diff --git a/automl/cloud-client/export_dataset_test.py b/automl/cloud-client/export_dataset_test.py index e275f59f4f4..dd35a1a317c 100644 --- a/automl/cloud-client/export_dataset_test.py +++ b/automl/cloud-client/export_dataset_test.py @@ -16,13 +16,12 @@ import os from google.cloud import storage -import pytest import export_dataset -PROJECT_ID = os.environ["GCLOUD_PROJECT"] +PROJECT_ID = 'cdpe-automl-tests' BUCKET_ID = "{}-lcm".format(PROJECT_ID) -DATASET_ID = "TEN4058147884539838464" +DATASET_ID = "TEN6765176298449928192" PREFIX = "TEST_EXPORT_OUTPUT_" + datetime.datetime.now().strftime( "%Y%m%d%H%M%S" ) diff --git a/automl/cloud-client/get_dataset_test.py b/automl/cloud-client/get_dataset_test.py index 7a4434bc599..175c89e25c7 100644 --- a/automl/cloud-client/get_dataset_test.py +++ b/automl/cloud-client/get_dataset_test.py @@ -16,8 +16,8 @@ import get_dataset -PROJECT_ID = os.environ["GCLOUD_PROJECT"] -DATASET_ID = "TEN4058147884539838464" +PROJECT_ID = "cdpe-automl-tests" +DATASET_ID = "TEN6765176298449928192" def 
test_get_dataset(capsys): diff --git a/automl/cloud-client/import_dataset_test.py b/automl/cloud-client/import_dataset_test.py index 82978af4bc4..08bf7fdbe0a 100644 --- a/automl/cloud-client/import_dataset_test.py +++ b/automl/cloud-client/import_dataset_test.py @@ -20,7 +20,7 @@ import import_dataset -PROJECT_ID = os.environ["GCLOUD_PROJECT"] +PROJECT_ID = "cdpe-automl-tests" BUCKET_ID = "{}-lcm".format(PROJECT_ID) diff --git a/automl/cloud-client/list_datasets_test.py b/automl/cloud-client/list_datasets_test.py index a0fc986597e..05633928823 100644 --- a/automl/cloud-client/list_datasets_test.py +++ b/automl/cloud-client/list_datasets_test.py @@ -16,8 +16,8 @@ import list_datasets -PROJECT_ID = os.environ["GCLOUD_PROJECT"] -DATASET_ID = "TEN4058147884539838464" +PROJECT_ID = "cdpe-automl-tests" +DATASET_ID = "TEN6765176298449928192" def test_list_dataset(capsys): From 5e04d30e111a9f6a333906979fa736dece2c09a7 Mon Sep 17 00:00:00 2001 From: nnegrey Date: Wed, 18 Dec 2019 10:02:30 -0700 Subject: [PATCH 06/11] Test fix --- automl/cloud-client/delete_dataset_test.py | 2 +- automl/cloud-client/export_dataset_test.py | 4 ++-- automl/cloud-client/get_dataset_test.py | 4 ++-- automl/cloud-client/import_dataset_test.py | 2 +- automl/cloud-client/list_datasets_test.py | 4 ++-- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/automl/cloud-client/delete_dataset_test.py b/automl/cloud-client/delete_dataset_test.py index 333eb3d9430..793ba536742 100644 --- a/automl/cloud-client/delete_dataset_test.py +++ b/automl/cloud-client/delete_dataset_test.py @@ -20,7 +20,7 @@ import delete_dataset -PROJECT_ID = 'cdpe-automl-tests' +PROJECT_ID = os.environ["GCLOUD_PROJECT"] BUCKET_ID = "{}-lcm".format(PROJECT_ID) diff --git a/automl/cloud-client/export_dataset_test.py b/automl/cloud-client/export_dataset_test.py index dd35a1a317c..5916b180a22 100644 --- a/automl/cloud-client/export_dataset_test.py +++ b/automl/cloud-client/export_dataset_test.py @@ -19,9 +19,9 @@ import export_dataset -PROJECT_ID = 'cdpe-automl-tests' +PROJECT_ID = os.environ["GCLOUD_PROJECT"] BUCKET_ID = "{}-lcm".format(PROJECT_ID) -DATASET_ID = "TEN6765176298449928192" +DATASET_ID = "TEN4058147884539838464" PREFIX = "TEST_EXPORT_OUTPUT_" + datetime.datetime.now().strftime( "%Y%m%d%H%M%S" ) diff --git a/automl/cloud-client/get_dataset_test.py b/automl/cloud-client/get_dataset_test.py index 175c89e25c7..7a4434bc599 100644 --- a/automl/cloud-client/get_dataset_test.py +++ b/automl/cloud-client/get_dataset_test.py @@ -16,8 +16,8 @@ import get_dataset -PROJECT_ID = "cdpe-automl-tests" -DATASET_ID = "TEN6765176298449928192" +PROJECT_ID = os.environ["GCLOUD_PROJECT"] +DATASET_ID = "TEN4058147884539838464" def test_get_dataset(capsys): diff --git a/automl/cloud-client/import_dataset_test.py b/automl/cloud-client/import_dataset_test.py index 08bf7fdbe0a..82978af4bc4 100644 --- a/automl/cloud-client/import_dataset_test.py +++ b/automl/cloud-client/import_dataset_test.py @@ -20,7 +20,7 @@ import import_dataset -PROJECT_ID = "cdpe-automl-tests" +PROJECT_ID = os.environ["GCLOUD_PROJECT"] BUCKET_ID = "{}-lcm".format(PROJECT_ID) diff --git a/automl/cloud-client/list_datasets_test.py b/automl/cloud-client/list_datasets_test.py index 05633928823..a0fc986597e 100644 --- a/automl/cloud-client/list_datasets_test.py +++ b/automl/cloud-client/list_datasets_test.py @@ -16,8 +16,8 @@ import list_datasets -PROJECT_ID = "cdpe-automl-tests" -DATASET_ID = "TEN6765176298449928192" +PROJECT_ID = os.environ["GCLOUD_PROJECT"] +DATASET_ID = 
"TEN4058147884539838464" def test_list_dataset(capsys): From 6e8ba575d0e89322bc641046de5204e9480e901a Mon Sep 17 00:00:00 2001 From: nnegrey Date: Fri, 20 Dec 2019 08:49:36 -0700 Subject: [PATCH 07/11] Consistently use double quotes --- automl/cloud-client/delete_dataset.py | 4 ++-- automl/cloud-client/export_dataset.py | 6 +++--- automl/cloud-client/get_dataset.py | 4 ++-- automl/cloud-client/import_dataset.py | 6 +++--- automl/cloud-client/list_datasets.py | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/automl/cloud-client/delete_dataset.py b/automl/cloud-client/delete_dataset.py index ff95726c4d5..c02442ef307 100644 --- a/automl/cloud-client/delete_dataset.py +++ b/automl/cloud-client/delete_dataset.py @@ -19,8 +19,8 @@ def delete_dataset(project_id, dataset_id): from google.cloud import automl # TODO(developer): Uncomment and set the following variables - # project_id = 'YOUR_PROJECT_ID' - # dataset_id = 'YOUR_DATASET_ID' + # project_id = "YOUR_PROJECT_ID" + # dataset_id = "YOUR_DATASET_ID" client = automl.AutoMlClient() # Get the full path of the dataset diff --git a/automl/cloud-client/export_dataset.py b/automl/cloud-client/export_dataset.py index cfecd7db6c5..678806272d5 100644 --- a/automl/cloud-client/export_dataset.py +++ b/automl/cloud-client/export_dataset.py @@ -19,9 +19,9 @@ def export_dataset(project_id, dataset_id, gcs_uri): from google.cloud import automl # TODO(developer): Uncomment and set the following variables - # project_id = 'YOUR_PROJECT_ID' - # dataset_id = 'YOUR_DATASET_ID' - # gcs_uri = 'gs://YOUR_BUCKET_ID/path/to/export/' + # project_id = "YOUR_PROJECT_ID" + # dataset_id = "YOUR_DATASET_ID" + # gcs_uri = "gs://YOUR_BUCKET_ID/path/to/export/" client = automl.AutoMlClient() diff --git a/automl/cloud-client/get_dataset.py b/automl/cloud-client/get_dataset.py index b2ac379caee..3465f15f77f 100644 --- a/automl/cloud-client/get_dataset.py +++ b/automl/cloud-client/get_dataset.py @@ -24,8 +24,8 @@ def get_dataset(project_id, dataset_id): from google.cloud import automl # TODO(developer): Uncomment and set the following variables - # project_id = 'YOUR_PROJECT_ID' - # dataset_id = 'YOUR_DATASET_ID' + # project_id = "YOUR_PROJECT_ID" + # dataset_id = "YOUR_DATASET_ID" client = automl.AutoMlClient() # Get the full path of the dataset diff --git a/automl/cloud-client/import_dataset.py b/automl/cloud-client/import_dataset.py index f51df5188b2..85654aaec68 100644 --- a/automl/cloud-client/import_dataset.py +++ b/automl/cloud-client/import_dataset.py @@ -19,9 +19,9 @@ def import_dataset(project_id, dataset_id, path): from google.cloud import automl # TODO(developer): Uncomment and set the following variables - # project_id = 'YOUR_PROJECT_ID' - # dataset_id = 'YOUR_DATASET_ID' - # path = 'gs://YOUR_BUCKET_ID/path/to/data.csv' + # project_id = "YOUR_PROJECT_ID" + # dataset_id = "YOUR_DATASET_ID" + # path = "gs://YOUR_BUCKET_ID/path/to/data.csv" client = automl.AutoMlClient() # Get the full path of the dataset. diff --git a/automl/cloud-client/list_datasets.py b/automl/cloud-client/list_datasets.py index a0ef9cbfa95..ea66bd09ef4 100644 --- a/automl/cloud-client/list_datasets.py +++ b/automl/cloud-client/list_datasets.py @@ -24,7 +24,7 @@ def list_datasets(project_id): from google.cloud import automl # TODO(developer): Uncomment and set the following variables - # project_id = 'YOUR_PROJECT_ID' + # project_id = "YOUR_PROJECT_ID" client = automl.AutoMlClient() # A resource that represents Google Cloud Platform location. 
From b16154f61c1b23a49bc2ef188ffa5e0005a90ad1 Mon Sep 17 00:00:00 2001 From: nnegrey Date: Thu, 2 Jan 2020 10:57:32 -0700 Subject: [PATCH 08/11] License year 2020 --- automl/cloud-client/delete_dataset.py | 2 +- automl/cloud-client/delete_dataset_test.py | 2 +- automl/cloud-client/export_dataset.py | 2 +- automl/cloud-client/export_dataset_test.py | 2 +- automl/cloud-client/get_dataset.py | 2 +- automl/cloud-client/get_dataset_test.py | 2 +- automl/cloud-client/import_dataset.py | 2 +- automl/cloud-client/import_dataset_test.py | 2 +- automl/cloud-client/list_datasets.py | 2 +- automl/cloud-client/list_datasets_test.py | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) diff --git a/automl/cloud-client/delete_dataset.py b/automl/cloud-client/delete_dataset.py index c02442ef307..e6136c13b84 100644 --- a/automl/cloud-client/delete_dataset.py +++ b/automl/cloud-client/delete_dataset.py @@ -1,4 +1,4 @@ -# Copyright 2019 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/automl/cloud-client/delete_dataset_test.py b/automl/cloud-client/delete_dataset_test.py index 793ba536742..fc9ecbde4d7 100644 --- a/automl/cloud-client/delete_dataset_test.py +++ b/automl/cloud-client/delete_dataset_test.py @@ -1,4 +1,4 @@ -# Copyright 2019 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/automl/cloud-client/export_dataset.py b/automl/cloud-client/export_dataset.py index 678806272d5..45f7ee6bdd7 100644 --- a/automl/cloud-client/export_dataset.py +++ b/automl/cloud-client/export_dataset.py @@ -1,4 +1,4 @@ -# Copyright 2019 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/automl/cloud-client/export_dataset_test.py b/automl/cloud-client/export_dataset_test.py index 5916b180a22..87502d6fee2 100644 --- a/automl/cloud-client/export_dataset_test.py +++ b/automl/cloud-client/export_dataset_test.py @@ -1,4 +1,4 @@ -# Copyright 2019 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/automl/cloud-client/get_dataset.py b/automl/cloud-client/get_dataset.py index 3465f15f77f..a1831903c17 100644 --- a/automl/cloud-client/get_dataset.py +++ b/automl/cloud-client/get_dataset.py @@ -1,4 +1,4 @@ -# Copyright 2019 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/automl/cloud-client/get_dataset_test.py b/automl/cloud-client/get_dataset_test.py index 7a4434bc599..6ae960fd417 100644 --- a/automl/cloud-client/get_dataset_test.py +++ b/automl/cloud-client/get_dataset_test.py @@ -1,4 +1,4 @@ -# Copyright 2019 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/automl/cloud-client/import_dataset.py b/automl/cloud-client/import_dataset.py index 85654aaec68..f465bdb1e02 100644 --- a/automl/cloud-client/import_dataset.py +++ b/automl/cloud-client/import_dataset.py @@ -1,4 +1,4 @@ -# Copyright 2019 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/automl/cloud-client/import_dataset_test.py b/automl/cloud-client/import_dataset_test.py index 82978af4bc4..761050d9544 100644 --- a/automl/cloud-client/import_dataset_test.py +++ b/automl/cloud-client/import_dataset_test.py @@ -1,4 +1,4 @@ -# Copyright 2019 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/automl/cloud-client/list_datasets.py b/automl/cloud-client/list_datasets.py index ea66bd09ef4..ae8c576e33c 100644 --- a/automl/cloud-client/list_datasets.py +++ b/automl/cloud-client/list_datasets.py @@ -1,4 +1,4 @@ -# Copyright 2019 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/automl/cloud-client/list_datasets_test.py b/automl/cloud-client/list_datasets_test.py index a0fc986597e..2e183d194c7 100644 --- a/automl/cloud-client/list_datasets_test.py +++ b/automl/cloud-client/list_datasets_test.py @@ -1,4 +1,4 @@ -# Copyright 2019 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From b881b201e4ff9f1924d1ccc626a23f1f21a0729d Mon Sep 17 00:00:00 2001 From: nnegrey Date: Thu, 2 Jan 2020 13:59:01 -0700 Subject: [PATCH 09/11] Use a fake dataset to fix the flaky test as only one operation can work at a time --- automl/cloud-client/export_dataset_test.py | 35 ++++++++++++---------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/automl/cloud-client/export_dataset_test.py b/automl/cloud-client/export_dataset_test.py index 87502d6fee2..9548739c1f4 100644 --- a/automl/cloud-client/export_dataset_test.py +++ b/automl/cloud-client/export_dataset_test.py @@ -15,29 +15,32 @@ import datetime import os -from google.cloud import storage - import export_dataset PROJECT_ID = os.environ["GCLOUD_PROJECT"] BUCKET_ID = "{}-lcm".format(PROJECT_ID) -DATASET_ID = "TEN4058147884539838464" PREFIX = "TEST_EXPORT_OUTPUT_" + datetime.datetime.now().strftime( "%Y%m%d%H%M%S" ) +DATASET_ID = "TEN0000000000000000000" def test_export_dataset(capsys): - export_dataset.export_dataset( - PROJECT_ID, DATASET_ID, "gs://{}/{}/".format(BUCKET_ID, PREFIX) - ) - - out, _ = capsys.readouterr() - assert "Dataset exported" in out - - # Delete the created files - storage_client = storage.Client() - bucket = storage_client.get_bucket(BUCKET_ID) - if len(list(bucket.list_blobs(prefix=PREFIX))) > 0: - for blob in bucket.list_blobs(prefix=PREFIX): - blob.delete() + # As exporting a dataset can take a long time and only one operation can be + # run on a dataset at once. Try to export a nonexistent dataset and confirm + # that the dataset was not found, but other elements of the request were\ + # valid. 
+ try: + export_dataset.export_dataset( + PROJECT_ID, DATASET_ID, "gs://{}/{}/".format(BUCKET_ID, PREFIX) + ) + out, _ = capsys.readouterr() + assert ( + "The Dataset doesn't exist or is inaccessible for use with AutoMl." + in out + ) + except Exception as e: + assert ( + "The Dataset doesn't exist or is inaccessible for use with AutoMl." + in e.message + ) From a70f4dd75f0d3c05169d410981866c70fb40e5f8 Mon Sep 17 00:00:00 2001 From: nnegrey Date: Tue, 7 Jan 2020 13:47:13 -0700 Subject: [PATCH 10/11] use centralized automl testing project --- automl/cloud-client/delete_dataset_test.py | 2 +- automl/cloud-client/export_dataset_test.py | 2 +- automl/cloud-client/get_dataset_test.py | 4 ++-- automl/cloud-client/import_dataset_test.py | 2 +- automl/cloud-client/list_datasets_test.py | 4 ++-- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/automl/cloud-client/delete_dataset_test.py b/automl/cloud-client/delete_dataset_test.py index fc9ecbde4d7..8a1057a6f95 100644 --- a/automl/cloud-client/delete_dataset_test.py +++ b/automl/cloud-client/delete_dataset_test.py @@ -20,7 +20,7 @@ import delete_dataset -PROJECT_ID = os.environ["GCLOUD_PROJECT"] +PROJECT_ID = os.environ["AUTOML_PROJECT_ID"] BUCKET_ID = "{}-lcm".format(PROJECT_ID) diff --git a/automl/cloud-client/export_dataset_test.py b/automl/cloud-client/export_dataset_test.py index 9548739c1f4..de8bfe5aefa 100644 --- a/automl/cloud-client/export_dataset_test.py +++ b/automl/cloud-client/export_dataset_test.py @@ -17,7 +17,7 @@ import export_dataset -PROJECT_ID = os.environ["GCLOUD_PROJECT"] +PROJECT_ID = os.environ["AUTOML_PROJECT_ID"] BUCKET_ID = "{}-lcm".format(PROJECT_ID) PREFIX = "TEST_EXPORT_OUTPUT_" + datetime.datetime.now().strftime( "%Y%m%d%H%M%S" diff --git a/automl/cloud-client/get_dataset_test.py b/automl/cloud-client/get_dataset_test.py index 6ae960fd417..3cc688eb3f9 100644 --- a/automl/cloud-client/get_dataset_test.py +++ b/automl/cloud-client/get_dataset_test.py @@ -16,8 +16,8 @@ import get_dataset -PROJECT_ID = os.environ["GCLOUD_PROJECT"] -DATASET_ID = "TEN4058147884539838464" +PROJECT_ID = os.environ["AUTOML_PROJECT_ID"] +DATASET_ID = os.environ["ENTITY_EXTRACTION_DATASET_ID"] def test_get_dataset(capsys): diff --git a/automl/cloud-client/import_dataset_test.py b/automl/cloud-client/import_dataset_test.py index 761050d9544..e753ce08c32 100644 --- a/automl/cloud-client/import_dataset_test.py +++ b/automl/cloud-client/import_dataset_test.py @@ -20,7 +20,7 @@ import import_dataset -PROJECT_ID = os.environ["GCLOUD_PROJECT"] +PROJECT_ID = os.environ["AUTOML_PROJECT_ID"] BUCKET_ID = "{}-lcm".format(PROJECT_ID) diff --git a/automl/cloud-client/list_datasets_test.py b/automl/cloud-client/list_datasets_test.py index 2e183d194c7..7057af815d1 100644 --- a/automl/cloud-client/list_datasets_test.py +++ b/automl/cloud-client/list_datasets_test.py @@ -16,8 +16,8 @@ import list_datasets -PROJECT_ID = os.environ["GCLOUD_PROJECT"] -DATASET_ID = "TEN4058147884539838464" +PROJECT_ID = os.environ["AUTOML_PROJECT_ID"] +DATASET_ID = os.environ["ENTITY_EXTRACTION_DATASET_ID"] def test_list_dataset(capsys): From 4dbaf3b1941169664b476a5427cbb9afb05fa432 Mon Sep 17 00:00:00 2001 From: nnegrey Date: Tue, 7 Jan 2020 15:00:35 -0700 Subject: [PATCH 11/11] use different automl product to import data --- automl/cloud-client/import_dataset_test.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/automl/cloud-client/import_dataset_test.py b/automl/cloud-client/import_dataset_test.py index e753ce08c32..2064abbcb48 100644 --- 
a/automl/cloud-client/import_dataset_test.py
+++ b/automl/cloud-client/import_dataset_test.py
@@ -29,9 +29,11 @@ def create_dataset():
     client = automl.AutoMlClient()
     project_location = client.location_path(PROJECT_ID, "us-central1")
     display_name = "test_" + datetime.datetime.now().strftime("%Y%m%d%H%M%S")
-    metadata = automl.types.TextExtractionDatasetMetadata()
+    metadata = automl.types.TextSentimentDatasetMetadata(
+        sentiment_max=4
+    )
     dataset = automl.types.Dataset(
-        display_name=display_name, text_extraction_dataset_metadata=metadata
+        display_name=display_name, text_sentiment_dataset_metadata=metadata
     )
     response = client.create_dataset(project_location, dataset)
     dataset_id = response.result().name.split("/")[-1]
@@ -42,7 +44,7 @@ def create_dataset():
 @pytest.mark.slow
 def test_import_dataset(capsys, create_dataset):
     data = (
-        "gs://cloud-samples-data/automl/language_entity_extraction/dataset.csv"
+        "gs://{}/sentiment-analysis/dataset.csv".format(BUCKET_ID)
     )
     dataset_id = create_dataset
     import_dataset.import_dataset(PROJECT_ID, dataset_id, data)