From f741c89722028ef1dfaec4cffae1c031e51d5f00 Mon Sep 17 00:00:00 2001 From: nnegrey Date: Thu, 2 Jan 2020 11:46:26 -0700 Subject: [PATCH 1/4] automl: add natural language entity extraction ga samples --- ...nguage_entity_extraction_create_dataset.py | 42 +++++++++++++++ ...e_entity_extraction_create_dataset_test.py | 42 +++++++++++++++ ...language_entity_extraction_create_model.py | 43 +++++++++++++++ ...age_entity_extraction_create_model_test.py | 34 ++++++++++++ .../language_entity_extraction_predict.py | 53 +++++++++++++++++++ ...language_entity_extraction_predict_test.py | 33 ++++++++++++ 6 files changed, 247 insertions(+) create mode 100644 automl/cloud-client/language_entity_extraction_create_dataset.py create mode 100644 automl/cloud-client/language_entity_extraction_create_dataset_test.py create mode 100644 automl/cloud-client/language_entity_extraction_create_model.py create mode 100644 automl/cloud-client/language_entity_extraction_create_model_test.py create mode 100644 automl/cloud-client/language_entity_extraction_predict.py create mode 100644 automl/cloud-client/language_entity_extraction_predict_test.py diff --git a/automl/cloud-client/language_entity_extraction_create_dataset.py b/automl/cloud-client/language_entity_extraction_create_dataset.py new file mode 100644 index 00000000000..056ff22c9d5 --- /dev/null +++ b/automl/cloud-client/language_entity_extraction_create_dataset.py @@ -0,0 +1,42 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def create_dataset(project_id, display_name):
+    """Create an entity extraction dataset and print its name and id."""
+    # [START automl_language_entity_extraction_create_dataset]
+    from google.cloud import automl
+
+    # TODO(developer): Uncomment and set the following variables
+    # project_id = "YOUR_PROJECT_ID"
+    # display_name = "YOUR_DATASET_NAME"
+
+    client = automl.AutoMlClient()
+
+    # A resource that represents a Google Cloud Platform location.
+    project_location = client.location_path(project_id, "us-central1")
+    metadata = automl.types.TextExtractionDatasetMetadata()
+    dataset = automl.types.Dataset(
+        display_name=display_name, text_extraction_dataset_metadata=metadata
+    )
+
+    # Create a dataset with the dataset metadata in the region.
+    response = client.create_dataset(project_location, dataset)
+
+    created_dataset = response.result()
+
+    # Display the dataset information
+    print("Dataset name: {}".format(created_dataset.name))
+    print("Dataset id: {}".format(created_dataset.name.split("/")[-1]))
+    # [END automl_language_entity_extraction_create_dataset]
diff --git a/automl/cloud-client/language_entity_extraction_create_dataset_test.py b/automl/cloud-client/language_entity_extraction_create_dataset_test.py
new file mode 100644
index 00000000000..15e0ba6d19a
--- /dev/null
+++ b/automl/cloud-client/language_entity_extraction_create_dataset_test.py
@@ -0,0 +1,42 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import datetime
+import os
+
+from google.cloud import automl
+
+import language_entity_extraction_create_dataset
+
+
+PROJECT_ID = os.environ["GCLOUD_PROJECT"]
+
+
+def test_entity_extraction_create_dataset(capsys):
+    # create dataset
+    dataset_name = "test_" + datetime.datetime.now().strftime("%Y%m%d%H%M%S")
+    language_entity_extraction_create_dataset.create_dataset(
+        PROJECT_ID, dataset_name
+    )
+    out, _ = capsys.readouterr()
+    assert "Dataset id: " in out
+
+    # Delete the created dataset; find its id by label, not output position
+    dataset_id = out.split("Dataset id: ")[1].split("\n")[0]
+    client = automl.AutoMlClient()
+    dataset_full_id = client.dataset_path(
+        PROJECT_ID, "us-central1", dataset_id
+    )
+    response = client.delete_dataset(dataset_full_id)
+    response.result()
diff --git a/automl/cloud-client/language_entity_extraction_create_model.py b/automl/cloud-client/language_entity_extraction_create_model.py
new file mode 100644
index 00000000000..5e0748dd567
--- /dev/null
+++ b/automl/cloud-client/language_entity_extraction_create_model.py
@@ -0,0 +1,43 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def create_model(project_id, dataset_id, display_name):
+    """Start entity extraction model training; print the operation name."""
+    # [START automl_language_entity_extraction_create_model]
+    from google.cloud import automl
+
+    # TODO(developer): Uncomment and set the following variables
+    # project_id = "YOUR_PROJECT_ID"
+    # dataset_id = "YOUR_DATASET_ID"
+    # display_name = "YOUR_MODEL_NAME"
+
+    client = automl.AutoMlClient()
+
+    # A resource that represents a Google Cloud Platform location.
+    project_location = client.location_path(project_id, "us-central1")
+    # Leave model unset to use the default base model provided by Google
+    metadata = automl.types.TextExtractionModelMetadata()
+    model = automl.types.Model(
+        display_name=display_name,
+        dataset_id=dataset_id,
+        text_extraction_model_metadata=metadata,
+    )
+
+    # Create a model with the model metadata in the region.
+    response = client.create_model(project_location, model)
+
+    print("Training operation name: {}".format(response.operation.name))
+    print("Training started...")
+    # [END automl_language_entity_extraction_create_model]
diff --git a/automl/cloud-client/language_entity_extraction_create_model_test.py b/automl/cloud-client/language_entity_extraction_create_model_test.py
new file mode 100644
index 00000000000..27fef303771
--- /dev/null
+++ b/automl/cloud-client/language_entity_extraction_create_model_test.py
@@ -0,0 +1,34 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import language_entity_extraction_create_model
+
+PROJECT_ID = os.environ["GCLOUD_PROJECT"]
+DATASET_ID = "TEN0000000000000000000"
+
+
+def test_entity_extraction_create_model(capsys):
+    # Entity extraction does not let you cancel model creation, so send an
+    # otherwise valid request that names a nonexistent dataset instead.
+    try:
+        language_entity_extraction_create_model.create_model(
+            PROJECT_ID, DATASET_ID, "classification_test_create_model"
+        )
+    except Exception as e:
+        assert "Dataset does not exist." in str(e)
+    else:
+        out, _ = capsys.readouterr()
+        assert "Dataset does not exist." in out
diff --git a/automl/cloud-client/language_entity_extraction_predict.py b/automl/cloud-client/language_entity_extraction_predict.py
new file mode 100644
index 00000000000..020474d3ab1
--- /dev/null
+++ b/automl/cloud-client/language_entity_extraction_predict.py
@@ -0,0 +1,53 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def predict(project_id, model_id, content):
+    """Print the entities an AutoML model extracts from the given text."""
+    # [START automl_language_entity_extraction_predict]
+    from google.cloud import automl
+
+    # TODO(developer): Uncomment and set the following variables
+    # project_id = "YOUR_PROJECT_ID"
+    # model_id = "YOUR_MODEL_ID"
+    # content = "text to predict"
+
+    prediction_client = automl.PredictionServiceClient()
+
+    # Get the full path of the model.
+    model_full_id = prediction_client.model_path(
+        project_id, "us-central1", model_id
+    )
+
+    text_snippet = automl.types.TextSnippet(
+        content=content, mime_type="text/plain"
+    )  # Types: 'text/plain', 'text/html'
+    payload = automl.types.ExamplePayload(text_snippet=text_snippet)
+
+    response = prediction_client.predict(model_full_id, payload)
+
+    for annotation_payload in response.payload:
+        print(
+            "Text Extract Entity Types: {}".format(
+                annotation_payload.display_name
+            )
+        )
+        print(
+            "Text Score: {}".format(annotation_payload.text_extraction.score)
+        )
+        text_segment = annotation_payload.text_extraction.text_segment
+        print("Text Extract Entity Content: {}".format(text_segment.content))
+        print("Text Start Offset: {}".format(text_segment.start_offset))
+        print("Text End Offset: {}".format(text_segment.end_offset))
+    # [END automl_language_entity_extraction_predict]
diff --git a/automl/cloud-client/language_entity_extraction_predict_test.py b/automl/cloud-client/language_entity_extraction_predict_test.py
new file mode 100644
index 00000000000..4353a4f14eb
--- /dev/null
+++ b/automl/cloud-client/language_entity_extraction_predict_test.py
@@ -0,0 +1,33 @@
+#!/usr/bin/env python
+
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import language_entity_extraction_predict
+
+PROJECT_ID = os.environ["GCLOUD_PROJECT"]
+BUCKET_ID = "{}-lcm".format(PROJECT_ID)
+
+
+def test_predict(capsys):
+    model_id = "TEN5112482778553778176"
+    text = (
+        "Constitutional mutations in the WT1 gene in patients with "
+        "Denys-Drash syndrome."
+    )
+    language_entity_extraction_predict.predict(PROJECT_ID, model_id, text)
+    out, _ = capsys.readouterr()
+    assert "Text Extract Entity Types: " in out

From dd07a7d5f2b06d4091505777485c935a94e7ed86 Mon Sep 17 00:00:00 2001
From: Noah Negrey
Date: Thu, 2 Jan 2020 12:19:47 -0700
Subject: [PATCH 2/4] Update language_entity_extraction_predict_test.py

---
 ...language_entity_extraction_predict_test.py | 20 ++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/automl/cloud-client/language_entity_extraction_predict_test.py b/automl/cloud-client/language_entity_extraction_predict_test.py
index 4353a4f14eb..742bc4c0c54 100644
--- a/automl/cloud-client/language_entity_extraction_predict_test.py
+++ b/automl/cloud-client/language_entity_extraction_predict_test.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-
 # Copyright 2020 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -16,13 +14,29 @@
 
 import os
 
+from google.cloud import automl
+import pytest
+
 import language_entity_extraction_predict
 
 PROJECT_ID = os.environ["GCLOUD_PROJECT"]
 BUCKET_ID = "{}-lcm".format(PROJECT_ID)
 
 
-def test_predict(capsys):
+@pytest.fixture(scope="function")
+def verify_model_state():
+    client = automl.AutoMlClient()
+    model_full_id = client.model_path(PROJECT_ID, "us-central1", MODEL_ID)
+
+    model = client.get_model(model_full_id)
+    if model.deployment_state == automl.enums.Model.DeploymentState.UNDEPLOYED:
+        # Deploy model if it is not deployed
+        response = client.deploy_model(model_full_id)
+        response.result()
+
+
+def test_predict(capsys, verify_model_state):
+    # Requesting the verify_model_state fixture deploys the model if needed.
     model_id = "TEN5112482778553778176"
     text = (
         "Constitutional mutations in the WT1 gene in patients with "

From aad697c62fb1ba6e635c0822fe0175c2474920df Mon Sep 17 00:00:00 2001
From: Noah Negrey
Date: Thu, 2 Jan 2020 12:20:24 -0700
Subject: [PATCH 3/4] Update language_entity_extraction_predict_test.py

---
 .../cloud-client/language_entity_extraction_predict_test.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/automl/cloud-client/language_entity_extraction_predict_test.py b/automl/cloud-client/language_entity_extraction_predict_test.py
index 742bc4c0c54..82477339310 100644
--- a/automl/cloud-client/language_entity_extraction_predict_test.py
+++ b/automl/cloud-client/language_entity_extraction_predict_test.py
@@ -20,7 +20,7 @@
 import language_entity_extraction_predict
 
 PROJECT_ID = os.environ["GCLOUD_PROJECT"]
-BUCKET_ID = "{}-lcm".format(PROJECT_ID)
+MODEL_ID = "TEN5112482778553778176"
 
 
 @pytest.fixture(scope="function")
@@ -37,11 +37,10 @@ def verify_model_state():
 
 def test_predict(capsys, verify_model_state):
     # Requesting the verify_model_state fixture deploys the model if needed.
-    model_id = "TEN5112482778553778176"
     text = (
         "Constitutional mutations in the WT1 gene in patients with "
         "Denys-Drash syndrome."
     )
-    language_entity_extraction_predict.predict(PROJECT_ID, model_id, text)
+    language_entity_extraction_predict.predict(PROJECT_ID, MODEL_ID, text)
     out, _ = capsys.readouterr()
     assert "Text Extract Entity Types: " in out

From ed76b1fd3f6bb8cae222beee112a5ad3513541ab Mon Sep 17 00:00:00 2001
From: nnegrey
Date: Tue, 7 Jan 2020 12:59:59 -0700
Subject: [PATCH 4/4] use centralized automl testing project and add comments that link to docs

---
 .../language_entity_extraction_create_dataset_test.py       | 2 +-
 .../language_entity_extraction_create_model_test.py         | 2 +-
 automl/cloud-client/language_entity_extraction_predict.py   | 4 +++-
 .../cloud-client/language_entity_extraction_predict_test.py | 4 ++--
 4 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/automl/cloud-client/language_entity_extraction_create_dataset_test.py b/automl/cloud-client/language_entity_extraction_create_dataset_test.py
index 15e0ba6d19a..044a0d50590 100644
--- a/automl/cloud-client/language_entity_extraction_create_dataset_test.py
+++ b/automl/cloud-client/language_entity_extraction_create_dataset_test.py
@@ -20,7 +20,7 @@
 import language_entity_extraction_create_dataset
 
 
-PROJECT_ID = os.environ["GCLOUD_PROJECT"]
+PROJECT_ID = os.environ["AUTOML_PROJECT_ID"]
 
 
 def test_entity_extraction_create_dataset(capsys):
diff --git a/automl/cloud-client/language_entity_extraction_create_model_test.py b/automl/cloud-client/language_entity_extraction_create_model_test.py
index 27fef303771..0ff74c89b13 100644
--- a/automl/cloud-client/language_entity_extraction_create_model_test.py
+++ b/automl/cloud-client/language_entity_extraction_create_model_test.py
@@ -16,7 +16,7 @@
 
 import language_entity_extraction_create_model
 
-PROJECT_ID = os.environ["GCLOUD_PROJECT"]
+PROJECT_ID = os.environ["AUTOML_PROJECT_ID"]
 DATASET_ID = "TEN0000000000000000000"
 
 
diff --git a/automl/cloud-client/language_entity_extraction_predict.py b/automl/cloud-client/language_entity_extraction_predict.py
index 020474d3ab1..40d7e89b280 100644
--- a/automl/cloud-client/language_entity_extraction_predict.py +++ b/automl/cloud-client/language_entity_extraction_predict.py @@ -30,9 +30,11 @@ def predict(project_id, model_id, content): project_id, "us-central1", model_id ) + # Supported mime_types: 'text/plain', 'text/html' + # https://cloud.google.com/automl/docs/reference/rpc/google.cloud.automl.v1#textsnippet text_snippet = automl.types.TextSnippet( content=content, mime_type="text/plain" - ) # Types: 'text/plain', 'text/html' + ) payload = automl.types.ExamplePayload(text_snippet=text_snippet) response = prediction_client.predict(model_full_id, payload) diff --git a/automl/cloud-client/language_entity_extraction_predict_test.py b/automl/cloud-client/language_entity_extraction_predict_test.py index 82477339310..35dfddefa05 100644 --- a/automl/cloud-client/language_entity_extraction_predict_test.py +++ b/automl/cloud-client/language_entity_extraction_predict_test.py @@ -19,8 +19,8 @@ import language_entity_extraction_predict -PROJECT_ID = os.environ["GCLOUD_PROJECT"] -MODEL_ID = "TEN5112482778553778176" +PROJECT_ID = os.environ["AUTOML_PROJECT_ID"] +MODEL_ID = os.environ["ENTITY_EXTRACTION_MODEL_ID"] @pytest.fixture(scope="function")