From f741c89722028ef1dfaec4cffae1c031e51d5f00 Mon Sep 17 00:00:00 2001
From: nnegrey <nnegrey@google.com>
Date: Thu, 2 Jan 2020 11:46:26 -0700
Subject: [PATCH 1/4] automl: add natural language entity extraction ga samples

---
 ...nguage_entity_extraction_create_dataset.py | 42 +++++++++++++++
 ...e_entity_extraction_create_dataset_test.py | 42 +++++++++++++++
 ...language_entity_extraction_create_model.py | 43 +++++++++++++++
 ...age_entity_extraction_create_model_test.py | 34 ++++++++++++
 .../language_entity_extraction_predict.py     | 53 +++++++++++++++++++
 ...language_entity_extraction_predict_test.py | 33 ++++++++++++
 6 files changed, 247 insertions(+)
 create mode 100644 automl/cloud-client/language_entity_extraction_create_dataset.py
 create mode 100644 automl/cloud-client/language_entity_extraction_create_dataset_test.py
 create mode 100644 automl/cloud-client/language_entity_extraction_create_model.py
 create mode 100644 automl/cloud-client/language_entity_extraction_create_model_test.py
 create mode 100644 automl/cloud-client/language_entity_extraction_predict.py
 create mode 100644 automl/cloud-client/language_entity_extraction_predict_test.py

diff --git a/automl/cloud-client/language_entity_extraction_create_dataset.py b/automl/cloud-client/language_entity_extraction_create_dataset.py
new file mode 100644
index 00000000000..056ff22c9d5
--- /dev/null
+++ b/automl/cloud-client/language_entity_extraction_create_dataset.py
@@ -0,0 +1,42 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def create_dataset(project_id, display_name):
+    """Create an AutoML text-extraction dataset and print its name and id."""
+    # [START automl_language_entity_extraction_create_dataset]
+    from google.cloud import automl
+
+    # TODO(developer): Uncomment and set the following variables
+    # project_id = "YOUR_PROJECT_ID"
+    # display_name = "YOUR_DATASET_NAME"
+
+    client = automl.AutoMlClient()
+
+    # Describe the dataset: its display name plus text-extraction metadata.
+    dataset = automl.types.Dataset(
+        display_name=display_name,
+        text_extraction_dataset_metadata=(
+            automl.types.TextExtractionDatasetMetadata()
+        ),
+    )
+
+    # Create the dataset under the project's us-central1 location.
+    parent = client.location_path(project_id, "us-central1")
+    dataset_name = client.create_dataset(parent, dataset).result().name
+
+    # Display the dataset information
+    print("Dataset name: {}".format(dataset_name))
+    print("Dataset id: {}".format(dataset_name.split("/")[-1]))
+    # [END automl_language_entity_extraction_create_dataset]
diff --git a/automl/cloud-client/language_entity_extraction_create_dataset_test.py b/automl/cloud-client/language_entity_extraction_create_dataset_test.py
new file mode 100644
index 00000000000..15e0ba6d19a
--- /dev/null
+++ b/automl/cloud-client/language_entity_extraction_create_dataset_test.py
@@ -0,0 +1,42 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import datetime
+import os
+
+from google.cloud import automl
+
+import language_entity_extraction_create_dataset
+
+
+PROJECT_ID = os.environ["GCLOUD_PROJECT"]
+
+
+def test_entity_extraction_create_dataset(capsys):
+    """Create a timestamp-named dataset, check the output, then delete it."""
+    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
+    language_entity_extraction_create_dataset.create_dataset(
+        PROJECT_ID, "test_" + timestamp
+    )
+    out, _err = capsys.readouterr()
+    assert "Dataset id: " in out
+
+    # The sample prints "Dataset id: <id>" on its second output line.
+    id_line = out.splitlines()[1]
+    dataset_id = id_line.split()[2]
+
+    # Delete the created dataset
+    client = automl.AutoMlClient()
+    dataset_path = client.dataset_path(PROJECT_ID, "us-central1", dataset_id)
+    client.delete_dataset(dataset_path).result()
diff --git a/automl/cloud-client/language_entity_extraction_create_model.py b/automl/cloud-client/language_entity_extraction_create_model.py
new file mode 100644
index 00000000000..5e0748dd567
--- /dev/null
+++ b/automl/cloud-client/language_entity_extraction_create_model.py
@@ -0,0 +1,43 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def create_model(project_id, dataset_id, display_name):
+    """Start training a text-extraction model and print the operation name."""
+    # [START automl_language_entity_extraction_create_model]
+    from google.cloud import automl
+
+    # TODO(developer): Uncomment and set the following variables
+    # project_id = "YOUR_PROJECT_ID"
+    # dataset_id = "YOUR_DATASET_ID"
+    # display_name = "YOUR_MODEL_NAME"
+
+    client = automl.AutoMlClient()
+
+    # Left empty to use the default base model provided by Google.
+    model_spec = automl.types.Model(
+        display_name=display_name,
+        dataset_id=dataset_id,
+        text_extraction_model_metadata=(
+            automl.types.TextExtractionModelMetadata()
+        ),
+    )
+
+    # Create the model under the project's us-central1 location.
+    parent = client.location_path(project_id, "us-central1")
+    training_op = client.create_model(parent, model_spec)
+
+    print("Training operation name: {}".format(training_op.operation.name))
+    print("Training started...")
+    # [END automl_language_entity_extraction_create_model]
diff --git a/automl/cloud-client/language_entity_extraction_create_model_test.py b/automl/cloud-client/language_entity_extraction_create_model_test.py
new file mode 100644
index 00000000000..27fef303771
--- /dev/null
+++ b/automl/cloud-client/language_entity_extraction_create_model_test.py
@@ -0,0 +1,34 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import language_entity_extraction_create_model
+
+PROJECT_ID = os.environ["GCLOUD_PROJECT"]
+DATASET_ID = "TEN0000000000000000000"
+
+
+def test_entity_extraction_create_model(capsys):
+    # Entity extraction does not let you cancel model creation, so request a
+    # model for a nonexistent dataset; the rest of the request is valid.
+    try:
+        language_entity_extraction_create_model.create_model(
+            PROJECT_ID, DATASET_ID, "classification_test_create_model"
+        )
+    except Exception as e:  # str(e) works for any exception on Python 3
+        assert "Dataset does not exist." in str(e)
+    else:  # no exception: assert outside the broad except so it can fail
+        out, _ = capsys.readouterr()
+        assert "Dataset does not exist." in out
diff --git a/automl/cloud-client/language_entity_extraction_predict.py b/automl/cloud-client/language_entity_extraction_predict.py
new file mode 100644
index 00000000000..020474d3ab1
--- /dev/null
+++ b/automl/cloud-client/language_entity_extraction_predict.py
@@ -0,0 +1,53 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def predict(project_id, model_id, content):
+    """Print the entities an AutoML text-extraction model finds in content."""
+    # [START automl_language_entity_extraction_predict]
+    from google.cloud import automl
+
+    # TODO(developer): Uncomment and set the following variables
+    # project_id = "YOUR_PROJECT_ID"
+    # model_id = "YOUR_MODEL_ID"
+    # content = "text to predict"
+
+    prediction_client = automl.PredictionServiceClient()
+
+    # Get the full path of the model.
+    model_full_id = prediction_client.model_path(
+        project_id, "us-central1", model_id
+    )
+
+    text_snippet = automl.types.TextSnippet(
+        content=content, mime_type="text/plain"
+    )  # Types: 'text/plain', 'text/html'
+    payload = automl.types.ExamplePayload(text_snippet=text_snippet)
+
+    response = prediction_client.predict(model_full_id, payload)
+
+    for annotation_payload in response.payload:
+        print(
+            "Text Extract Entity Types: {}".format(
+                annotation_payload.display_name
+            )
+        )
+        print(
+            "Text Score: {}".format(annotation_payload.text_extraction.score)
+        )
+        text_segment = annotation_payload.text_extraction.text_segment
+        print("Text Extract Entity Content: {}".format(text_segment.content))
+        print("Text Start Offset: {}".format(text_segment.start_offset))
+        print("Text End Offset: {}".format(text_segment.end_offset))
+    # [END automl_language_entity_extraction_predict]
diff --git a/automl/cloud-client/language_entity_extraction_predict_test.py b/automl/cloud-client/language_entity_extraction_predict_test.py
new file mode 100644
index 00000000000..4353a4f14eb
--- /dev/null
+++ b/automl/cloud-client/language_entity_extraction_predict_test.py
@@ -0,0 +1,33 @@
+#!/usr/bin/env python
+
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import language_entity_extraction_predict
+
+PROJECT_ID = os.environ["GCLOUD_PROJECT"]
+BUCKET_ID = "{}-lcm".format(PROJECT_ID)
+
+
+def test_predict(capsys):
+    model_id = "TEN5112482778553778176"
+    text = (
+        "Constitutional mutations in the WT1 gene in patients with "
+        "Denys-Drash syndrome."
+    )
+    language_entity_extraction_predict.predict(PROJECT_ID, model_id, text)
+    out, _ = capsys.readouterr()
+    assert "Text Extract Entity Types: " in out

From dd07a7d5f2b06d4091505777485c935a94e7ed86 Mon Sep 17 00:00:00 2001
From: Noah Negrey <nnegrey@users.noreply.github.com>
Date: Thu, 2 Jan 2020 12:19:47 -0700
Subject: [PATCH 2/4] Update language_entity_extraction_predict_test.py

---
 ...language_entity_extraction_predict_test.py | 20 ++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/automl/cloud-client/language_entity_extraction_predict_test.py b/automl/cloud-client/language_entity_extraction_predict_test.py
index 4353a4f14eb..742bc4c0c54 100644
--- a/automl/cloud-client/language_entity_extraction_predict_test.py
+++ b/automl/cloud-client/language_entity_extraction_predict_test.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-
 # Copyright 2020 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -16,13 +14,29 @@
 
 import os
 
+from google.cloud import automl
+import pytest
+
 import language_entity_extraction_predict
 
 PROJECT_ID = os.environ["GCLOUD_PROJECT"]
 BUCKET_ID = "{}-lcm".format(PROJECT_ID)
 
 
-def test_predict(capsys):
+@pytest.fixture(scope="function")
+def verify_model_state():
+    """Deploy the test model first if the service reports it undeployed."""
+    automl_client = automl.AutoMlClient()
+    model_name = automl_client.model_path(PROJECT_ID, "us-central1", MODEL_ID)
+    undeployed = automl.enums.Model.DeploymentState.UNDEPLOYED
+
+    if automl_client.get_model(model_name).deployment_state == undeployed:
+        # Deploy model if it is not deployed
+        automl_client.deploy_model(model_name).result()
+
+
+def test_predict(capsys, verify_model_state):
+    verify_model_state
     model_id = "TEN5112482778553778176"
     text = (
         "Constitutional mutations in the WT1 gene in patients with "

From aad697c62fb1ba6e635c0822fe0175c2474920df Mon Sep 17 00:00:00 2001
From: Noah Negrey <nnegrey@users.noreply.github.com>
Date: Thu, 2 Jan 2020 12:20:24 -0700
Subject: [PATCH 3/4] Update language_entity_extraction_predict_test.py

---
 .../cloud-client/language_entity_extraction_predict_test.py  | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/automl/cloud-client/language_entity_extraction_predict_test.py b/automl/cloud-client/language_entity_extraction_predict_test.py
index 742bc4c0c54..82477339310 100644
--- a/automl/cloud-client/language_entity_extraction_predict_test.py
+++ b/automl/cloud-client/language_entity_extraction_predict_test.py
@@ -20,7 +20,7 @@
 import language_entity_extraction_predict
 
 PROJECT_ID = os.environ["GCLOUD_PROJECT"]
-BUCKET_ID = "{}-lcm".format(PROJECT_ID)
+MODEL_ID = "TEN5112482778553778176"
 
 
 @pytest.fixture(scope="function")
@@ -37,11 +37,10 @@ def verify_model_state():
 
 def test_predict(capsys, verify_model_state):
     verify_model_state
-    model_id = "TEN5112482778553778176"
     text = (
         "Constitutional mutations in the WT1 gene in patients with "
         "Denys-Drash syndrome."
     )
-    language_entity_extraction_predict.predict(PROJECT_ID, model_id, text)
+    language_entity_extraction_predict.predict(PROJECT_ID, MODEL_ID, text)
     out, _ = capsys.readouterr()
     assert "Text Extract Entity Types: " in out

From ed76b1fd3f6bb8cae222beee112a5ad3513541ab Mon Sep 17 00:00:00 2001
From: nnegrey <nnegrey@google.com>
Date: Tue, 7 Jan 2020 12:59:59 -0700
Subject: [PATCH 4/4] use centralized automl testing project and add comments
 that link to docs

---
 .../language_entity_extraction_create_dataset_test.py         | 2 +-
 .../language_entity_extraction_create_model_test.py           | 2 +-
 automl/cloud-client/language_entity_extraction_predict.py     | 4 +++-
 .../cloud-client/language_entity_extraction_predict_test.py   | 4 ++--
 4 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/automl/cloud-client/language_entity_extraction_create_dataset_test.py b/automl/cloud-client/language_entity_extraction_create_dataset_test.py
index 15e0ba6d19a..044a0d50590 100644
--- a/automl/cloud-client/language_entity_extraction_create_dataset_test.py
+++ b/automl/cloud-client/language_entity_extraction_create_dataset_test.py
@@ -20,7 +20,7 @@
 import language_entity_extraction_create_dataset
 
 
-PROJECT_ID = os.environ["GCLOUD_PROJECT"]
+PROJECT_ID = os.environ["AUTOML_PROJECT_ID"]
 
 
 def test_entity_extraction_create_dataset(capsys):
diff --git a/automl/cloud-client/language_entity_extraction_create_model_test.py b/automl/cloud-client/language_entity_extraction_create_model_test.py
index 27fef303771..0ff74c89b13 100644
--- a/automl/cloud-client/language_entity_extraction_create_model_test.py
+++ b/automl/cloud-client/language_entity_extraction_create_model_test.py
@@ -16,7 +16,7 @@
 
 import language_entity_extraction_create_model
 
-PROJECT_ID = os.environ["GCLOUD_PROJECT"]
+PROJECT_ID = os.environ["AUTOML_PROJECT_ID"]
 DATASET_ID = "TEN0000000000000000000"
 
 
diff --git a/automl/cloud-client/language_entity_extraction_predict.py b/automl/cloud-client/language_entity_extraction_predict.py
index 020474d3ab1..40d7e89b280 100644
--- a/automl/cloud-client/language_entity_extraction_predict.py
+++ b/automl/cloud-client/language_entity_extraction_predict.py
@@ -30,9 +30,11 @@ def predict(project_id, model_id, content):
         project_id, "us-central1", model_id
     )
 
+    # Supported mime_types: 'text/plain', 'text/html'
+    # https://cloud.google.com/automl/docs/reference/rpc/google.cloud.automl.v1#textsnippet
     text_snippet = automl.types.TextSnippet(
         content=content, mime_type="text/plain"
-    )  # Types: 'text/plain', 'text/html'
+    )
     payload = automl.types.ExamplePayload(text_snippet=text_snippet)
 
     response = prediction_client.predict(model_full_id, payload)
diff --git a/automl/cloud-client/language_entity_extraction_predict_test.py b/automl/cloud-client/language_entity_extraction_predict_test.py
index 82477339310..35dfddefa05 100644
--- a/automl/cloud-client/language_entity_extraction_predict_test.py
+++ b/automl/cloud-client/language_entity_extraction_predict_test.py
@@ -19,8 +19,8 @@
 
 import language_entity_extraction_predict
 
-PROJECT_ID = os.environ["GCLOUD_PROJECT"]
-MODEL_ID = "TEN5112482778553778176"
+PROJECT_ID = os.environ["AUTOML_PROJECT_ID"]
+MODEL_ID = os.environ["ENTITY_EXTRACTION_MODEL_ID"]
 
 
 @pytest.fixture(scope="function")