feat: add imagen samples: generate image from prompt, get captions, g… (GoogleCloudPlatform#11247)

irataxy · web-flow · commit cfdbb240dc9b · 2024-02-29T10:40:18.000-08:00
* feat: add imagen samples: generate image from prompt, get captions, get responses (vqa)

* change test question for image responses

* Trigger Build
diff --git a/generative_ai/imagen/generate_image.py b/generative_ai/imagen/generate_image.py
@@ -0,0 +1,87 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Google Cloud Vertex AI sample for generating an image using only
+    descriptive text as an input.
+Example usage:
+    python generate_image.py --project_id <project-id> --location <location> \
+        --output_file <filepath> --prompt <text>
+"""
+
+# [START aiplatform_imagen_generate_image]
+
+import argparse
+
+import vertexai
+from vertexai.preview.vision_models import ImageGenerationModel
+
+
+def generate_image(
+    project_id: str, location: str, output_file: str, prompt: str
+) -> vertexai.preview.vision_models.ImageGenerationResponse:
+    """Create one image from a text prompt and write it to a local file.
+
+    Args:
+      project_id: Google Cloud project ID passed to vertexai.init().
+      location: Google Cloud region passed to vertexai.init().
+      output_file: Local path the generated image is saved to.
+      prompt: Text describing the image to generate.
+
+    Returns:
+      The response object from generate_images(); its first element is the
+      image that was saved to output_file.
+    """
+    vertexai.init(project=project_id, location=location)
+    generator = ImageGenerationModel.from_pretrained("imagegeneration@005")
+
+    # seed and number_of_images are optional parameters.
+    response = generator.generate_images(prompt=prompt, seed=1, number_of_images=1)
+    first_image = response[0]
+    first_image.save(location=output_file, include_generation_parameters=True)
+
+    # Optional. View the generated image in a notebook.
+    # first_image.show()
+
+    print(f"Created output image using {len(first_image._image_bytes)} bytes")
+
+    return response
+
+
+# [END aiplatform_imagen_generate_image]
+
+if __name__ == "__main__":
+    cli = argparse.ArgumentParser()
+    cli.add_argument("--project_id", required=True, help="Your Cloud project ID.")
+    cli.add_argument(
+        "--location",
+        default="us-central1",
+        help="The location in which to initialize Vertex AI.",
+    )
+    cli.add_argument(
+        "--output_file",
+        required=True,
+        help="The local path to the output file (e.g., 'my-output.png').",
+    )
+    cli.add_argument(
+        "--prompt",
+        required=True,
+        help="The text prompt describing what you want to see (e.g., 'a dog reading a newspaper').",
+    )
+    flags = cli.parse_args()
+    generate_image(
+        project_id=flags.project_id,
+        location=flags.location,
+        output_file=flags.output_file,
+        prompt=flags.prompt,
+    )
diff --git a/generative_ai/imagen/generate_image_test.py b/generative_ai/imagen/generate_image_test.py
@@ -0,0 +1,40 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import backoff
+
+import generate_image
+
+from google.api_core.exceptions import ResourceExhausted
+
+
+_RESOURCES = os.path.join(os.path.dirname(__file__), "test_resources")
+_PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")  # None if the variable is unset
+_LOCATION = "us-central1"  # region handed to the sample
+_OUTPUT_FILE = os.path.join(_RESOURCES, "dog_newspaper.png")  # image is saved here
+_PROMPT = "a dog reading a newspaper"
+
+
+@backoff.on_exception(backoff.expo, ResourceExhausted, max_time=60)
+def test_generate_image() -> None:
+    """Run the sample end to end and check that a non-trivial image came back."""
+    generated = generate_image.generate_image(
+        project_id=_PROJECT_ID,
+        location=_LOCATION,
+        output_file=_OUTPUT_FILE,
+        prompt=_PROMPT,
+    )
+    assert len(generated[0]._image_bytes) > 1000
diff --git a/generative_ai/imagen/get_short_form_image_captions.py b/generative_ai/imagen/get_short_form_image_captions.py
@@ -0,0 +1,75 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Google Cloud Vertex AI sample for getting short-form image captions.
+Example usage:
+    python get_short_form_image_captions.py --project_id <project-id> --location <location> \
+        --input_file <filepath>
+"""
+
+# [START aiplatform_imagen_get_short_form_image_captions]
+
+import argparse
+
+import vertexai
+from vertexai.preview.vision_models import Image, ImageTextModel
+
+
+def get_short_form_image_captions(
+    project_id: str, location: str, input_file: str
+) -> list:
+    """Ask the image-text model to caption an image stored on local disk.
+
+    Args:
+      project_id: Google Cloud project ID passed to vertexai.init().
+      location: Google Cloud region passed to vertexai.init().
+      input_file: Local path of the image to caption.
+
+    Returns:
+      The captions returned by get_captions(); they are also printed.
+    """
+    vertexai.init(project=project_id, location=location)
+
+    captioner = ImageTextModel.from_pretrained("imagetext@001")
+    picture = Image.load_from_file(location=input_file)
+
+    # language and number_of_results are optional parameters.
+    results = captioner.get_captions(
+        image=picture, language="en", number_of_results=1
+    )
+    print(results)
+    return results
+
+
+# [END aiplatform_imagen_get_short_form_image_captions]
+
+if __name__ == "__main__":
+    cli = argparse.ArgumentParser()
+    cli.add_argument("--project_id", required=True, help="Your Cloud project ID.")
+    cli.add_argument(
+        "--location",
+        default="us-central1",
+        help="The location in which to initialize Vertex AI.",
+    )
+    cli.add_argument(
+        "--input_file",
+        required=True,
+        help="The local path to the input file (e.g., 'my-input.png').",
+    )
+    flags = cli.parse_args()
+    get_short_form_image_captions(
+        project_id=flags.project_id,
+        location=flags.location,
+        input_file=flags.input_file,
+    )
diff --git a/generative_ai/imagen/get_short_form_image_captions_test.py b/generative_ai/imagen/get_short_form_image_captions_test.py
@@ -0,0 +1,38 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import backoff
+
+import get_short_form_image_captions
+
+from google.api_core.exceptions import ResourceExhausted
+
+
+_RESOURCES = os.path.join(os.path.dirname(__file__), "test_resources")
+_PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")  # None if the variable is unset
+_LOCATION = "us-central1"  # region handed to the sample
+_INPUT_FILE = os.path.join(_RESOURCES, "cat.png")  # image sent to the model
+
+
+@backoff.on_exception(backoff.expo, ResourceExhausted, max_time=60)
+def test_get_short_form_image_captions() -> None:
+    """Caption the cat test image and expect "cat" in the first caption."""
+    captions = get_short_form_image_captions.get_short_form_image_captions(
+        project_id=_PROJECT_ID,
+        location=_LOCATION,
+        input_file=_INPUT_FILE,
+    )
+    assert len(captions) > 0 and "cat" in captions[0]
diff --git a/generative_ai/imagen/get_short_form_image_responses.py b/generative_ai/imagen/get_short_form_image_responses.py
@@ -0,0 +1,83 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Google Cloud Vertex AI sample for getting short-form responses to a
+    question about an image.
+Example usage:
+    python get_short_form_image_responses.py --project_id <project-id> --location <location> \
+        --input_file <filepath> --question <text>
+"""
+
+# [START aiplatform_imagen_get_short_form_image_responses]
+
+import argparse
+
+import vertexai
+from vertexai.preview.vision_models import Image, ImageTextModel
+
+
+def get_short_form_image_responses(
+    project_id: str, location: str, input_file: str, question: str
+) -> list:
+    """Ask the image-text model a question about an image on local disk.
+
+    Args:
+      project_id: Google Cloud project ID passed to vertexai.init().
+      location: Google Cloud region passed to vertexai.init().
+      input_file: Local path of the image the question refers to.
+      question: The question to ask about the image.
+
+    Returns:
+      The answers returned by ask_question(); they are also printed.
+    """
+    vertexai.init(project=project_id, location=location)
+
+    vqa_model = ImageTextModel.from_pretrained("imagetext@001")
+    picture = Image.load_from_file(location=input_file)
+
+    # number_of_results is an optional parameter.
+    replies = vqa_model.ask_question(
+        image=picture, question=question, number_of_results=1
+    )
+    print(replies)
+    return replies
+
+
+# [END aiplatform_imagen_get_short_form_image_responses]
+
+if __name__ == "__main__":
+    cli = argparse.ArgumentParser()
+    cli.add_argument("--project_id", required=True, help="Your Cloud project ID.")
+    cli.add_argument(
+        "--location",
+        default="us-central1",
+        help="The location in which to initialize Vertex AI.",
+    )
+    cli.add_argument(
+        "--input_file",
+        required=True,
+        help="The local path to the input file (e.g., 'my-input.png').",
+    )
+    cli.add_argument(
+        "--question",
+        required=True,
+        help="The question about the image (e.g., 'What breed of dog is this a picture of?').",
+    )
+    flags = cli.parse_args()
+    get_short_form_image_responses(
+        project_id=flags.project_id,
+        location=flags.location,
+        input_file=flags.input_file,
+        question=flags.question,
+    )
diff --git a/generative_ai/imagen/get_short_form_image_responses_test.py b/generative_ai/imagen/get_short_form_image_responses_test.py
@@ -0,0 +1,40 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import backoff
+
+import get_short_form_image_responses
+
+from google.api_core.exceptions import ResourceExhausted
+
+
+_RESOURCES = os.path.join(os.path.dirname(__file__), "test_resources")
+_PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")  # None if the variable is unset
+_LOCATION = "us-central1"  # region handed to the sample
+_INPUT_FILE = os.path.join(_RESOURCES, "cat.png")  # image sent to the model
+_QUESTION = "What breed of cat is this a picture of?"  # asked about _INPUT_FILE
+
+
+@backoff.on_exception(backoff.expo, ResourceExhausted, max_time=60)
+def test_get_short_form_image_responses() -> None:
+    """Ask about the cat test image and expect "tabby" in the first answer."""
+    answers = get_short_form_image_responses.get_short_form_image_responses(
+        project_id=_PROJECT_ID,
+        location=_LOCATION,
+        input_file=_INPUT_FILE,
+        question=_QUESTION,
+    )
+    assert len(answers) > 0 and "tabby" in answers[0]