Skip to content

feat(image-generation): Image generation using Gemini Flash #13334

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 2, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions genai/image_generation/imggen_mmflash_edit_img_with_txt_img.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


def generate_content() -> str:
    """Ask Gemini Flash to redraw a local image in a cartoon style.

    Opens ``example-image.png``, sends it with a text instruction to the
    model, prints every non-empty text part of the first candidate and writes
    every inline-data part to ``bw-example-image.png``.

    Returns:
        The output file name, ``bw-example-image.png``.
    """
    # [START googlegenaisdk_imggen_mmflash_edit_img_with_txt_img]
    from io import BytesIO

    from google import genai
    from google.genai.types import GenerateContentConfig, Modality
    from PIL import Image

    client = genai.Client()

    # Using an image of Eiffel tower, with fireworks in the background.
    source_image = Image.open("example-image.png")

    generation_config = GenerateContentConfig(
        response_modalities=[Modality.TEXT, Modality.IMAGE]
    )
    response = client.models.generate_content(
        model="gemini-2.0-flash-exp",
        contents=[source_image, "Edit this image to make it look like a cartoon."],
        config=generation_config,
    )
    for piece in response.candidates[0].content.parts:
        # Text takes precedence; image bytes are only considered when the part
        # carries no text.
        if piece.text:
            print(piece.text)
            continue
        if piece.inline_data:
            edited_image = Image.open(BytesIO(piece.inline_data.data))
            edited_image.save("bw-example-image.png")
    # Example response:
    #  Here's the cartoon-style edit of the image:
    #  Cartoon-style edit:
    #  - Simplified the Eiffel Tower with bolder lines and slightly exaggerated proportions.
    #  - Brightened and saturated the colors of the sky, fireworks, and foliage for a more vibrant, cartoonish look.
    #  ....
    # [END googlegenaisdk_imggen_mmflash_edit_img_with_txt_img]
    return "bw-example-image.png"


if __name__ == "__main__":
generate_content()
49 changes: 49 additions & 0 deletions genai/image_generation/imggen_mmflash_txt_and_img_with_txt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


def generate_content() -> int:
    """Generate an illustrated paella recipe as Markdown plus PNG images.

    Text parts of the first candidate are written to ``paella-recipe.md``.
    Each inline-data part is saved as ``example-image-<n>.png``, where ``n`` is
    the 1-based position of the part in the response, and is linked from the
    Markdown file.

    Returns:
        The 0-based index of the last response part, or ``-1`` when the
        response contained no parts.
    """
    # [START googlegenaisdk_imggen_mmflash_txt_and_img_with_txt]
    from google import genai
    from google.genai.types import GenerateContentConfig, Modality
    from PIL import Image
    from io import BytesIO

    client = genai.Client()

    response = client.models.generate_content(
        model="gemini-2.0-flash-exp",
        contents=(
            # The trailing space keeps the two sentences apart once the
            # adjacent string literals are concatenated.
            "Generate an illustrated recipe for a paella. "
            "Create images to go alongside the text as you generate the recipe"
        ),
        config=GenerateContentConfig(response_modalities=[Modality.TEXT, Modality.IMAGE]),
    )
    # NOTE(review): `parts` appears to be declared optional in the SDK types;
    # a missing list is treated as empty here instead of failing in enumerate().
    parts = response.candidates[0].content.parts or []
    # Stays at -1 when there are no parts, so the return value is always bound.
    i = -1
    # Model text may contain non-ASCII characters; do not rely on the locale's
    # default encoding when writing the Markdown file.
    with open("paella-recipe.md", "w", encoding="utf-8") as fp:
        for i, part in enumerate(parts):
            if part.text is not None:
                fp.write(part.text)
            elif part.inline_data is not None:
                image = Image.open(BytesIO(part.inline_data.data))
                image.save(f"example-image-{i+1}.png")
                fp.write(f"![image](./example-image-{i+1}.png)")
    # Example response:
    #  A markdown page for a Paella recipe (`paella-recipe.md`) has been generated.
    #  It includes detailed steps and several images illustrating the cooking process.
    # [END googlegenaisdk_imggen_mmflash_txt_and_img_with_txt]
    return i


if __name__ == "__main__":
generate_content()
46 changes: 46 additions & 0 deletions genai/image_generation/imggen_mmflash_with_txt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


def generate_content() -> str:
    """Generate an Eiffel tower image from a text prompt with Gemini Flash.

    Prints every non-empty text part of the first candidate and writes every
    inline-data part to ``example-image.png``.

    Returns:
        The output file name, ``example-image.png``.
    """
    # [START googlegenaisdk_imggen_mmflash_with_txt]
    from io import BytesIO

    from google import genai
    from google.genai.types import GenerateContentConfig, Modality
    from PIL import Image

    client = genai.Client()

    generation_config = GenerateContentConfig(
        response_modalities=[Modality.TEXT, Modality.IMAGE]
    )
    response = client.models.generate_content(
        model="gemini-2.0-flash-exp",
        contents="Generate an image of the Eiffel tower with fireworks in the background.",
        config=generation_config,
    )
    for piece in response.candidates[0].content.parts:
        # Text takes precedence; image bytes are only considered when the part
        # carries no text.
        if piece.text:
            print(piece.text)
            continue
        if piece.inline_data:
            generated_image = Image.open(BytesIO(piece.inline_data.data))
            generated_image.save("example-image.png")
    # Example response:
    #  A beautiful photograph captures the iconic Eiffel Tower in Paris, France,
    #  against a backdrop of a vibrant and dynamic fireworks display. The tower itself...
    # [END googlegenaisdk_imggen_mmflash_with_txt]
    return "example-image.png"


if __name__ == "__main__":
generate_content()
2 changes: 1 addition & 1 deletion genai/image_generation/noxfile_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

TEST_CONFIG_OVERRIDE = {
# You can opt out from the test for specific Python versions.
"ignored_versions": ["2.7", "3.7", "3.8", "3.10", "3.11", "3.13"],
"ignored_versions": ["2.7", "3.7", "3.8", "3.9", "3.10", "3.11", "3.13"],
# Old samples are opted out of enforcing Python type hints
# All new samples should feature them
"enforce_type_hints": True,
Expand Down
52 changes: 43 additions & 9 deletions genai/image_generation/test_image_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,12 @@
import pytest

import imggen_canny_ctrl_type_with_txt_img

import imggen_mmflash_edit_img_with_txt_img
import imggen_mmflash_txt_and_img_with_txt
import imggen_mmflash_with_txt
import imggen_raw_reference_with_txt_img

import imggen_scribble_ctrl_type_with_txt_img

import imggen_style_reference_with_txt_img

import imggen_subj_refer_ctrl_refer_with_txt_imgs


Expand All @@ -57,25 +56,60 @@ def output_gcs_uri() -> str:


def test_img_customization_subject(output_gcs_uri: str) -> None:
    """Check that the subject customization sample returns a truthy response."""
    # Call the sample once; the diff view had left the removed one-line call
    # next to its reformatted replacement.
    response = imggen_subj_refer_ctrl_refer_with_txt_imgs.subject_customization(
        output_gcs_uri=output_gcs_uri
    )
    assert response


def test_img_customization_style(output_gcs_uri: str) -> None:
    """Check that the style customization sample returns a truthy response."""
    # Call the sample once; the diff view had left the removed one-line call
    # next to its reformatted replacement.
    response = imggen_style_reference_with_txt_img.style_customization(
        output_gcs_uri=output_gcs_uri
    )
    assert response


def test_img_customization_style_transfer(output_gcs_uri: str) -> None:
    """Check that the style transfer sample returns a truthy response."""
    # Call the sample once; the diff view had left the removed one-line call
    # next to its reformatted replacement.
    response = imggen_raw_reference_with_txt_img.style_transfer_customization(
        output_gcs_uri=output_gcs_uri
    )
    assert response


def test_img_customization_scribble(output_gcs_uri: str) -> None:
    """Check that the scribble customization sample returns a truthy response."""
    # Call the sample once; the diff view had left the removed one-line call
    # next to its reformatted replacement.
    response = imggen_scribble_ctrl_type_with_txt_img.scribble_customization(
        output_gcs_uri=output_gcs_uri
    )
    assert response


def test_img_customization_canny_edge(output_gcs_uri: str) -> None:
    """Check that the canny edge customization sample returns a truthy response."""
    # Call the sample once; the diff view had left the removed one-line call
    # next to its reformatted replacement.
    response = imggen_canny_ctrl_type_with_txt_img.canny_edge_customization(
        output_gcs_uri=output_gcs_uri
    )
    assert response


def test_imggen_mmflash_examples() -> None:
    """Run the generate sample, then the edit sample, and check their outputs.

    The two samples are run in this order in one test because the edit sample
    opens ``example-image.png``, the file name the generate sample returns.
    """
    created = []
    try:
        # generate image
        fname = imggen_mmflash_with_txt.generate_content()
        created.append(fname)
        assert os.path.isfile(fname)
        # edit generated image
        new_fname = imggen_mmflash_edit_img_with_txt_img.generate_content()
        created.append(new_fname)
        assert os.path.isfile(new_fname)
    finally:
        # clean-up runs even when an assertion or the second sample fails, so
        # generated files do not leak into later test runs
        for path in created:
            if os.path.isfile(path):
                os.remove(path)


def test_imggen_mmflash_txt_and_img_with_txt() -> None:
    """Run the illustrated-recipe sample and check that it wrote the Markdown file.

    The sample's return value is used as the upper bound when removing the
    ``example-image-<n>.png`` files it may have written.
    """
    last_image_id = imggen_mmflash_txt_and_img_with_txt.generate_content()
    fname = "paella-recipe.md"
    try:
        # The sample opens this file for writing, so it must exist afterwards.
        assert os.path.isfile(fname)
    finally:
        # clean-up: image files are numbered from 1, and only image parts
        # produce a file, so each candidate name is checked before removal
        for i in range(last_image_id + 1):
            img_name = f"example-image-{i+1}.png"
            if os.path.isfile(img_name):
                os.remove(img_name)
        if os.path.isfile(fname):
            os.remove(fname)