Generate images with conversable agents.
pip install ag2[openai,lmm]
import os
from IPython.display import display
from PIL.Image import Image
import autogen
from autogen.agentchat.contrib import img_utils
from autogen.agentchat.contrib.capabilities import generate_images
gpt_config = {
"config_list": [{"model": "gpt-4-turbo-preview", "api_key": os.environ["OPENAI_API_KEY"]}],
"timeout": 120,
"temperature": 0.7,
}
gpt_vision_config = {
"config_list": [{"model": "gpt-4-vision-preview", "api_key": os.environ["OPENAI_API_KEY"]}],
"timeout": 120,
"temperature": 0.7,
}
dalle_config = {
"config_list": [{"model": "dall-e-3", "api_key": os.environ["OPENAI_API_KEY"]}],
"timeout": 120,
"temperature": 0.7,
}
CRITIC_SYSTEM_MESSAGE = """You need to improve the prompt of the figures you saw.
How to create an image that is better in terms of color, shape, text (clarity), and other things.
Reply with the following format:
CRITICS: the image needs to improve...
PROMPT: here is the updated prompt!
If you have no critique or a prompt, just say TERMINATE
"""
def _is_termination_message(msg) -> bool:
# Detects if we should terminate the conversation
if isinstance(msg.get("content"), str):
return msg["content"].rstrip().endswith("TERMINATE")
elif isinstance(msg.get("content"), list):
for content in msg["content"]:
if isinstance(content, dict) and "text" in content:
return content["text"].rstrip().endswith("TERMINATE")
return False
def critic_agent() -> autogen.ConversableAgent:
return autogen.ConversableAgent(
name="critic",
llm_config=gpt_vision_config,
system_message=CRITIC_SYSTEM_MESSAGE,
max_consecutive_auto_reply=3,
human_input_mode="NEVER",
is_termination_msg=lambda msg: _is_termination_message(msg),
)
def image_generator_agent() -> autogen.ConversableAgent:
# Create the agent
agent = autogen.ConversableAgent(
name="dalle",
llm_config=gpt_vision_config,
max_consecutive_auto_reply=3,
human_input_mode="NEVER",
is_termination_msg=lambda msg: _is_termination_message(msg),
)
# Add image generation ability to the agent
dalle_gen = generate_images.DalleImageGenerator(llm_config=dalle_config)
image_gen_capability = generate_images.ImageGeneration(
image_generator=dalle_gen, text_analyzer_llm_config=gpt_config
)
image_gen_capability.add_to_agent(agent)
return agent
extract_img
to help us extract the image generated by the
image generator agent.
def extract_images(sender: autogen.ConversableAgent, recipient: autogen.ConversableAgent) -> Image:
images = []
all_messages = sender.chat_messages[recipient]
for message in reversed(all_messages):
# The GPT-4V format, where the content is an array of data
contents = message.get("content", [])
for content in contents:
if isinstance(content, str):
continue
if content.get("type", "") == "image_url":
img_data = content["image_url"]["url"]
images.append(img_utils.get_pil_image(img_data))
if not images:
raise ValueError("No image data found in messages.")
return images
dalle = image_generator_agent()
critic = critic_agent()
img_prompt = "A happy dog wearing a shirt saying 'I Love AG2'. Make sure the text is clear."
# img_prompt = "Ask me how I'm doing"
result = dalle.initiate_chat(critic, message=img_prompt)
images = extract_images(dalle, critic)
for image in reversed(images):
display(image.resize((300, 300)))