Skip to content

Image Agent

Source code in OpAgentsOlympus/practice/image_agent.py
OpAgentsOlympus/practice/image_agent.py
import asyncio
import base64
import os
from config import config
from agents import Agent, Runner

# Absolute-or-relative location of the sample image, resolved next to this
# module so the script works regardless of the current working directory.
_MODULE_DIR = os.path.dirname(__file__)
FILEPATH = os.path.join(_MODULE_DIR, "image_bison.png")


def image_to_base64(image_path):
    """Return the contents of the file at *image_path* as base64 text.

    The file is opened in binary mode, its bytes are base64-encoded, and the
    encoded bytes are decoded as UTF-8 so the result is a ``str``.
    """
    with open(image_path, "rb") as fh:
        raw_bytes = fh.read()
    return base64.b64encode(raw_bytes).decode("utf-8")


async def main():
    """Ask an agent to describe the sample image and print its answer.

    Reads the file at ``FILEPATH``, base64-encodes it, embeds it in a data
    URL, and runs the agent with two user messages: the image, then a text
    question about it. The run's ``final_output`` is printed to stdout.
    """
    # Function-scope import: only needed here to label the data URL.
    import mimetypes

    b64_image = image_to_base64(FILEPATH)

    # Derive the MIME type from the file extension so the data URL matches
    # the actual payload (the sample file is a PNG; the type used to be
    # hard-coded as image/jpeg). Falls back to the previous default when the
    # extension is not recognized.
    mime_type = mimetypes.guess_type(FILEPATH)[0] or "image/jpeg"

    agent = Agent(
        name="Assistant",
        instructions="You are a helpful assistant.",
    )

    result = await Runner.run(
        agent,
        [
            {
                "role": "user",
                "content": [
                    {
                        "type": "input_image",
                        "detail": "auto",
                        "image_url": f"data:{mime_type};base64,{b64_image}",
                    }
                ],
            },
            {
                "role": "user",
                "content": "What do you see in this image?",
            },
        ],
        run_config=config,
    )
    print(result.final_output)


# Run the async entry point only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    asyncio.run(main())