# CheddahBot/cheddahbot/tools/image.py

"""Image analysis tool - delegates to vision-capable LLM."""

from __future__ import annotations

import base64
from pathlib import Path

from . import tool


@tool("analyze_image", "Describe or analyze an image file", category="media")
def analyze_image(
    path: str, question: str = "Describe this image in detail.", ctx: dict | None = None
) -> str:
    """Ask the agent's LLM a question about an image file on disk.

    The file is read, base64-encoded and embedded as a ``data:`` URL in a
    single user message that is sent through ``ctx["agent"].llm.chat``.

    Args:
        path: Location of the image file; resolved to an absolute path
            before use.
        question: Prompt sent to the model alongside the image.
        ctx: Tool context. It must contain a truthy ``"agent"`` entry
            exposing ``llm.chat`` for the analysis to run.

    Returns:
        The concatenated ``"text"`` chunks of the model's reply, or a
        human-readable error message when the file does not exist, no agent
        is available, the file cannot be read, or the model returned no text.
    """
    image_path = Path(path).resolve()
    if not image_path.exists():
        return f"Image not found: {path}"

    # Check for the agent before touching the file contents: without it there
    # is nothing to send the image to, so reading and encoding would be wasted.
    agent = ctx.get("agent") if ctx else None
    if not agent:
        return "Agent context not available for image analysis."

    mime_map = {
        ".png": "image/png",
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".gif": "image/gif",
        ".webp": "image/webp",
        ".bmp": "image/bmp",
    }
    # Extensions missing from the table are labelled as PNG.
    mime = mime_map.get(image_path.suffix.lower(), "image/png")

    try:
        raw = image_path.read_bytes()
    except OSError as e:
        # Covers directories, permission problems and other I/O failures.
        return f"Error reading image: {e}"
    # Base64 output is pure ASCII, so it can be embedded directly in the URL.
    data = base64.b64encode(raw).decode("ascii")

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": question},
                {"type": "image_url", "image_url": {"url": f"data:{mime};base64,{data}"}},
            ],
        },
    ]
    # Only chunks tagged "text" contribute to the answer; other chunk types
    # yielded by the LLM are ignored.
    reply = "".join(
        chunk["content"]
        for chunk in agent.llm.chat(messages, stream=False)
        if chunk["type"] == "text"
    )
    return reply or "Could not analyze image."