# CheddahBot/cheddahbot/tools/image.py

"""Image analysis tool - delegates to vision-capable LLM."""

from __future__ import annotations

import base64
from pathlib import Path

from . import tool


@tool("analyze_image", "Describe or analyze an image file", category="media")
def analyze_image(
    path: str,
    question: str = "Describe this image in detail.",
    ctx: dict | None = None,
) -> str:
    """Ask the agent's LLM a question about an image file on disk.

    The image is read, base64-encoded into a ``data:`` URL and sent together
    with *question* as a single user message to ``ctx["agent"].llm.chat``.

    Args:
        path: Location of the image file; it is resolved to an absolute path
            before use.
        question: Prompt sent alongside the image.
        ctx: Tool context. Must contain a truthy ``"agent"`` entry whose
            ``llm`` attribute exposes ``chat(messages, stream=False)``.

    Returns:
        The concatenated ``"text"`` chunks produced by the LLM, or a
        human-readable error message when the file does not exist, cannot be
        read, no agent is available, or the LLM returned no text.
    """
    p = Path(path).resolve()
    if not p.exists():
        return f"Image not found: {path}"

    # Check for the agent before touching the file: without one there is
    # nowhere to send the image, so reading and encoding it is wasted work.
    agent = ctx.get("agent") if ctx else None
    if not agent:
        return "Agent context not available for image analysis."

    mime_map = {
        ".png": "image/png",
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".gif": "image/gif",
        ".webp": "image/webp",
        ".bmp": "image/bmp",
    }
    # Extensions not listed above are labelled as PNG.
    mime = mime_map.get(p.suffix.lower(), "image/png")

    try:
        data = base64.b64encode(p.read_bytes()).decode("utf-8")
    except Exception as e:
        # Reported as a string (not raised) like the other failure paths.
        return f"Error reading image: {e}"

    messages = [
        {"role": "user", "content": [
            {"type": "text", "text": question},
            {"type": "image_url", "image_url": {"url": f"data:{mime};base64,{data}"}},
        ]},
    ]

    result_parts = []
    for chunk in agent.llm.chat(messages, stream=False):
        # Use .get so a chunk without "type"/"content" (or with a None
        # content) is skipped instead of raising KeyError/TypeError.
        if chunk.get("type") == "text" and chunk.get("content"):
            result_parts.append(chunk["content"])
    return "".join(result_parts) or "Could not analyze image."