CheddahBot/cheddahbot/tools/image.py

53 lines
1.5 KiB
Python

"""Image analysis tool - delegates to vision-capable LLM."""
from __future__ import annotations
import base64
from pathlib import Path
from . import tool
@tool("analyze_image", "Describe or analyze an image file", category="media")
def analyze_image(
    path: str, question: str = "Describe this image in detail.", ctx: dict | None = None
) -> str:
    """Ask the agent's LLM a question about an image file on disk.

    The file is read, base64-encoded into a ``data:`` URL and sent together
    with *question* as a single user message via ``ctx["agent"].llm.chat``.

    Args:
        path: Location of the image file; resolved to an absolute path
            before use.
        question: Text prompt sent alongside the image.
        ctx: Optional dict; its ``"agent"`` entry must be truthy and expose
            ``llm.chat(messages, stream=False)``.

    Returns:
        The concatenated ``"text"`` chunks of the reply, or a plain-text
        message when the file does not exist, no agent is available, the
        file cannot be read, or the reply contains no text.
    """
    p = Path(path).resolve()
    if not p.exists():
        return f"Image not found: {path}"

    # Look for the agent before touching the file so that a missing context
    # does not cost a full read plus base64 encoding of the image.
    agent = ctx.get("agent") if ctx else None
    if not agent:
        return "Agent context not available for image analysis."

    try:
        data = base64.b64encode(p.read_bytes()).decode("utf-8")
    except Exception as e:
        # Report the failure as text instead of raising, matching the
        # function's other error paths.
        return f"Error reading image: {e}"

    mime_map = {
        ".png": "image/png",
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".gif": "image/gif",
        ".webp": "image/webp",
        ".bmp": "image/bmp",
    }
    # Extensions missing from the table are labelled as PNG.
    mime = mime_map.get(p.suffix.lower(), "image/png")

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": question},
                {"type": "image_url", "image_url": {"url": f"data:{mime};base64,{data}"}},
            ],
        },
    ]

    # NOTE(review): assumes chat() yields dicts carrying "type" and "content"
    # keys; only "text" chunks are kept -- confirm against the LLM wrapper.
    result_parts = [
        chunk["content"]
        for chunk in agent.llm.chat(messages, stream=False)
        if chunk["type"] == "text"
    ]
    return "".join(result_parts) or "Could not analyze image."