# CheddahBot/cheddahbot/tools/image.py
#
# 42 lines
# 1.4 KiB
# Python

"""Image analysis tool - delegates to vision-capable LLM."""
from __future__ import annotations
import base64
from pathlib import Path
from . import tool
@tool("analyze_image", "Describe or analyze an image file", category="media")
def analyze_image(path: str, question: str = "Describe this image in detail.", ctx: dict = None) -> str:
    """Ask the agent's LLM a question about an image file on disk.

    The file is read, base64-encoded and embedded as a ``data:`` URL in a
    single user message that is passed to ``ctx["agent"].llm.chat``.

    Args:
        path: Location of the image file; it is resolved to an absolute path.
        question: Text prompt sent to the LLM alongside the image.
        ctx: Tool context. Must hold a truthy ``"agent"`` entry; its
            ``llm.chat`` method performs the actual analysis.

    Returns:
        The concatenated ``"text"`` chunks produced by the LLM, or a
        human-readable message when the file is missing or unreadable, when
        no agent is available, or when the LLM produced no text.
    """
    p = Path(path).resolve()
    if not p.exists():
        return f"Image not found: {path}"

    # Check for the agent before touching the file: without it the encoded
    # payload would be built only to be thrown away.
    agent = ctx.get("agent") if ctx else None
    if not agent:
        return "Agent context not available for image analysis."

    # Suffixes missing from this table are sent as PNG.
    mime_map = {
        ".png": "image/png",
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".gif": "image/gif",
        ".webp": "image/webp",
        ".bmp": "image/bmp",
    }
    mime = mime_map.get(p.suffix.lower(), "image/png")

    # Only the read can realistically fail (permissions, path is a directory,
    # file too large for memory); keep the guarded region that small.
    try:
        raw = p.read_bytes()
    except (OSError, MemoryError) as e:
        return f"Error reading image: {e}"
    data = base64.b64encode(raw).decode("utf-8")

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": question},
                {"type": "image_url", "image_url": {"url": f"data:{mime};base64,{data}"}},
            ],
        },
    ]

    # Only "text" chunks carry answer content; other chunk types are skipped.
    answer = "".join(
        chunk["content"]
        for chunk in agent.llm.chat(messages, stream=False)
        if chunk["type"] == "text"
    )
    return answer or "Could not analyze image."