42 lines
1.4 KiB
Python
42 lines
1.4 KiB
Python
"""Image analysis tool - delegates to vision-capable LLM."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import base64
|
|
from pathlib import Path
|
|
|
|
from . import tool
|
|
|
|
|
|
@tool("analyze_image", "Describe or analyze an image file", category="media")
def analyze_image(path: str, question: str = "Describe this image in detail.", ctx: dict = None) -> str:
    """Ask the agent's LLM a question about an image file on disk.

    The image is embedded as a base64 ``data:`` URL and sent, together with
    *question*, as a single user message to ``ctx["agent"].llm.chat`` with
    ``stream=False``. The ``content`` of every returned chunk whose ``type``
    is ``"text"`` is concatenated into the result.

    Args:
        path: Location of the image file; resolved to an absolute path
            before it is checked and read.
        question: Prompt sent alongside the image.
        ctx: Tool context. It must hold a truthy ``"agent"`` entry exposing
            ``llm.chat``. ``None`` or a context without an agent yields an
            error string.

    Returns:
        The LLM's text answer, or a human-readable message when the file
        does not exist, no agent is available, the file cannot be read, or
        the LLM produced no text.

    Note:
        Exceptions raised by the LLM call itself are not caught here.
    """
    image_path = Path(path).resolve()
    if not image_path.exists():
        return f"Image not found: {path}"

    # Without an agent there is nothing to send the image to, so bail out
    # before reading and base64-encoding a potentially large file.
    agent = ctx.get("agent") if ctx else None
    if not agent:
        return "Agent context not available for image analysis."

    mime_types = {
        ".png": "image/png",
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".gif": "image/gif",
        ".webp": "image/webp",
        ".bmp": "image/bmp",
    }
    # Extensions missing from the table are labelled as PNG.
    mime = mime_types.get(image_path.suffix.lower(), "image/png")

    try:
        encoded = base64.b64encode(image_path.read_bytes()).decode("utf-8")
    except Exception as e:
        # Any failure while reading/encoding is reported as text, not raised.
        return f"Error reading image: {e}"

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": question},
                {"type": "image_url", "image_url": {"url": f"data:{mime};base64,{encoded}"}},
            ],
        },
    ]

    # Keep only the text chunks; other chunk types are ignored.
    answer = "".join(
        chunk["content"]
        for chunk in agent.llm.chat(messages, stream=False)
        if chunk["type"] == "text"
    )
    return answer or "Could not analyze image."
|