53 lines
1.5 KiB
Python
53 lines
1.5 KiB
Python
"""Image analysis tool - delegates to vision-capable LLM."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import base64
|
|
from pathlib import Path
|
|
|
|
from . import tool
|
|
|
|
|
|
# Extensions with a fixed, well-known image MIME type; consulted before the
# stdlib guess so these common formats never depend on the platform MIME db.
_IMAGE_MIME_BY_SUFFIX = {
    ".png": "image/png",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".gif": "image/gif",
    ".webp": "image/webp",
    ".bmp": "image/bmp",
}

# Historical fallback used when nothing better can be determined.
_DEFAULT_IMAGE_MIME = "image/png"


def _image_mime_type(p: Path) -> str:
    """Return the MIME type to advertise in the data URL for the file at *p*."""
    # Local import: keeps this block self-contained without touching the
    # module-level import section.
    import mimetypes

    known = _IMAGE_MIME_BY_SUFFIX.get(p.suffix.lower())
    if known is not None:
        return known

    # Fall back to the stdlib table for other image formats (e.g. .tiff, .svg)
    # instead of mislabelling them as PNG. Non-image guesses are ignored.
    guessed, _encoding = mimetypes.guess_type(p.name)
    if guessed and guessed.startswith("image/"):
        return guessed
    return _DEFAULT_IMAGE_MIME


@tool("analyze_image", "Describe or analyze an image file", category="media")
def analyze_image(
    path: str, question: str = "Describe this image in detail.", ctx: dict | None = None
) -> str:
    """Ask the agent's LLM a question about an image file on disk.

    The image is embedded as a base64 ``data:`` URL inside a single user
    message made of a text part (*question*) and an ``image_url`` part, and
    that message is passed to ``ctx["agent"].llm.chat(..., stream=False)``.

    Args:
        path: Location of the image file; resolved to an absolute path.
        question: Prompt sent alongside the image.
        ctx: Tool context. Must contain a truthy ``"agent"`` entry exposing
            ``llm.chat``; otherwise no analysis is attempted.

    Returns:
        The concatenated ``"content"`` of every chunk whose ``"type"`` is
        ``"text"``. A human-readable message is returned instead when the
        file does not exist, the agent context is missing, the file cannot be
        read, or the model produced no text.

    Note:
        Exceptions raised by ``agent.llm.chat`` are not caught here and
        propagate to the caller.
    """
    p = Path(path).resolve()
    if not p.exists():
        return f"Image not found: {path}"

    # Check for the agent before touching the file: reading and encoding a
    # large image is wasted work when there is no model to send it to.
    agent = ctx.get("agent") if ctx else None
    if not agent:
        return "Agent context not available for image analysis."

    try:
        data = base64.b64encode(p.read_bytes()).decode("ascii")
    except (OSError, MemoryError) as e:
        # OSError covers permission problems, directories, and files removed
        # after the existence check; MemoryError covers oversized files.
        return f"Error reading image: {e}"

    mime = _image_mime_type(p)
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": question},
                {"type": "image_url", "image_url": {"url": f"data:{mime};base64,{data}"}},
            ],
        },
    ]

    # Only text chunks contribute to the answer; other chunk types are skipped.
    answer = "".join(
        chunk["content"]
        for chunk in agent.llm.chat(messages, stream=False)
        if chunk["type"] == "text"
    )
    return answer or "Could not analyze image."
|