1.3: Fix files parameter in agent.py — attachments now visible to LLM
Previously, respond() accepted files but silently dropped them. Now, when files are attached:
- Images are base64-encoded as image_url content parts.
- Text files are read and inlined as text content parts.
- The last user message is converted to multipart format.
This follows the same encoding pattern used in tools/image.py.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
cora-start
parent
9002fc08d2
commit
ed751d843b
|
|
@ -2,10 +2,12 @@
|
|||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import json
|
||||
import logging
|
||||
import uuid
|
||||
from collections.abc import Generator
|
||||
from pathlib import Path
|
||||
|
||||
from .config import Config
|
||||
from .db import Database
|
||||
|
|
@ -16,6 +18,49 @@ log = logging.getLogger(__name__)
|
|||
|
||||
# NOTE(review): presumably the upper bound on "LLM call → tool execution"
# rounds in the agent loop — the loop itself is not visible here; confirm.
MAX_TOOL_ITERATIONS = 5
|
||||
|
||||
# File suffix (lower-case, with leading dot) → MIME type used in the data URL
# of an image attachment. Any suffix not listed here is treated as text.
_IMAGE_MIME = {
    ".png": "image/png",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".gif": "image/gif",
    ".webp": "image/webp",
    ".bmp": "image/bmp",
}


def _build_file_content_parts(
    files: list[str] | None, *, max_text_chars: int = 10000
) -> list[dict]:
    """Encode file attachments as content parts for the LLM message.

    Images (by suffix, see ``_IMAGE_MIME``) become base64 ``image_url`` parts;
    every other file is read as UTF-8 text (undecodable bytes replaced) and
    inlined as a ``text`` part, truncated to ``max_text_chars`` characters.

    This function never raises for an unreadable attachment: a missing,
    malformed or unreadable path yields a placeholder ``text`` part instead,
    so the model can still be told that the attachment failed.

    Args:
        files: Paths of the attached files. ``None`` or an empty list yields
            an empty result.
        max_text_chars: Maximum number of characters of a text file to
            inline before appending the truncation marker. Negative values
            are treated as 0.

    Returns:
        One content-part dict per input path, in input order.
    """
    limit = max(max_text_chars, 0)
    parts: list[dict] = []

    for file_path in files or ():
        # resolve() can raise on malformed paths (e.g. an embedded NUL byte)
        # or symlink loops; report those like any other missing attachment
        # rather than letting them escape to the caller.
        try:
            p = Path(file_path).resolve()
            found = p.exists()
        except (OSError, ValueError, RuntimeError):
            found = False
        if not found:
            parts.append({"type": "text", "text": f"[File not found: {file_path}]"})
            continue

        mime = _IMAGE_MIME.get(p.suffix.lower())
        if mime is not None:
            # Deliberately broad: attachment handling is best-effort and any
            # failure is surfaced to the model as text.
            try:
                data = base64.b64encode(p.read_bytes()).decode("utf-8")
            except Exception as e:
                parts.append({"type": "text", "text": f"[Error reading image {p.name}: {e}]"})
            else:
                parts.append({
                    "type": "image_url",
                    "image_url": {"url": f"data:{mime};base64,{data}"},
                })
            continue

        try:
            # Read one character past the limit: enough to detect that
            # truncation is needed without loading the whole file.
            with p.open(encoding="utf-8", errors="replace") as fh:
                text = fh.read(limit + 1)
        except Exception as e:
            parts.append({"type": "text", "text": f"[Error reading {p.name}: {e}]"})
            continue

        if len(text) > limit:
            text = text[:limit] + "\n... (truncated)"
        parts.append({"type": "text", "text": f"[File: {p.name}]\n{text}"})

    return parts
|
||||
|
||||
|
||||
class Agent:
|
||||
def __init__(self, config: Config, db: Database, llm: LLMAdapter):
|
||||
|
|
@ -73,6 +118,20 @@ class Agent:
|
|||
system_prompt, history, self.config.memory.max_context_messages
|
||||
)
|
||||
|
||||
# If files are attached, replace the last user message with multipart content
|
||||
if files:
|
||||
file_parts = _build_file_content_parts(files)
|
||||
if file_parts:
|
||||
# Find the last user message and convert to multipart
|
||||
for i in range(len(messages) - 1, -1, -1):
|
||||
if messages[i]["role"] == "user":
|
||||
text_content = messages[i]["content"]
|
||||
messages[i]["content"] = [
|
||||
{"type": "text", "text": text_content},
|
||||
*file_parts,
|
||||
]
|
||||
break
|
||||
|
||||
# Agent loop: LLM call → tool execution → repeat
|
||||
seen_tool_calls: set[str] = set() # track (name, args_json) to prevent duplicates
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue