164 lines
5.7 KiB
Python
164 lines
5.7 KiB
Python
"""Message formatting and system prompt construction."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from pathlib import Path
|
|
|
|
|
|
def build_system_prompt(
    identity_dir: Path,
    memory_context: str = "",
    tools_description: str = "",
    skills_context: str = "",
) -> str:
    """Build the system prompt from identity files + memory + skills + tools.

    Sections are assembled in a fixed order and joined with a Markdown
    horizontal rule (``---``) surrounded by blank lines:

    1. ``SOUL.md`` from ``identity_dir`` (stripped)
    2. ``USER.md`` from ``identity_dir`` (stripped)
    3. ``memory_context`` under a ``# Relevant Memory`` heading
    4. ``skills_context`` verbatim
    5. ``tools_description`` under an ``# Available Tools`` heading
    6. The hard-coded core instructions (always present)

    Any optional section that is missing, empty, or whitespace-only is
    skipped entirely so the prompt never contains an empty section between
    two separators.

    Args:
        identity_dir: Directory that may contain ``SOUL.md`` and ``USER.md``.
        memory_context: Text to place under the memory heading.
        tools_description: Text to place under the tools heading.
        skills_context: Pre-formatted skills text, inserted as-is.

    Returns:
        The assembled system prompt.
    """
    parts: list[str] = []

    # 1-2. Identity files. ``is_file`` (rather than ``exists``) keeps a
    # directory that happens to carry one of these names from raising on read.
    for filename in ("SOUL.md", "USER.md"):
        identity_path = identity_dir / filename
        if not identity_path.is_file():
            continue
        identity_text = identity_path.read_text(encoding="utf-8").strip()
        if identity_text:
            parts.append(identity_text)

    # 3. Memory context (injected by memory system)
    if memory_context and memory_context.strip():
        parts.append(f"# Relevant Memory\n{memory_context}")

    # 4. Skills context (injected by skill registry)
    if skills_context and skills_context.strip():
        parts.append(skills_context)

    # 5. Available tools
    if tools_description and tools_description.strip():
        parts.append(f"# Available Tools\n{tools_description}")

    # 6. Core instructions
    parts.append(
        "# Instructions\n"
        "- Use tools when they would help answer the user's request.\n"
        "- If you learn something important about the user, save it to memory.\n"
        "- Be concise but thorough. Don't pad responses unnecessarily.\n"
        "- When uncertain, ask for clarification.\n"
        "- Reference memories naturally when relevant.\n"
        "- IMPORTANT: Do NOT call the same tool twice with the same arguments. "
        "If a tool already returned a result, use that result — do not re-call it.\n"
        "- After using tools, always respond to the user with a final answer. "
        "Do not end your turn with only tool calls and no text.\n"
        "- For tasks requiring shell commands, file edits, or system access, "
        "use the delegate_task tool instead of trying to do it yourself.\n"
        "- EXCEPTION: If a specialized tool exists for the request (e.g. write_press_releases), "
        "call that tool directly — do NOT delegate it. Specialized tools have their own "
        "orchestration and skill prompts built in."
    )

    return "\n\n---\n\n".join(parts)
|
|
|
|
|
|
def format_messages_for_llm(
    system_prompt: str,
    history: list[dict],
    max_messages: int = 50,
) -> list[dict]:
    """Format conversation history into LLM message format.

    The result always starts with a ``system`` message carrying
    ``system_prompt``, followed by at most the last ``max_messages`` entries
    of ``history``. A non-positive ``max_messages`` keeps no history.

    Handles three message types from the DB:
    - user: passed through as role=user
    - assistant: reconstructed with tool_calls in OpenAI format when present;
      skipped if empty content AND no tool_calls
    - tool: kept as role=tool with a tool_call_id linking back to the
      assistant message that requested it

    Messages with any other role are dropped. A missing role is treated as
    ``user`` and missing/``None`` content as an empty string.

    Args:
        system_prompt: Content of the leading system message.
        history: Stored messages, oldest first. Each is a dict that may carry
            ``role``, ``content``, ``tool_calls`` (entries with ``id``,
            ``name``, ``input``) and ``tool_result`` (the tool's name).
        max_messages: Maximum number of history entries to include.

    Returns:
        The message list, with consecutive same-role text messages merged.
    """
    messages: list[dict] = [{"role": "system", "content": system_prompt}]

    # ``history[-0:]`` would be the whole list, so guard non-positive limits.
    recent = history[-max_messages:] if max_messages > 0 else []

    # If truncation cut a tool exchange in half, the window opens with tool
    # results whose requesting assistant message is gone. Chat-completions
    # style APIs expect a tool message to follow the assistant message that
    # issued the call, so drop those orphans.
    if len(recent) < len(history):
        first_kept = 0
        while (
            first_kept < len(recent)
            and recent[first_kept].get("role", "user") == "tool"
        ):
            first_kept += 1
        recent = recent[first_kept:]

    for msg in recent:
        role = msg.get("role", "user")
        # Normalise a stored null to "" so user/tool content is always a string.
        content = msg.get("content") or ""
        tool_calls = msg.get("tool_calls")  # list or None

        if role == "user":
            messages.append({"role": "user", "content": content})

        elif role == "assistant":
            # Skip completely empty assistant messages (no text, no tool_calls)
            if not content and not tool_calls:
                continue

            entry: dict = {"role": "assistant", "content": content or None}

            if tool_calls:
                entry["tool_calls"] = [
                    {
                        # Synthesise an id when the stored call has none.
                        "id": tc.get("id") or f"call_{tc.get('name', 'unknown')}_{i}",
                        "type": "function",
                        "function": {
                            "name": tc.get("name", "unknown"),
                            # A null input still serialises as a JSON object.
                            "arguments": json.dumps(tc.get("input") or {}),
                        },
                    }
                    for i, tc in enumerate(tool_calls)
                ]

            messages.append(entry)

        elif role == "tool":
            # The DB row stores the tool's name under "tool_result".
            tool_name = msg.get("tool_result", "unknown")
            messages.append(
                {
                    "role": "tool",
                    "tool_call_id": _find_tool_call_id(messages, tool_name),
                    "content": content,
                }
            )

    return _merge_consecutive(messages)
|
|
|
|
|
|
def _find_tool_call_id(messages: list[dict], tool_name: str) -> str:
    """Walk backwards through messages to find the tool_call_id for a tool result.

    The most recent assistant message that contains a call to ``tool_name``
    is selected. Within it, the first call to that tool whose id has not
    already been used by a later ``tool`` message is returned, so repeated
    calls to the same tool in one turn each receive their own result. If
    every matching call is already answered, the first match is returned.

    Args:
        messages: Already-formatted messages, oldest first. Assistant
            entries carry ``tool_calls`` in OpenAI format; tool entries
            carry ``tool_call_id``.
        tool_name: Name of the tool that produced the result.

    Returns:
        The matching call id, or ``call_<tool_name>_0`` when no assistant
        message requested this tool.
    """
    # Ids answered by tool messages that come after the assistant message
    # currently being inspected (collected as we walk backwards).
    claimed_ids: set = set()

    for msg in reversed(messages):
        role = msg.get("role")
        if role == "tool":
            claimed_ids.add(msg.get("tool_call_id"))
            continue
        if role != "assistant" or "tool_calls" not in msg:
            continue

        matching_ids = [
            tc["id"]
            for tc in msg["tool_calls"]
            if tc.get("function", {}).get("name") == tool_name
        ]
        if not matching_ids:
            continue

        # Prefer a call that has no result yet; otherwise keep the first match.
        return next(
            (tc_id for tc_id in matching_ids if tc_id not in claimed_ids),
            matching_ids[0],
        )

    # Fallback: generate a deterministic ID so the API doesn't reject it
    return f"call_{tool_name}_0"
|
|
|
|
|
|
def _merge_consecutive(messages: list[dict]) -> list[dict]:
    """Merge back-to-back messages with the same role to avoid API rejection.

    Only merges user or assistant messages (not system or tool), and only
    when neither message carries ``tool_calls``. Merged text is the two
    contents joined by a blank line, with surrounding whitespace stripped;
    a missing or ``None`` content counts as an empty string.

    The input list and its dicts are left untouched: every kept message is a
    shallow copy, so merging never rewrites the caller's objects.

    Args:
        messages: Messages in conversation order, each with a ``role`` key.

    Returns:
        A new list with adjacent mergeable messages collapsed.
    """
    merged: list[dict] = []

    for msg in messages:
        prev = merged[-1] if merged else None
        can_merge = (
            prev is not None
            and msg["role"] == prev["role"]
            and msg["role"] in ("user", "assistant")
            and "tool_calls" not in prev
            and "tool_calls" not in msg
        )
        if can_merge:
            # ``prev`` is our own copy, so updating it is safe.
            prev_text = prev.get("content") or ""
            new_text = msg.get("content") or ""
            prev["content"] = f"{prev_text}\n\n{new_text}".strip()
        else:
            merged.append(dict(msg))

    return merged
|