Implement two-brain architecture: chat brain + execution brain

Chat brain uses OpenAI-compatible APIs (OpenRouter/Ollama/LM Studio) for
all UI conversations, giving full control over system prompts so the
Cheddah personality works correctly. Execution brain uses Claude Code CLI
for heartbeat, scheduled tasks, and delegated system-level work.

- Split llm.py: chat() routes through OpenAI-compat only, new execute()
  calls Claude CLI with Bash/Read/Edit/Write/Glob/Grep tools
- Add chat_model config field (default: openai/gpt-4o-mini)
- Add delegate_task tool bridging chat brain to execution brain
- Scheduler/heartbeat now use execute_task() for real CLI power
- UI dropdown shows chat-only models with custom value support
- Updated model list to current OpenRouter top models (Feb 2026)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
cora-start
PeninsulaInd 2026-02-13 22:59:32 -06:00
parent 1b73cf2e5d
commit af767f9684
9 changed files with 211 additions and 115 deletions

View File

@ -24,13 +24,18 @@ def main():
log.info("Initializing database...") log.info("Initializing database...")
db = Database(config.db_path) db = Database(config.db_path)
log.info("Initializing LLM adapter (default model: %s)...", config.default_model) log.info("Chat brain model: %s", config.chat_model)
log.info("Execution brain model: %s (Claude Code CLI)", config.default_model)
llm = LLMAdapter( llm = LLMAdapter(
default_model=config.default_model, default_model=config.chat_model,
openrouter_key=config.openrouter_api_key, openrouter_key=config.openrouter_api_key,
ollama_url=config.ollama_url, ollama_url=config.ollama_url,
lmstudio_url=config.lmstudio_url, lmstudio_url=config.lmstudio_url,
) )
if llm.is_execution_brain_available():
log.info("Execution brain: Claude Code CLI found in PATH")
else:
log.warning("Execution brain: Claude Code CLI NOT found — heartbeat/scheduler tasks will fail")
log.info("Creating agent...") log.info("Creating agent...")
agent = Agent(config, db, llm) agent = Agent(config, db, llm)

View File

@ -132,3 +132,21 @@ class Agent:
for chunk in self.respond(prompt): for chunk in self.respond(prompt):
result_parts.append(chunk) result_parts.append(chunk)
return "".join(result_parts) return "".join(result_parts)
def execute_task(self, prompt: str, system_context: str = "") -> str:
"""Execute a task using the execution brain (Claude Code CLI).
Used by heartbeat, scheduler, and the delegate tool.
Logs the result to daily memory if available.
"""
log.info("Execution brain task: %s", prompt[:100])
result = self.llm.execute(prompt, system_prompt=system_context)
# Log to daily memory
if self._memory:
try:
self._memory.log_daily(f"[Execution] {prompt[:200]}\n{result[:500]}")
except Exception as e:
log.warning("Failed to log execution to memory: %s", e)
return result

View File

@ -35,6 +35,7 @@ class ShellConfig:
@dataclass @dataclass
class Config: class Config:
chat_model: str = "openai/gpt-4o-mini"
default_model: str = "claude-sonnet-4-20250514" default_model: str = "claude-sonnet-4-20250514"
host: str = "0.0.0.0" host: str = "0.0.0.0"
port: int = 7860 port: int = 7860
@ -63,7 +64,7 @@ def load_config() -> Config:
if yaml_path.exists(): if yaml_path.exists():
with open(yaml_path) as f: with open(yaml_path) as f:
data = yaml.safe_load(f) or {} data = yaml.safe_load(f) or {}
for key in ("default_model", "host", "port", "ollama_url", "lmstudio_url"): for key in ("chat_model", "default_model", "host", "port", "ollama_url", "lmstudio_url"):
if key in data: if key in data:
setattr(cfg, key, data[key]) setattr(cfg, key, data[key])
if "memory" in data and isinstance(data["memory"], dict): if "memory" in data and isinstance(data["memory"], dict):
@ -81,6 +82,8 @@ def load_config() -> Config:
# Env var overrides (CHEDDAH_ prefix) # Env var overrides (CHEDDAH_ prefix)
cfg.openrouter_api_key = os.getenv("OPENROUTER_API_KEY", "") cfg.openrouter_api_key = os.getenv("OPENROUTER_API_KEY", "")
if cm := os.getenv("CHEDDAH_CHAT_MODEL"):
cfg.chat_model = cm
if m := os.getenv("CHEDDAH_DEFAULT_MODEL"): if m := os.getenv("CHEDDAH_DEFAULT_MODEL"):
cfg.default_model = m cfg.default_model = m
if h := os.getenv("CHEDDAH_HOST"): if h := os.getenv("CHEDDAH_HOST"):

View File

@ -1,9 +1,14 @@
"""Model-agnostic LLM adapter. """Two-brain LLM adapter.
Routing: Chat Brain:
- Claude models → Claude Code SDK (subprocess, uses Max subscription) - OpenRouter / Ollama / LM Studio (OpenAI-compatible APIs)
- Cloud models → OpenRouter (single API key, OpenAI-compatible) - Full control over system prompt → Cheddah personality works here
- Local models → direct HTTP (Ollama / LM Studio, OpenAI-compatible) - Claude models available via OpenRouter mapping
Execution Brain:
- Claude Code CLI (subprocess)
- Used for heartbeat, scheduled tasks, delegated system-level work
- Claude's built-in tools (Bash, Read, Edit, etc.) are a feature here
""" """
from __future__ import annotations from __future__ import annotations
@ -26,31 +31,26 @@ log = logging.getLogger(__name__)
class ModelInfo: class ModelInfo:
id: str id: str
name: str name: str
provider: str # "claude" | "openrouter" | "ollama" | "lmstudio" provider: str # "openrouter" | "ollama" | "lmstudio"
context_length: int | None = None context_length: int | None = None
# Well-known Claude models that route through the SDK # Claude model IDs → OpenRouter equivalents (for chat dropdown)
CLAUDE_MODELS = { CLAUDE_OPENROUTER_MAP = {
"claude-sonnet-4-20250514", "claude-sonnet-4-20250514": "anthropic/claude-sonnet-4.5",
"claude-opus-4-20250514", "claude-sonnet-4.5": "anthropic/claude-sonnet-4.5",
"claude-haiku-4-20250514", "claude-opus-4-20250514": "anthropic/claude-opus-4.6",
"claude-opus-4.6": "anthropic/claude-opus-4.6",
} }
def _is_claude_model(model_id: str) -> bool: def _provider_for(model_id: str, openrouter_key: str) -> str:
return model_id in CLAUDE_MODELS or model_id.startswith("claude-") """Determine which OpenAI-compatible provider to route a chat model to."""
def _provider_for(model_id: str, openrouter_key: str, ollama_url: str, lmstudio_url: str) -> str:
if _is_claude_model(model_id):
return "claude"
if model_id.startswith("local/ollama/"): if model_id.startswith("local/ollama/"):
return "ollama" return "ollama"
if model_id.startswith("local/lmstudio/"): if model_id.startswith("local/lmstudio/"):
return "lmstudio" return "lmstudio"
if openrouter_key: # Everything else goes through OpenRouter (including mapped Claude models)
return "openrouter"
return "openrouter" return "openrouter"
@ -70,13 +70,13 @@ class LLMAdapter:
@property @property
def provider(self) -> str: def provider(self) -> str:
return _provider_for(self.current_model, self.openrouter_key, self.ollama_url, self.lmstudio_url) return _provider_for(self.current_model, self.openrouter_key)
def switch_model(self, model_id: str): def switch_model(self, model_id: str):
self.current_model = model_id self.current_model = model_id
log.info("Switched to model: %s (provider: %s)", model_id, self.provider) log.info("Switched chat model to: %s (provider: %s)", model_id, self.provider)
# ── Main entry point ── # ── Chat Brain (OpenAI-compatible only) ──
def chat( def chat(
self, self,
@ -84,47 +84,66 @@ class LLMAdapter:
tools: list[dict] | None = None, tools: list[dict] | None = None,
stream: bool = True, stream: bool = True,
) -> Generator[dict, None, None]: ) -> Generator[dict, None, None]:
"""Yield chunks: {"type": "text", "content": "..."} or {"type": "tool_use", ...}.""" """Chat brain: routes through OpenAI-compatible APIs only.
Yields chunks: {"type": "text", "content": "..."} or {"type": "tool_use", ...}.
"""
provider = self.provider provider = self.provider
if provider == "claude": model_id = self._resolve_model_id(provider)
yield from self._chat_claude_sdk(messages, tools, stream)
else:
base_url, api_key = self._resolve_endpoint(provider)
model_id = self._resolve_model_id(provider)
yield from self._chat_openai_sdk(messages, tools, stream, base_url, api_key, model_id)
# ── Claude Code SDK (subprocess) ── # If a Claude model ID was selected, map it to OpenRouter equivalent
if model_id in CLAUDE_OPENROUTER_MAP:
def _chat_claude_sdk( if self.openrouter_key:
self, messages: list[dict], tools: list[dict] | None, stream: bool model_id = CLAUDE_OPENROUTER_MAP[model_id]
) -> Generator[dict, None, None]: provider = "openrouter"
# Separate system prompt from user messages
system_prompt = ""
user_prompt_parts = []
for m in messages:
role = m.get("role", "user")
content = m.get("content", "")
if isinstance(content, list):
content = " ".join(c.get("text", "") for c in content if c.get("type") == "text")
if role == "system":
system_prompt += content + "\n"
elif role == "assistant":
user_prompt_parts.append(f"[Assistant]\n{content}")
else: else:
user_prompt_parts.append(content) yield {"type": "text", "content": (
user_prompt = "\n\n".join(user_prompt_parts) "To chat with Claude models, you need an OpenRouter API key "
"(set OPENROUTER_API_KEY in .env). Alternatively, select a local "
"model from Ollama or LM Studio."
)}
return
# Find claude CLI - on Windows needs .cmd extension for npm-installed binaries # Check if provider is available
claude_bin = shutil.which("claude") if provider == "openrouter" and not self.openrouter_key:
if not claude_bin: yield {"type": "text", "content": (
yield {"type": "text", "content": "Error: `claude` CLI not found in PATH. Install Claude Code: npm install -g @anthropic-ai/claude-code"} "No API key configured. To use cloud models:\n"
"1. Get an OpenRouter API key at https://openrouter.ai/keys\n"
"2. Set OPENROUTER_API_KEY in your .env file\n\n"
"Or install Ollama (free, local) and pull a model:\n"
" ollama pull llama3.2"
)}
return return
cmd = [claude_bin, "-p", user_prompt, "--model", self.current_model, base_url, api_key = self._resolve_endpoint(provider)
"--output-format", "json", "--tools", ""] yield from self._chat_openai_sdk(messages, tools, stream, base_url, api_key, model_id)
if system_prompt.strip():
cmd.extend(["--system-prompt", system_prompt.strip()]) # ── Execution Brain (Claude Code CLI) ──
log.debug("Claude SDK using: %s", claude_bin)
def execute(
self,
prompt: str,
system_prompt: str = "",
working_dir: str | None = None,
) -> str:
"""Execution brain: calls Claude Code CLI with full tool access.
Used for heartbeat checks, scheduled tasks, and delegated complex tasks.
Returns the full result string (non-streaming).
"""
claude_bin = shutil.which("claude")
if not claude_bin:
return "Error: `claude` CLI not found in PATH. Install Claude Code: npm install -g @anthropic-ai/claude-code"
cmd = [
claude_bin, "-p", prompt,
"--output-format", "json",
"--tools", "Bash,Read,Edit,Write,Glob,Grep",
]
if system_prompt:
cmd.extend(["--system-prompt", system_prompt])
log.debug("Execution brain cmd: %s", " ".join(cmd[:6]) + "...")
# Strip CLAUDECODE env var so the subprocess doesn't think it's nested # Strip CLAUDECODE env var so the subprocess doesn't think it's nested
env = {k: v for k, v in os.environ.items() if k != "CLAUDECODE"} env = {k: v for k, v in os.environ.items() if k != "CLAUDECODE"}
@ -137,31 +156,35 @@ class LLMAdapter:
text=True, text=True,
encoding="utf-8", encoding="utf-8",
shell=(sys.platform == "win32"), shell=(sys.platform == "win32"),
cwd=working_dir,
env=env, env=env,
) )
except FileNotFoundError: except FileNotFoundError:
yield {"type": "text", "content": "Error: `claude` CLI not found. Install Claude Code: npm install -g @anthropic-ai/claude-code"} return "Error: `claude` CLI not found. Install Claude Code: npm install -g @anthropic-ai/claude-code"
return
stdout, stderr = proc.communicate(timeout=120) try:
stdout, stderr = proc.communicate(timeout=300)
except subprocess.TimeoutExpired:
proc.kill()
return "Error: Claude Code execution timed out after 5 minutes."
if proc.returncode != 0: if proc.returncode != 0:
yield {"type": "text", "content": f"Claude SDK error: {stderr or 'unknown error'}"} return f"Execution error: {stderr or 'unknown error'}"
return
# --output-format json returns a single JSON object
try: try:
result = json.loads(stdout) result = json.loads(stdout)
text = result.get("result", "") text = result.get("result", "")
if text: if text:
yield {"type": "text", "content": text} return text
elif result.get("is_error"): if result.get("is_error"):
yield {"type": "text", "content": f"Claude error: {result.get('result', 'unknown')}"} return f"Execution error: {result.get('result', 'unknown')}"
return return "(No output from execution brain)"
except json.JSONDecodeError: except json.JSONDecodeError:
# Fallback: treat as plain text return stdout.strip() if stdout.strip() else "(No output from execution brain)"
if stdout.strip():
yield {"type": "text", "content": stdout.strip()} def is_execution_brain_available(self) -> bool:
"""Check if the Claude Code CLI is available."""
return shutil.which("claude") is not None
# ── OpenAI-compatible SDK (OpenRouter / Ollama / LM Studio) ── # ── OpenAI-compatible SDK (OpenRouter / Ollama / LM Studio) ──
@ -259,25 +282,6 @@ class LLMAdapter:
return model.removeprefix("local/lmstudio/") return model.removeprefix("local/lmstudio/")
return model return model
def _messages_to_prompt(self, messages: list[dict]) -> str:
"""Flatten messages into a single prompt string for Claude SDK -p flag."""
parts = []
for m in messages:
role = m.get("role", "user")
content = m.get("content", "")
if isinstance(content, list):
# multimodal - extract text parts
content = " ".join(
c.get("text", "") for c in content if c.get("type") == "text"
)
if role == "system":
parts.append(f"[System]\n{content}")
elif role == "assistant":
parts.append(f"[Assistant]\n{content}")
else:
parts.append(content)
return "\n\n".join(parts)
def _get_openai(self): def _get_openai(self):
if self._openai_mod is None: if self._openai_mod is None:
import openai import openai
@ -314,21 +318,32 @@ class LLMAdapter:
pass pass
return models return models
def list_available_models(self) -> list[ModelInfo]: def list_chat_models(self) -> list[ModelInfo]:
"""Return all available models across all providers.""" """Return models available for the chat brain (no direct Claude SDK entries)."""
models = [ models = []
ModelInfo("claude-sonnet-4-20250514", "Claude Sonnet 4", "claude"),
ModelInfo("claude-opus-4-20250514", "Claude Opus 4", "claude"),
ModelInfo("claude-haiku-4-20250514", "Claude Haiku 4", "claude"),
]
if self.openrouter_key: if self.openrouter_key:
models.extend([ models.extend([
ModelInfo("openai/gpt-4o", "GPT-4o", "openrouter"), # Anthropic (via OpenRouter — system prompts work correctly)
ModelInfo("anthropic/claude-sonnet-4.5", "Claude Sonnet 4.5", "openrouter"),
ModelInfo("anthropic/claude-opus-4.6", "Claude Opus 4.6", "openrouter"),
# Google
ModelInfo("google/gemini-3-flash-preview", "Gemini 3 Flash Preview", "openrouter"),
ModelInfo("google/gemini-2.5-flash", "Gemini 2.5 Flash", "openrouter"),
ModelInfo("google/gemini-2.5-flash-lite", "Gemini 2.5 Flash Lite", "openrouter"),
# OpenAI
ModelInfo("openai/gpt-5-nano", "GPT-5 Nano", "openrouter"),
ModelInfo("openai/gpt-4o-mini", "GPT-4o Mini", "openrouter"), ModelInfo("openai/gpt-4o-mini", "GPT-4o Mini", "openrouter"),
ModelInfo("google/gemini-2.0-flash-001", "Gemini 2.0 Flash", "openrouter"), # DeepSeek / xAI / Others
ModelInfo("google/gemini-2.5-pro-preview", "Gemini 2.5 Pro", "openrouter"), ModelInfo("deepseek/deepseek-v3.2", "DeepSeek V3.2", "openrouter"),
ModelInfo("mistralai/mistral-large", "Mistral Large", "openrouter"), ModelInfo("x-ai/grok-4.1-fast", "Grok 4.1 Fast", "openrouter"),
ModelInfo("meta-llama/llama-3.3-70b-instruct", "Llama 3.3 70B", "openrouter"), ModelInfo("moonshotai/kimi-k2.5", "Kimi K2.5", "openrouter"),
ModelInfo("minimax/minimax-m2.5", "MiniMax M2.5", "openrouter"),
]) ])
models.extend(self.discover_local_models()) models.extend(self.discover_local_models())
return models return models
def list_available_models(self) -> list[ModelInfo]:
"""Backwards-compatible alias for list_chat_models()."""
return self.list_chat_models()

View File

@ -60,7 +60,7 @@ class Scheduler:
for task in tasks: for task in tasks:
try: try:
log.info("Running scheduled task: %s", task["name"]) log.info("Running scheduled task: %s", task["name"])
result = self.agent.respond_to_prompt(task["prompt"]) result = self.agent.execute_task(task["prompt"])
self.db.log_task_run(task["id"], result=result[:2000]) self.db.log_task_run(task["id"], result=result[:2000])
# Calculate next run # Calculate next run
@ -107,7 +107,7 @@ class Scheduler:
f"{checklist}" f"{checklist}"
) )
result = self.agent.respond_to_prompt(prompt) result = self.agent.execute_task(prompt, system_context=checklist)
if HEARTBEAT_OK in result: if HEARTBEAT_OK in result:
log.debug("Heartbeat: all clear") log.debug("Heartbeat: all clear")

View File

@ -0,0 +1,30 @@
"""Delegate tool: bridges chat brain to execution brain.
When the chat model needs to run commands, edit files, or do anything
requiring system-level access, it calls this tool. The task is passed
to the execution brain (Claude Code CLI) which has full tool access.
"""
from __future__ import annotations
from . import tool
@tool(
"delegate_task",
description=(
"Delegate a complex task to the execution brain (Claude Code CLI). "
"Use this when you need to: run shell commands, read/write/edit files, "
"check system status, inspect the codebase, or perform any system-level "
"operation. Describe the task clearly and the execution brain will carry "
"it out using its full tool suite (Bash, Read, Edit, Write, Glob, Grep)."
),
category="system",
)
def delegate_task(task_description: str, ctx: dict = None) -> str:
"""Delegate a task to the execution brain."""
if not ctx or "agent" not in ctx:
return "Error: delegate tool requires agent context."
agent = ctx["agent"]
return agent.execute_task(task_description)

View File

@ -25,12 +25,19 @@ footer { display: none !important; }
def create_ui(agent: Agent, config: Config, llm: LLMAdapter) -> gr.Blocks: def create_ui(agent: Agent, config: Config, llm: LLMAdapter) -> gr.Blocks:
"""Build and return the Gradio app.""" """Build and return the Gradio app."""
available_models = llm.list_available_models() available_models = llm.list_chat_models()
model_choices = [(m.name, m.id) for m in available_models] model_choices = [(m.name, m.id) for m in available_models]
current_model = llm.current_model current_model = llm.current_model
exec_status = "available" if llm.is_execution_brain_available() else "unavailable"
with gr.Blocks(title="CheddahBot") as app: with gr.Blocks(title="CheddahBot") as app:
gr.Markdown("# CheddahBot", elem_classes=["contain"]) gr.Markdown("# CheddahBot", elem_classes=["contain"])
gr.Markdown(
f"*Chat Brain:* `{current_model}` &nbsp;|&nbsp; "
f"*Execution Brain (Claude Code CLI):* `{exec_status}`",
elem_classes=["contain"],
)
with gr.Row(elem_classes=["contain"]): with gr.Row(elem_classes=["contain"]):
model_dropdown = gr.Dropdown( model_dropdown = gr.Dropdown(
@ -38,6 +45,7 @@ def create_ui(agent: Agent, config: Config, llm: LLMAdapter) -> gr.Blocks:
value=current_model, value=current_model,
label="Model", label="Model",
interactive=True, interactive=True,
allow_custom_value=True,
scale=3, scale=3,
) )
refresh_btn = gr.Button("Refresh", scale=0, min_width=70) refresh_btn = gr.Button("Refresh", scale=0, min_width=70)
@ -90,7 +98,7 @@ def create_ui(agent: Agent, config: Config, llm: LLMAdapter) -> gr.Blocks:
return f"Switched to {model_id}" return f"Switched to {model_id}"
def on_refresh_models(): def on_refresh_models():
models = llm.list_available_models() models = llm.list_chat_models()
choices = [(m.name, m.id) for m in models] choices = [(m.name, m.id) for m in models]
return gr.update(choices=choices, value=llm.current_model) return gr.update(choices=choices, value=llm.current_model)
@ -103,6 +111,8 @@ def create_ui(agent: Agent, config: Config, llm: LLMAdapter) -> gr.Blocks:
return [[c["id"], c["title"], c["updated_at"][:19]] for c in convs] return [[c["id"], c["title"], c["updated_at"][:19]] for c in convs]
def on_user_message(message, chat_history): def on_user_message(message, chat_history):
chat_history = chat_history or []
# Extract text and files from MultimodalTextbox # Extract text and files from MultimodalTextbox
if isinstance(message, dict): if isinstance(message, dict):
text = message.get("text", "") text = message.get("text", "")
@ -140,12 +150,22 @@ def create_ui(agent: Agent, config: Config, llm: LLMAdapter) -> gr.Blocks:
yield chat_history, gr.update(value=None) yield chat_history, gr.update(value=None)
# Stream assistant response # Stream assistant response
response_text = "" try:
chat_history = chat_history + [{"role": "assistant", "content": ""}] response_text = ""
chat_history = chat_history + [{"role": "assistant", "content": ""}]
for chunk in agent.respond(text, files=processed_files): for chunk in agent.respond(text, files=processed_files):
response_text += chunk response_text += chunk
chat_history[-1] = {"role": "assistant", "content": response_text} chat_history[-1] = {"role": "assistant", "content": response_text}
yield chat_history, gr.update(value=None)
# If no response came through, show a fallback
if not response_text:
chat_history[-1] = {"role": "assistant", "content": "(No response received from model)"}
yield chat_history, gr.update(value=None)
except Exception as e:
log.error("Error in agent.respond: %s", e, exc_info=True)
chat_history = chat_history + [{"role": "assistant", "content": f"Error: {e}"}]
yield chat_history, gr.update(value=None) yield chat_history, gr.update(value=None)
def on_voice_chat(audio_path): def on_voice_chat(audio_path):

View File

@ -1,6 +1,9 @@
# CheddahBot Configuration # CheddahBot Configuration
# Default model to use on startup # Chat model (for UI conversations - needs OpenRouter key or local model)
chat_model: "openai/gpt-4o-mini"
# Execution model (Claude Code CLI - uses Max subscription for heartbeat/scheduler)
default_model: "claude-sonnet-4-20250514" default_model: "claude-sonnet-4-20250514"
# Gradio server settings # Gradio server settings

View File

@ -3,6 +3,8 @@
## Identity ## Identity
- Name: (your name here) - Name: (your name here)
- How to address: (first name, nickname, etc.) - How to address: (first name, nickname, etc.)
- Origin: Cheddah is named after the user's Xbox Live gamertag, "CheddahYetti."
- Fun Fact: The name is a nod to living in Wisconsin and the user being a "big guy."
## Context ## Context
- Technical level: (beginner/intermediate/advanced) - Technical level: (beginner/intermediate/advanced)