Implement two-brain architecture: chat brain + execution brain

Chat brain uses OpenAI-compatible APIs (OpenRouter/Ollama/LM Studio) for
all UI conversations, giving full control over system prompts so the
Cheddah personality works correctly. Execution brain uses Claude Code CLI
for heartbeat, scheduled tasks, and delegated system-level work.

- Split llm.py: chat() routes through OpenAI-compat only, new execute()
  calls Claude CLI with Bash/Read/Edit/Write/Glob/Grep tools
- Add chat_model config field (default: openai/gpt-4o-mini)
- Add delegate_task tool bridging chat brain to execution brain
- Scheduler/heartbeat now use execute_task() for real CLI power
- UI dropdown shows chat-only models with custom value support
- Updated model list to current OpenRouter top models (Feb 2026)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
cora-start
PeninsulaInd 2026-02-13 22:59:32 -06:00
parent 1b73cf2e5d
commit af767f9684
9 changed files with 211 additions and 115 deletions

View File

@ -24,13 +24,18 @@ def main():
log.info("Initializing database...")
db = Database(config.db_path)
log.info("Initializing LLM adapter (default model: %s)...", config.default_model)
log.info("Chat brain model: %s", config.chat_model)
log.info("Execution brain model: %s (Claude Code CLI)", config.default_model)
llm = LLMAdapter(
default_model=config.default_model,
default_model=config.chat_model,
openrouter_key=config.openrouter_api_key,
ollama_url=config.ollama_url,
lmstudio_url=config.lmstudio_url,
)
if llm.is_execution_brain_available():
log.info("Execution brain: Claude Code CLI found in PATH")
else:
log.warning("Execution brain: Claude Code CLI NOT found — heartbeat/scheduler tasks will fail")
log.info("Creating agent...")
agent = Agent(config, db, llm)

View File

@ -132,3 +132,21 @@ class Agent:
for chunk in self.respond(prompt):
result_parts.append(chunk)
return "".join(result_parts)
def execute_task(self, prompt: str, system_context: str = "") -> str:
    """Run *prompt* through the execution brain (Claude Code CLI).

    Used by heartbeat, scheduler, and the delegate tool. The task and a
    truncated result are appended to daily memory when a memory backend
    is attached; memory failures are logged but never abort the task.

    Parameters:
        prompt: Plain-language task for the execution brain.
        system_context: Optional system prompt forwarded to the CLI.

    Returns:
        The execution brain's result string.
    """
    log.info("Execution brain task: %s", prompt[:100])
    result = self.llm.execute(prompt, system_prompt=system_context)
    if not self._memory:
        return result
    try:
        # Best-effort audit trail; truncated to keep the daily log compact.
        self._memory.log_daily(f"[Execution] {prompt[:200]}\n{result[:500]}")
    except Exception as exc:
        log.warning("Failed to log execution to memory: %s", exc)
    return result

View File

@ -35,6 +35,7 @@ class ShellConfig:
@dataclass
class Config:
chat_model: str = "openai/gpt-4o-mini"
default_model: str = "claude-sonnet-4-20250514"
host: str = "0.0.0.0"
port: int = 7860
@ -63,7 +64,7 @@ def load_config() -> Config:
if yaml_path.exists():
with open(yaml_path) as f:
data = yaml.safe_load(f) or {}
for key in ("default_model", "host", "port", "ollama_url", "lmstudio_url"):
for key in ("chat_model", "default_model", "host", "port", "ollama_url", "lmstudio_url"):
if key in data:
setattr(cfg, key, data[key])
if "memory" in data and isinstance(data["memory"], dict):
@ -81,6 +82,8 @@ def load_config() -> Config:
# Env var overrides (CHEDDAH_ prefix)
cfg.openrouter_api_key = os.getenv("OPENROUTER_API_KEY", "")
if cm := os.getenv("CHEDDAH_CHAT_MODEL"):
cfg.chat_model = cm
if m := os.getenv("CHEDDAH_DEFAULT_MODEL"):
cfg.default_model = m
if h := os.getenv("CHEDDAH_HOST"):

View File

@ -1,9 +1,14 @@
"""Model-agnostic LLM adapter.
"""Two-brain LLM adapter.
Routing:
- Claude models → Claude Code SDK (subprocess, uses Max subscription)
- Cloud models → OpenRouter (single API key, OpenAI-compatible)
- Local models → direct HTTP (Ollama / LM Studio, OpenAI-compatible)
Chat Brain:
- OpenRouter / Ollama / LM Studio (OpenAI-compatible APIs)
- Full control over system prompt → Cheddah personality works here
- Claude models available via OpenRouter mapping
Execution Brain:
- Claude Code CLI (subprocess)
- Used for heartbeat, scheduled tasks, delegated system-level work
- Claude's built-in tools (Bash, Read, Edit, etc.) are a feature here
"""
from __future__ import annotations
@ -26,31 +31,26 @@ log = logging.getLogger(__name__)
class ModelInfo:
id: str
name: str
provider: str # "claude" | "openrouter" | "ollama" | "lmstudio"
provider: str # "openrouter" | "ollama" | "lmstudio"
context_length: int | None = None
# Well-known Claude models that route through the SDK
CLAUDE_MODELS = {
"claude-sonnet-4-20250514",
"claude-opus-4-20250514",
"claude-haiku-4-20250514",
# Claude model IDs → OpenRouter equivalents (for chat dropdown)
CLAUDE_OPENROUTER_MAP = {
"claude-sonnet-4-20250514": "anthropic/claude-sonnet-4.5",
"claude-sonnet-4.5": "anthropic/claude-sonnet-4.5",
"claude-opus-4-20250514": "anthropic/claude-opus-4.6",
"claude-opus-4.6": "anthropic/claude-opus-4.6",
}
def _is_claude_model(model_id: str) -> bool:
return model_id in CLAUDE_MODELS or model_id.startswith("claude-")
def _provider_for(model_id: str, openrouter_key: str, ollama_url: str, lmstudio_url: str) -> str:
if _is_claude_model(model_id):
return "claude"
def _provider_for(model_id: str, openrouter_key: str) -> str:
"""Determine which OpenAI-compatible provider to route a chat model to."""
if model_id.startswith("local/ollama/"):
return "ollama"
if model_id.startswith("local/lmstudio/"):
return "lmstudio"
if openrouter_key:
return "openrouter"
# Everything else goes through OpenRouter (including mapped Claude models)
return "openrouter"
@ -70,13 +70,13 @@ class LLMAdapter:
@property
def provider(self) -> str:
return _provider_for(self.current_model, self.openrouter_key, self.ollama_url, self.lmstudio_url)
return _provider_for(self.current_model, self.openrouter_key)
def switch_model(self, model_id: str):
self.current_model = model_id
log.info("Switched to model: %s (provider: %s)", model_id, self.provider)
log.info("Switched chat model to: %s (provider: %s)", model_id, self.provider)
# ── Main entry point ──
# ── Chat Brain (OpenAI-compatible only) ──
def chat(
self,
@ -84,47 +84,66 @@ class LLMAdapter:
tools: list[dict] | None = None,
stream: bool = True,
) -> Generator[dict, None, None]:
"""Yield chunks: {"type": "text", "content": "..."} or {"type": "tool_use", ...}."""
"""Chat brain: routes through OpenAI-compatible APIs only.
Yields chunks: {"type": "text", "content": "..."} or {"type": "tool_use", ...}.
"""
provider = self.provider
if provider == "claude":
yield from self._chat_claude_sdk(messages, tools, stream)
else:
base_url, api_key = self._resolve_endpoint(provider)
model_id = self._resolve_model_id(provider)
yield from self._chat_openai_sdk(messages, tools, stream, base_url, api_key, model_id)
model_id = self._resolve_model_id(provider)
# ── Claude Code SDK (subprocess) ──
def _chat_claude_sdk(
self, messages: list[dict], tools: list[dict] | None, stream: bool
) -> Generator[dict, None, None]:
# Separate system prompt from user messages
system_prompt = ""
user_prompt_parts = []
for m in messages:
role = m.get("role", "user")
content = m.get("content", "")
if isinstance(content, list):
content = " ".join(c.get("text", "") for c in content if c.get("type") == "text")
if role == "system":
system_prompt += content + "\n"
elif role == "assistant":
user_prompt_parts.append(f"[Assistant]\n{content}")
# If a Claude model ID was selected, map it to OpenRouter equivalent
if model_id in CLAUDE_OPENROUTER_MAP:
if self.openrouter_key:
model_id = CLAUDE_OPENROUTER_MAP[model_id]
provider = "openrouter"
else:
user_prompt_parts.append(content)
user_prompt = "\n\n".join(user_prompt_parts)
yield {"type": "text", "content": (
"To chat with Claude models, you need an OpenRouter API key "
"(set OPENROUTER_API_KEY in .env). Alternatively, select a local "
"model from Ollama or LM Studio."
)}
return
# Find claude CLI - on Windows needs .cmd extension for npm-installed binaries
claude_bin = shutil.which("claude")
if not claude_bin:
yield {"type": "text", "content": "Error: `claude` CLI not found in PATH. Install Claude Code: npm install -g @anthropic-ai/claude-code"}
# Check if provider is available
if provider == "openrouter" and not self.openrouter_key:
yield {"type": "text", "content": (
"No API key configured. To use cloud models:\n"
"1. Get an OpenRouter API key at https://openrouter.ai/keys\n"
"2. Set OPENROUTER_API_KEY in your .env file\n\n"
"Or install Ollama (free, local) and pull a model:\n"
" ollama pull llama3.2"
)}
return
cmd = [claude_bin, "-p", user_prompt, "--model", self.current_model,
"--output-format", "json", "--tools", ""]
if system_prompt.strip():
cmd.extend(["--system-prompt", system_prompt.strip()])
log.debug("Claude SDK using: %s", claude_bin)
base_url, api_key = self._resolve_endpoint(provider)
yield from self._chat_openai_sdk(messages, tools, stream, base_url, api_key, model_id)
# ── Execution Brain (Claude Code CLI) ──
def execute(
self,
prompt: str,
system_prompt: str = "",
working_dir: str | None = None,
) -> str:
"""Execution brain: calls Claude Code CLI with full tool access.
Used for heartbeat checks, scheduled tasks, and delegated complex tasks.
Returns the full result string (non-streaming).
"""
claude_bin = shutil.which("claude")
if not claude_bin:
return "Error: `claude` CLI not found in PATH. Install Claude Code: npm install -g @anthropic-ai/claude-code"
cmd = [
claude_bin, "-p", prompt,
"--output-format", "json",
"--tools", "Bash,Read,Edit,Write,Glob,Grep",
]
if system_prompt:
cmd.extend(["--system-prompt", system_prompt])
log.debug("Execution brain cmd: %s", " ".join(cmd[:6]) + "...")
# Strip CLAUDECODE env var so the subprocess doesn't think it's nested
env = {k: v for k, v in os.environ.items() if k != "CLAUDECODE"}
@ -137,31 +156,35 @@ class LLMAdapter:
text=True,
encoding="utf-8",
shell=(sys.platform == "win32"),
cwd=working_dir,
env=env,
)
except FileNotFoundError:
yield {"type": "text", "content": "Error: `claude` CLI not found. Install Claude Code: npm install -g @anthropic-ai/claude-code"}
return
return "Error: `claude` CLI not found. Install Claude Code: npm install -g @anthropic-ai/claude-code"
stdout, stderr = proc.communicate(timeout=120)
try:
stdout, stderr = proc.communicate(timeout=300)
except subprocess.TimeoutExpired:
proc.kill()
return "Error: Claude Code execution timed out after 5 minutes."
if proc.returncode != 0:
yield {"type": "text", "content": f"Claude SDK error: {stderr or 'unknown error'}"}
return
return f"Execution error: {stderr or 'unknown error'}"
# --output-format json returns a single JSON object
try:
result = json.loads(stdout)
text = result.get("result", "")
if text:
yield {"type": "text", "content": text}
elif result.get("is_error"):
yield {"type": "text", "content": f"Claude error: {result.get('result', 'unknown')}"}
return
return text
if result.get("is_error"):
return f"Execution error: {result.get('result', 'unknown')}"
return "(No output from execution brain)"
except json.JSONDecodeError:
# Fallback: treat as plain text
if stdout.strip():
yield {"type": "text", "content": stdout.strip()}
return stdout.strip() if stdout.strip() else "(No output from execution brain)"
def is_execution_brain_available(self) -> bool:
"""Check if the Claude Code CLI is available."""
return shutil.which("claude") is not None
# ── OpenAI-compatible SDK (OpenRouter / Ollama / LM Studio) ──
@ -259,25 +282,6 @@ class LLMAdapter:
return model.removeprefix("local/lmstudio/")
return model
def _messages_to_prompt(self, messages: list[dict]) -> str:
"""Flatten messages into a single prompt string for Claude SDK -p flag."""
parts = []
for m in messages:
role = m.get("role", "user")
content = m.get("content", "")
if isinstance(content, list):
# multimodal - extract text parts
content = " ".join(
c.get("text", "") for c in content if c.get("type") == "text"
)
if role == "system":
parts.append(f"[System]\n{content}")
elif role == "assistant":
parts.append(f"[Assistant]\n{content}")
else:
parts.append(content)
return "\n\n".join(parts)
def _get_openai(self):
if self._openai_mod is None:
import openai
@ -314,21 +318,32 @@ class LLMAdapter:
pass
return models
def list_available_models(self) -> list[ModelInfo]:
"""Return all available models across all providers."""
models = [
ModelInfo("claude-sonnet-4-20250514", "Claude Sonnet 4", "claude"),
ModelInfo("claude-opus-4-20250514", "Claude Opus 4", "claude"),
ModelInfo("claude-haiku-4-20250514", "Claude Haiku 4", "claude"),
]
def list_chat_models(self) -> list[ModelInfo]:
"""Return models available for the chat brain (no direct Claude SDK entries)."""
models = []
if self.openrouter_key:
models.extend([
ModelInfo("openai/gpt-4o", "GPT-4o", "openrouter"),
# Anthropic (via OpenRouter — system prompts work correctly)
ModelInfo("anthropic/claude-sonnet-4.5", "Claude Sonnet 4.5", "openrouter"),
ModelInfo("anthropic/claude-opus-4.6", "Claude Opus 4.6", "openrouter"),
# Google
ModelInfo("google/gemini-3-flash-preview", "Gemini 3 Flash Preview", "openrouter"),
ModelInfo("google/gemini-2.5-flash", "Gemini 2.5 Flash", "openrouter"),
ModelInfo("google/gemini-2.5-flash-lite", "Gemini 2.5 Flash Lite", "openrouter"),
# OpenAI
ModelInfo("openai/gpt-5-nano", "GPT-5 Nano", "openrouter"),
ModelInfo("openai/gpt-4o-mini", "GPT-4o Mini", "openrouter"),
ModelInfo("google/gemini-2.0-flash-001", "Gemini 2.0 Flash", "openrouter"),
ModelInfo("google/gemini-2.5-pro-preview", "Gemini 2.5 Pro", "openrouter"),
ModelInfo("mistralai/mistral-large", "Mistral Large", "openrouter"),
ModelInfo("meta-llama/llama-3.3-70b-instruct", "Llama 3.3 70B", "openrouter"),
# DeepSeek / xAI / Others
ModelInfo("deepseek/deepseek-v3.2", "DeepSeek V3.2", "openrouter"),
ModelInfo("x-ai/grok-4.1-fast", "Grok 4.1 Fast", "openrouter"),
ModelInfo("moonshotai/kimi-k2.5", "Kimi K2.5", "openrouter"),
ModelInfo("minimax/minimax-m2.5", "MiniMax M2.5", "openrouter"),
])
models.extend(self.discover_local_models())
return models
def list_available_models(self) -> list[ModelInfo]:
"""Backwards-compatible alias for list_chat_models()."""
return self.list_chat_models()

View File

@ -60,7 +60,7 @@ class Scheduler:
for task in tasks:
try:
log.info("Running scheduled task: %s", task["name"])
result = self.agent.respond_to_prompt(task["prompt"])
result = self.agent.execute_task(task["prompt"])
self.db.log_task_run(task["id"], result=result[:2000])
# Calculate next run
@ -107,7 +107,7 @@ class Scheduler:
f"{checklist}"
)
result = self.agent.respond_to_prompt(prompt)
result = self.agent.execute_task(prompt, system_context=checklist)
if HEARTBEAT_OK in result:
log.debug("Heartbeat: all clear")

View File

@ -0,0 +1,30 @@
"""Delegate tool: bridges chat brain to execution brain.
When the chat model needs to run commands, edit files, or do anything
requiring system-level access, it calls this tool. The task is passed
to the execution brain (Claude Code CLI) which has full tool access.
"""
from __future__ import annotations
from . import tool
@tool(
    "delegate_task",
    description=(
        "Delegate a complex task to the execution brain (Claude Code CLI). "
        "Use this when you need to: run shell commands, read/write/edit files, "
        "check system status, inspect the codebase, or perform any system-level "
        "operation. Describe the task clearly and the execution brain will carry "
        "it out using its full tool suite (Bash, Read, Edit, Write, Glob, Grep)."
    ),
    category="system",
)
def delegate_task(task_description: str, ctx: dict | None = None) -> str:
    """Hand *task_description* to the execution brain via the agent.

    Parameters:
        task_description: Plain-language description of the task to run.
        ctx: Tool-invocation context; must contain an "agent" entry whose
            value exposes ``execute_task()``.

    Returns:
        The execution brain's result string, or an error message when no
        agent context was supplied.
    """
    # Guard: this tool is useless without the agent bridge in context.
    if not ctx or "agent" not in ctx:
        return "Error: delegate tool requires agent context."
    return ctx["agent"].execute_task(task_description)

View File

@ -25,12 +25,19 @@ footer { display: none !important; }
def create_ui(agent: Agent, config: Config, llm: LLMAdapter) -> gr.Blocks:
"""Build and return the Gradio app."""
available_models = llm.list_available_models()
available_models = llm.list_chat_models()
model_choices = [(m.name, m.id) for m in available_models]
current_model = llm.current_model
exec_status = "available" if llm.is_execution_brain_available() else "unavailable"
with gr.Blocks(title="CheddahBot") as app:
gr.Markdown("# CheddahBot", elem_classes=["contain"])
gr.Markdown(
f"*Chat Brain:* `{current_model}` &nbsp;|&nbsp; "
f"*Execution Brain (Claude Code CLI):* `{exec_status}`",
elem_classes=["contain"],
)
with gr.Row(elem_classes=["contain"]):
model_dropdown = gr.Dropdown(
@ -38,6 +45,7 @@ def create_ui(agent: Agent, config: Config, llm: LLMAdapter) -> gr.Blocks:
value=current_model,
label="Model",
interactive=True,
allow_custom_value=True,
scale=3,
)
refresh_btn = gr.Button("Refresh", scale=0, min_width=70)
@ -90,7 +98,7 @@ def create_ui(agent: Agent, config: Config, llm: LLMAdapter) -> gr.Blocks:
return f"Switched to {model_id}"
def on_refresh_models():
models = llm.list_available_models()
models = llm.list_chat_models()
choices = [(m.name, m.id) for m in models]
return gr.update(choices=choices, value=llm.current_model)
@ -103,6 +111,8 @@ def create_ui(agent: Agent, config: Config, llm: LLMAdapter) -> gr.Blocks:
return [[c["id"], c["title"], c["updated_at"][:19]] for c in convs]
def on_user_message(message, chat_history):
chat_history = chat_history or []
# Extract text and files from MultimodalTextbox
if isinstance(message, dict):
text = message.get("text", "")
@ -140,12 +150,22 @@ def create_ui(agent: Agent, config: Config, llm: LLMAdapter) -> gr.Blocks:
yield chat_history, gr.update(value=None)
# Stream assistant response
response_text = ""
chat_history = chat_history + [{"role": "assistant", "content": ""}]
try:
response_text = ""
chat_history = chat_history + [{"role": "assistant", "content": ""}]
for chunk in agent.respond(text, files=processed_files):
response_text += chunk
chat_history[-1] = {"role": "assistant", "content": response_text}
for chunk in agent.respond(text, files=processed_files):
response_text += chunk
chat_history[-1] = {"role": "assistant", "content": response_text}
yield chat_history, gr.update(value=None)
# If no response came through, show a fallback
if not response_text:
chat_history[-1] = {"role": "assistant", "content": "(No response received from model)"}
yield chat_history, gr.update(value=None)
except Exception as e:
log.error("Error in agent.respond: %s", e, exc_info=True)
chat_history = chat_history + [{"role": "assistant", "content": f"Error: {e}"}]
yield chat_history, gr.update(value=None)
def on_voice_chat(audio_path):

View File

@ -1,6 +1,9 @@
# CheddahBot Configuration
# Default model to use on startup
# Chat model (for UI conversations - needs OpenRouter key or local model)
chat_model: "openai/gpt-4o-mini"
# Execution model (Claude Code CLI - uses Max subscription for heartbeat/scheduler)
default_model: "claude-sonnet-4-20250514"
# Gradio server settings

View File

@ -3,6 +3,8 @@
## Identity
- Name: (your name here)
- How to address: (first name, nickname, etc.)
- Origin: Cheddah is named after the user's Xbox Live gamertag, "CheddahYetti."
- Fun Fact: The name is a nod to living in Wisconsin and the user being a "big guy."
## Context
- Technical level: (beginner/intermediate/advanced)