Implement two-brain architecture: chat brain + execution brain

Chat brain uses OpenAI-compatible APIs (OpenRouter/Ollama/LM Studio) for
all UI conversations, giving full control over system prompts so the
Cheddah personality works correctly. Execution brain uses Claude Code CLI
for heartbeat, scheduled tasks, and delegated system-level work.

- Split llm.py: chat() routes through OpenAI-compat only, new execute()
  calls Claude CLI with Bash/Read/Edit/Write/Glob/Grep tools
- Add chat_model config field (default: openai/gpt-4o-mini)
- Add delegate_task tool bridging chat brain to execution brain
- Scheduler/heartbeat now use execute_task() for real CLI power
- UI dropdown shows chat-only models with custom value support
- Updated model list to current OpenRouter top models (Feb 2026)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
cora-start
PeninsulaInd 2026-02-13 22:59:32 -06:00
parent 1b73cf2e5d
commit af767f9684
9 changed files with 211 additions and 115 deletions

View File

@ -24,13 +24,18 @@ def main():
log.info("Initializing database...") log.info("Initializing database...")
db = Database(config.db_path) db = Database(config.db_path)
log.info("Initializing LLM adapter (default model: %s)...", config.default_model) log.info("Chat brain model: %s", config.chat_model)
log.info("Execution brain model: %s (Claude Code CLI)", config.default_model)
llm = LLMAdapter( llm = LLMAdapter(
default_model=config.default_model, default_model=config.chat_model,
openrouter_key=config.openrouter_api_key, openrouter_key=config.openrouter_api_key,
ollama_url=config.ollama_url, ollama_url=config.ollama_url,
lmstudio_url=config.lmstudio_url, lmstudio_url=config.lmstudio_url,
) )
if llm.is_execution_brain_available():
log.info("Execution brain: Claude Code CLI found in PATH")
else:
log.warning("Execution brain: Claude Code CLI NOT found — heartbeat/scheduler tasks will fail")
log.info("Creating agent...") log.info("Creating agent...")
agent = Agent(config, db, llm) agent = Agent(config, db, llm)

View File

@ -132,3 +132,21 @@ class Agent:
for chunk in self.respond(prompt): for chunk in self.respond(prompt):
result_parts.append(chunk) result_parts.append(chunk)
return "".join(result_parts) return "".join(result_parts)
def execute_task(self, prompt: str, system_context: str = "") -> str:
"""Execute a task using the execution brain (Claude Code CLI).
Used by heartbeat, scheduler, and the delegate tool.
Logs the result to daily memory if available.
"""
log.info("Execution brain task: %s", prompt[:100])
result = self.llm.execute(prompt, system_prompt=system_context)
# Log to daily memory
if self._memory:
try:
self._memory.log_daily(f"[Execution] {prompt[:200]}\n{result[:500]}")
except Exception as e:
log.warning("Failed to log execution to memory: %s", e)
return result

View File

@ -35,6 +35,7 @@ class ShellConfig:
@dataclass @dataclass
class Config: class Config:
chat_model: str = "openai/gpt-4o-mini"
default_model: str = "claude-sonnet-4-20250514" default_model: str = "claude-sonnet-4-20250514"
host: str = "0.0.0.0" host: str = "0.0.0.0"
port: int = 7860 port: int = 7860
@ -63,7 +64,7 @@ def load_config() -> Config:
if yaml_path.exists(): if yaml_path.exists():
with open(yaml_path) as f: with open(yaml_path) as f:
data = yaml.safe_load(f) or {} data = yaml.safe_load(f) or {}
for key in ("default_model", "host", "port", "ollama_url", "lmstudio_url"): for key in ("chat_model", "default_model", "host", "port", "ollama_url", "lmstudio_url"):
if key in data: if key in data:
setattr(cfg, key, data[key]) setattr(cfg, key, data[key])
if "memory" in data and isinstance(data["memory"], dict): if "memory" in data and isinstance(data["memory"], dict):
@ -81,6 +82,8 @@ def load_config() -> Config:
# Env var overrides (CHEDDAH_ prefix) # Env var overrides (CHEDDAH_ prefix)
cfg.openrouter_api_key = os.getenv("OPENROUTER_API_KEY", "") cfg.openrouter_api_key = os.getenv("OPENROUTER_API_KEY", "")
if cm := os.getenv("CHEDDAH_CHAT_MODEL"):
cfg.chat_model = cm
if m := os.getenv("CHEDDAH_DEFAULT_MODEL"): if m := os.getenv("CHEDDAH_DEFAULT_MODEL"):
cfg.default_model = m cfg.default_model = m
if h := os.getenv("CHEDDAH_HOST"): if h := os.getenv("CHEDDAH_HOST"):

View File

@ -1,9 +1,14 @@
"""Model-agnostic LLM adapter. """Two-brain LLM adapter.
Routing: Chat Brain:
- Claude models → Claude Code SDK (subprocess, uses Max subscription) - OpenRouter / Ollama / LM Studio (OpenAI-compatible APIs)
- Cloud models → OpenRouter (single API key, OpenAI-compatible) - Full control over system prompt → Cheddah personality works here
- Local models → direct HTTP (Ollama / LM Studio, OpenAI-compatible) - Claude models available via OpenRouter mapping
Execution Brain:
- Claude Code CLI (subprocess)
- Used for heartbeat, scheduled tasks, delegated system-level work
- Claude's built-in tools (Bash, Read, Edit, etc.) are a feature here
""" """
from __future__ import annotations from __future__ import annotations
@ -26,31 +31,26 @@ log = logging.getLogger(__name__)
class ModelInfo: class ModelInfo:
id: str id: str
name: str name: str
provider: str # "claude" | "openrouter" | "ollama" | "lmstudio" provider: str # "openrouter" | "ollama" | "lmstudio"
context_length: int | None = None context_length: int | None = None
# Well-known Claude models that route through the SDK # Claude model IDs → OpenRouter equivalents (for chat dropdown)
CLAUDE_MODELS = { CLAUDE_OPENROUTER_MAP = {
"claude-sonnet-4-20250514", "claude-sonnet-4-20250514": "anthropic/claude-sonnet-4.5",
"claude-opus-4-20250514", "claude-sonnet-4.5": "anthropic/claude-sonnet-4.5",
"claude-haiku-4-20250514", "claude-opus-4-20250514": "anthropic/claude-opus-4.6",
"claude-opus-4.6": "anthropic/claude-opus-4.6",
} }
def _is_claude_model(model_id: str) -> bool: def _provider_for(model_id: str, openrouter_key: str) -> str:
return model_id in CLAUDE_MODELS or model_id.startswith("claude-") """Determine which OpenAI-compatible provider to route a chat model to."""
def _provider_for(model_id: str, openrouter_key: str, ollama_url: str, lmstudio_url: str) -> str:
if _is_claude_model(model_id):
return "claude"
if model_id.startswith("local/ollama/"): if model_id.startswith("local/ollama/"):
return "ollama" return "ollama"
if model_id.startswith("local/lmstudio/"): if model_id.startswith("local/lmstudio/"):
return "lmstudio" return "lmstudio"
if openrouter_key: # Everything else goes through OpenRouter (including mapped Claude models)
return "openrouter"
return "openrouter" return "openrouter"
@ -70,13 +70,13 @@ class LLMAdapter:
@property @property
def provider(self) -> str: def provider(self) -> str:
return _provider_for(self.current_model, self.openrouter_key, self.ollama_url, self.lmstudio_url) return _provider_for(self.current_model, self.openrouter_key)
def switch_model(self, model_id: str): def switch_model(self, model_id: str):
self.current_model = model_id self.current_model = model_id
log.info("Switched to model: %s (provider: %s)", model_id, self.provider) log.info("Switched chat model to: %s (provider: %s)", model_id, self.provider)
# ── Main entry point ── # ── Chat Brain (OpenAI-compatible only) ──
def chat( def chat(
self, self,
@ -84,47 +84,66 @@ class LLMAdapter:
tools: list[dict] | None = None, tools: list[dict] | None = None,
stream: bool = True, stream: bool = True,
) -> Generator[dict, None, None]: ) -> Generator[dict, None, None]:
"""Yield chunks: {"type": "text", "content": "..."} or {"type": "tool_use", ...}.""" """Chat brain: routes through OpenAI-compatible APIs only.
Yields chunks: {"type": "text", "content": "..."} or {"type": "tool_use", ...}.
"""
provider = self.provider provider = self.provider
if provider == "claude": model_id = self._resolve_model_id(provider)
yield from self._chat_claude_sdk(messages, tools, stream)
else:
base_url, api_key = self._resolve_endpoint(provider)
model_id = self._resolve_model_id(provider)
yield from self._chat_openai_sdk(messages, tools, stream, base_url, api_key, model_id)
# ── Claude Code SDK (subprocess) ── # If a Claude model ID was selected, map it to OpenRouter equivalent
if model_id in CLAUDE_OPENROUTER_MAP:
def _chat_claude_sdk( if self.openrouter_key:
self, messages: list[dict], tools: list[dict] | None, stream: bool model_id = CLAUDE_OPENROUTER_MAP[model_id]
) -> Generator[dict, None, None]: provider = "openrouter"
# Separate system prompt from user messages
system_prompt = ""
user_prompt_parts = []
for m in messages:
role = m.get("role", "user")
content = m.get("content", "")
if isinstance(content, list):
content = " ".join(c.get("text", "") for c in content if c.get("type") == "text")
if role == "system":
system_prompt += content + "\n"
elif role == "assistant":
user_prompt_parts.append(f"[Assistant]\n{content}")
else: else:
user_prompt_parts.append(content) yield {"type": "text", "content": (
user_prompt = "\n\n".join(user_prompt_parts) "To chat with Claude models, you need an OpenRouter API key "
"(set OPENROUTER_API_KEY in .env). Alternatively, select a local "
"model from Ollama or LM Studio."
)}
return
# Find claude CLI - on Windows needs .cmd extension for npm-installed binaries # Check if provider is available
claude_bin = shutil.which("claude") if provider == "openrouter" and not self.openrouter_key:
if not claude_bin: yield {"type": "text", "content": (
yield {"type": "text", "content": "Error: `claude` CLI not found in PATH. Install Claude Code: npm install -g @anthropic-ai/claude-code"} "No API key configured. To use cloud models:\n"
"1. Get an OpenRouter API key at https://openrouter.ai/keys\n"
"2. Set OPENROUTER_API_KEY in your .env file\n\n"
"Or install Ollama (free, local) and pull a model:\n"
" ollama pull llama3.2"
)}
return return
cmd = [claude_bin, "-p", user_prompt, "--model", self.current_model, base_url, api_key = self._resolve_endpoint(provider)
"--output-format", "json", "--tools", ""] yield from self._chat_openai_sdk(messages, tools, stream, base_url, api_key, model_id)
if system_prompt.strip():
cmd.extend(["--system-prompt", system_prompt.strip()]) # ── Execution Brain (Claude Code CLI) ──
log.debug("Claude SDK using: %s", claude_bin)
def execute(
self,
prompt: str,
system_prompt: str = "",
working_dir: str | None = None,
) -> str:
"""Execution brain: calls Claude Code CLI with full tool access.
Used for heartbeat checks, scheduled tasks, and delegated complex tasks.
Returns the full result string (non-streaming).
"""
claude_bin = shutil.which("claude")
if not claude_bin:
return "Error: `claude` CLI not found in PATH. Install Claude Code: npm install -g @anthropic-ai/claude-code"
cmd = [
claude_bin, "-p", prompt,
"--output-format", "json",
"--tools", "Bash,Read,Edit,Write,Glob,Grep",
]
if system_prompt:
cmd.extend(["--system-prompt", system_prompt])
log.debug("Execution brain cmd: %s", " ".join(cmd[:6]) + "...")
# Strip CLAUDECODE env var so the subprocess doesn't think it's nested # Strip CLAUDECODE env var so the subprocess doesn't think it's nested
env = {k: v for k, v in os.environ.items() if k != "CLAUDECODE"} env = {k: v for k, v in os.environ.items() if k != "CLAUDECODE"}
@ -137,31 +156,35 @@ class LLMAdapter:
text=True, text=True,
encoding="utf-8", encoding="utf-8",
shell=(sys.platform == "win32"), shell=(sys.platform == "win32"),
cwd=working_dir,
env=env, env=env,
) )
except FileNotFoundError: except FileNotFoundError:
yield {"type": "text", "content": "Error: `claude` CLI not found. Install Claude Code: npm install -g @anthropic-ai/claude-code"} return "Error: `claude` CLI not found. Install Claude Code: npm install -g @anthropic-ai/claude-code"
return
stdout, stderr = proc.communicate(timeout=120) try:
stdout, stderr = proc.communicate(timeout=300)
except subprocess.TimeoutExpired:
proc.kill()
return "Error: Claude Code execution timed out after 5 minutes."
if proc.returncode != 0: if proc.returncode != 0:
yield {"type": "text", "content": f"Claude SDK error: {stderr or 'unknown error'}"} return f"Execution error: {stderr or 'unknown error'}"
return
# --output-format json returns a single JSON object
try: try:
result = json.loads(stdout) result = json.loads(stdout)
text = result.get("result", "") text = result.get("result", "")
if text: if text:
yield {"type": "text", "content": text} return text
elif result.get("is_error"): if result.get("is_error"):
yield {"type": "text", "content": f"Claude error: {result.get('result', 'unknown')}"} return f"Execution error: {result.get('result', 'unknown')}"
return return "(No output from execution brain)"
except json.JSONDecodeError: except json.JSONDecodeError:
# Fallback: treat as plain text return stdout.strip() if stdout.strip() else "(No output from execution brain)"
if stdout.strip():
yield {"type": "text", "content": stdout.strip()} def is_execution_brain_available(self) -> bool:
"""Check if the Claude Code CLI is available."""
return shutil.which("claude") is not None
# ── OpenAI-compatible SDK (OpenRouter / Ollama / LM Studio) ── # ── OpenAI-compatible SDK (OpenRouter / Ollama / LM Studio) ──
@ -259,25 +282,6 @@ class LLMAdapter:
return model.removeprefix("local/lmstudio/") return model.removeprefix("local/lmstudio/")
return model return model
def _messages_to_prompt(self, messages: list[dict]) -> str:
"""Flatten messages into a single prompt string for Claude SDK -p flag."""
parts = []
for m in messages:
role = m.get("role", "user")
content = m.get("content", "")
if isinstance(content, list):
# multimodal - extract text parts
content = " ".join(
c.get("text", "") for c in content if c.get("type") == "text"
)
if role == "system":
parts.append(f"[System]\n{content}")
elif role == "assistant":
parts.append(f"[Assistant]\n{content}")
else:
parts.append(content)
return "\n\n".join(parts)
def _get_openai(self): def _get_openai(self):
if self._openai_mod is None: if self._openai_mod is None:
import openai import openai
@ -314,21 +318,32 @@ class LLMAdapter:
pass pass
return models return models
def list_available_models(self) -> list[ModelInfo]: def list_chat_models(self) -> list[ModelInfo]:
"""Return all available models across all providers.""" """Return models available for the chat brain (no direct Claude SDK entries)."""
models = [ models = []
ModelInfo("claude-sonnet-4-20250514", "Claude Sonnet 4", "claude"),
ModelInfo("claude-opus-4-20250514", "Claude Opus 4", "claude"),
ModelInfo("claude-haiku-4-20250514", "Claude Haiku 4", "claude"),
]
if self.openrouter_key: if self.openrouter_key:
models.extend([ models.extend([
ModelInfo("openai/gpt-4o", "GPT-4o", "openrouter"), # Anthropic (via OpenRouter — system prompts work correctly)
ModelInfo("anthropic/claude-sonnet-4.5", "Claude Sonnet 4.5", "openrouter"),
ModelInfo("anthropic/claude-opus-4.6", "Claude Opus 4.6", "openrouter"),
# Google
ModelInfo("google/gemini-3-flash-preview", "Gemini 3 Flash Preview", "openrouter"),
ModelInfo("google/gemini-2.5-flash", "Gemini 2.5 Flash", "openrouter"),
ModelInfo("google/gemini-2.5-flash-lite", "Gemini 2.5 Flash Lite", "openrouter"),
# OpenAI
ModelInfo("openai/gpt-5-nano", "GPT-5 Nano", "openrouter"),
ModelInfo("openai/gpt-4o-mini", "GPT-4o Mini", "openrouter"), ModelInfo("openai/gpt-4o-mini", "GPT-4o Mini", "openrouter"),
ModelInfo("google/gemini-2.0-flash-001", "Gemini 2.0 Flash", "openrouter"), # DeepSeek / xAI / Others
ModelInfo("google/gemini-2.5-pro-preview", "Gemini 2.5 Pro", "openrouter"), ModelInfo("deepseek/deepseek-v3.2", "DeepSeek V3.2", "openrouter"),
ModelInfo("mistralai/mistral-large", "Mistral Large", "openrouter"), ModelInfo("x-ai/grok-4.1-fast", "Grok 4.1 Fast", "openrouter"),
ModelInfo("meta-llama/llama-3.3-70b-instruct", "Llama 3.3 70B", "openrouter"), ModelInfo("moonshotai/kimi-k2.5", "Kimi K2.5", "openrouter"),
ModelInfo("minimax/minimax-m2.5", "MiniMax M2.5", "openrouter"),
]) ])
models.extend(self.discover_local_models()) models.extend(self.discover_local_models())
return models return models
def list_available_models(self) -> list[ModelInfo]:
"""Backwards-compatible alias for list_chat_models()."""
return self.list_chat_models()

View File

@ -60,7 +60,7 @@ class Scheduler:
for task in tasks: for task in tasks:
try: try:
log.info("Running scheduled task: %s", task["name"]) log.info("Running scheduled task: %s", task["name"])
result = self.agent.respond_to_prompt(task["prompt"]) result = self.agent.execute_task(task["prompt"])
self.db.log_task_run(task["id"], result=result[:2000]) self.db.log_task_run(task["id"], result=result[:2000])
# Calculate next run # Calculate next run
@ -107,7 +107,7 @@ class Scheduler:
f"{checklist}" f"{checklist}"
) )
result = self.agent.respond_to_prompt(prompt) result = self.agent.execute_task(prompt, system_context=checklist)
if HEARTBEAT_OK in result: if HEARTBEAT_OK in result:
log.debug("Heartbeat: all clear") log.debug("Heartbeat: all clear")

View File

@ -0,0 +1,30 @@
"""Delegate tool: bridges chat brain to execution brain.
When the chat model needs to run commands, edit files, or do anything
requiring system-level access, it calls this tool. The task is passed
to the execution brain (Claude Code CLI) which has full tool access.
"""
from __future__ import annotations
from . import tool
@tool(
"delegate_task",
description=(
"Delegate a complex task to the execution brain (Claude Code CLI). "
"Use this when you need to: run shell commands, read/write/edit files, "
"check system status, inspect the codebase, or perform any system-level "
"operation. Describe the task clearly and the execution brain will carry "
"it out using its full tool suite (Bash, Read, Edit, Write, Glob, Grep)."
),
category="system",
)
def delegate_task(task_description: str, ctx: dict = None) -> str:
"""Delegate a task to the execution brain."""
if not ctx or "agent" not in ctx:
return "Error: delegate tool requires agent context."
agent = ctx["agent"]
return agent.execute_task(task_description)

View File

@ -25,12 +25,19 @@ footer { display: none !important; }
def create_ui(agent: Agent, config: Config, llm: LLMAdapter) -> gr.Blocks: def create_ui(agent: Agent, config: Config, llm: LLMAdapter) -> gr.Blocks:
"""Build and return the Gradio app.""" """Build and return the Gradio app."""
available_models = llm.list_available_models() available_models = llm.list_chat_models()
model_choices = [(m.name, m.id) for m in available_models] model_choices = [(m.name, m.id) for m in available_models]
current_model = llm.current_model current_model = llm.current_model
exec_status = "available" if llm.is_execution_brain_available() else "unavailable"
with gr.Blocks(title="CheddahBot") as app: with gr.Blocks(title="CheddahBot") as app:
gr.Markdown("# CheddahBot", elem_classes=["contain"]) gr.Markdown("# CheddahBot", elem_classes=["contain"])
gr.Markdown(
f"*Chat Brain:* `{current_model}` &nbsp;|&nbsp; "
f"*Execution Brain (Claude Code CLI):* `{exec_status}`",
elem_classes=["contain"],
)
with gr.Row(elem_classes=["contain"]): with gr.Row(elem_classes=["contain"]):
model_dropdown = gr.Dropdown( model_dropdown = gr.Dropdown(
@ -38,6 +45,7 @@ def create_ui(agent: Agent, config: Config, llm: LLMAdapter) -> gr.Blocks:
value=current_model, value=current_model,
label="Model", label="Model",
interactive=True, interactive=True,
allow_custom_value=True,
scale=3, scale=3,
) )
refresh_btn = gr.Button("Refresh", scale=0, min_width=70) refresh_btn = gr.Button("Refresh", scale=0, min_width=70)
@ -90,7 +98,7 @@ def create_ui(agent: Agent, config: Config, llm: LLMAdapter) -> gr.Blocks:
return f"Switched to {model_id}" return f"Switched to {model_id}"
def on_refresh_models(): def on_refresh_models():
models = llm.list_available_models() models = llm.list_chat_models()
choices = [(m.name, m.id) for m in models] choices = [(m.name, m.id) for m in models]
return gr.update(choices=choices, value=llm.current_model) return gr.update(choices=choices, value=llm.current_model)
@ -103,6 +111,8 @@ def create_ui(agent: Agent, config: Config, llm: LLMAdapter) -> gr.Blocks:
return [[c["id"], c["title"], c["updated_at"][:19]] for c in convs] return [[c["id"], c["title"], c["updated_at"][:19]] for c in convs]
def on_user_message(message, chat_history): def on_user_message(message, chat_history):
chat_history = chat_history or []
# Extract text and files from MultimodalTextbox # Extract text and files from MultimodalTextbox
if isinstance(message, dict): if isinstance(message, dict):
text = message.get("text", "") text = message.get("text", "")
@ -140,12 +150,22 @@ def create_ui(agent: Agent, config: Config, llm: LLMAdapter) -> gr.Blocks:
yield chat_history, gr.update(value=None) yield chat_history, gr.update(value=None)
# Stream assistant response # Stream assistant response
response_text = "" try:
chat_history = chat_history + [{"role": "assistant", "content": ""}] response_text = ""
chat_history = chat_history + [{"role": "assistant", "content": ""}]
for chunk in agent.respond(text, files=processed_files): for chunk in agent.respond(text, files=processed_files):
response_text += chunk response_text += chunk
chat_history[-1] = {"role": "assistant", "content": response_text} chat_history[-1] = {"role": "assistant", "content": response_text}
yield chat_history, gr.update(value=None)
# If no response came through, show a fallback
if not response_text:
chat_history[-1] = {"role": "assistant", "content": "(No response received from model)"}
yield chat_history, gr.update(value=None)
except Exception as e:
log.error("Error in agent.respond: %s", e, exc_info=True)
chat_history = chat_history + [{"role": "assistant", "content": f"Error: {e}"}]
yield chat_history, gr.update(value=None) yield chat_history, gr.update(value=None)
def on_voice_chat(audio_path): def on_voice_chat(audio_path):

View File

@ -1,6 +1,9 @@
# CheddahBot Configuration # CheddahBot Configuration
# Default model to use on startup # Chat model (for UI conversations - needs OpenRouter key or local model)
chat_model: "openai/gpt-4o-mini"
# Execution model (Claude Code CLI - uses Max subscription for heartbeat/scheduler)
default_model: "claude-sonnet-4-20250514" default_model: "claude-sonnet-4-20250514"
# Gradio server settings # Gradio server settings

View File

@ -3,6 +3,8 @@
## Identity ## Identity
- Name: (your name here) - Name: (your name here)
- How to address: (first name, nickname, etc.) - How to address: (first name, nickname, etc.)
- Origin: Cheddah is named after the user's Xbox Live gamertag, "CheddahYetti."
- Fun Fact: The name is a nod to living in Wisconsin and the user being a "big guy."
## Context ## Context
- Technical level: (beginner/intermediate/advanced) - Technical level: (beginner/intermediate/advanced)