Implement two-brain architecture: chat brain + execution brain

Chat brain uses OpenAI-compatible APIs (OpenRouter/Ollama/LM Studio) for
all UI conversations, giving full control over system prompts so the
Cheddah personality works correctly. Execution brain uses Claude Code CLI
for heartbeat, scheduled tasks, and delegated system-level work.

- Split llm.py: chat() routes through OpenAI-compat only, new execute()
  calls Claude CLI with Bash/Read/Edit/Write/Glob/Grep tools
- Add chat_model config field (default: openai/gpt-4o-mini)
- Add delegate_task tool bridging chat brain to execution brain
- Scheduler/heartbeat now use execute_task() for real CLI power
- UI dropdown shows chat-only models with custom value support
- Updated model list to current OpenRouter top models (Feb 2026)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
cora-start
PeninsulaInd 2026-02-13 22:59:32 -06:00
parent 1b73cf2e5d
commit af767f9684
9 changed files with 211 additions and 115 deletions

View File

@ -24,13 +24,18 @@ def main():
log.info("Initializing database...")
db = Database(config.db_path)
log.info("Initializing LLM adapter (default model: %s)...", config.default_model)
log.info("Chat brain model: %s", config.chat_model)
log.info("Execution brain model: %s (Claude Code CLI)", config.default_model)
llm = LLMAdapter(
default_model=config.default_model,
default_model=config.chat_model,
openrouter_key=config.openrouter_api_key,
ollama_url=config.ollama_url,
lmstudio_url=config.lmstudio_url,
)
if llm.is_execution_brain_available():
log.info("Execution brain: Claude Code CLI found in PATH")
else:
log.warning("Execution brain: Claude Code CLI NOT found — heartbeat/scheduler tasks will fail")
log.info("Creating agent...")
agent = Agent(config, db, llm)

View File

@ -132,3 +132,21 @@ class Agent:
for chunk in self.respond(prompt):
result_parts.append(chunk)
return "".join(result_parts)
def execute_task(self, prompt: str, system_context: str = "") -> str:
    """Run *prompt* through the execution brain (Claude Code CLI).

    Used by heartbeat, scheduler, and the delegate tool. The task and a
    truncated result are appended to daily memory when a memory backend
    is attached; memory failures are logged but never abort the task.

    Parameters:
        prompt: Plain-language task for the execution brain.
        system_context: Optional system prompt forwarded to the CLI.

    Returns:
        The execution brain's result string.
    """
    log.info("Execution brain task: %s", prompt[:100])
    result = self.llm.execute(prompt, system_prompt=system_context)
    if not self._memory:
        return result
    try:
        # Best-effort audit trail; truncated to keep the daily log compact.
        self._memory.log_daily(f"[Execution] {prompt[:200]}\n{result[:500]}")
    except Exception as exc:
        log.warning("Failed to log execution to memory: %s", exc)
    return result

View File

@ -35,6 +35,7 @@ class ShellConfig:
@dataclass
class Config:
chat_model: str = "openai/gpt-4o-mini"
default_model: str = "claude-sonnet-4-20250514"
host: str = "0.0.0.0"
port: int = 7860
@ -63,7 +64,7 @@ def load_config() -> Config:
if yaml_path.exists():
with open(yaml_path) as f:
data = yaml.safe_load(f) or {}
for key in ("default_model", "host", "port", "ollama_url", "lmstudio_url"):
for key in ("chat_model", "default_model", "host", "port", "ollama_url", "lmstudio_url"):
if key in data:
setattr(cfg, key, data[key])
if "memory" in data and isinstance(data["memory"], dict):
@ -81,6 +82,8 @@ def load_config() -> Config:
# Env var overrides (CHEDDAH_ prefix)
cfg.openrouter_api_key = os.getenv("OPENROUTER_API_KEY", "")
if cm := os.getenv("CHEDDAH_CHAT_MODEL"):
cfg.chat_model = cm
if m := os.getenv("CHEDDAH_DEFAULT_MODEL"):
cfg.default_model = m
if h := os.getenv("CHEDDAH_HOST"):

View File

@ -1,9 +1,14 @@
"""Model-agnostic LLM adapter.
"""Two-brain LLM adapter.
Routing:
- Claude models → Claude Code SDK (subprocess, uses Max subscription)
- Cloud models → OpenRouter (single API key, OpenAI-compatible)
- Local models → direct HTTP (Ollama / LM Studio, OpenAI-compatible)
Chat Brain:
- OpenRouter / Ollama / LM Studio (OpenAI-compatible APIs)
- Full control over system prompt → Cheddah personality works here
- Claude models available via OpenRouter mapping
Execution Brain:
- Claude Code CLI (subprocess)
- Used for heartbeat, scheduled tasks, delegated system-level work
- Claude's built-in tools (Bash, Read, Edit, etc.) are a feature here
"""
from __future__ import annotations
@ -26,31 +31,26 @@ log = logging.getLogger(__name__)
class ModelInfo:
id: str
name: str
provider: str # "claude" | "openrouter" | "ollama" | "lmstudio"
provider: str # "openrouter" | "ollama" | "lmstudio"
context_length: int | None = None
# Well-known Claude models that route through the SDK
CLAUDE_MODELS = {
"claude-sonnet-4-20250514",
"claude-opus-4-20250514",
"claude-haiku-4-20250514",
# Claude model IDs → OpenRouter equivalents (for chat dropdown)
CLAUDE_OPENROUTER_MAP = {
"claude-sonnet-4-20250514": "anthropic/claude-sonnet-4.5",
"claude-sonnet-4.5": "anthropic/claude-sonnet-4.5",
"claude-opus-4-20250514": "anthropic/claude-opus-4.6",
"claude-opus-4.6": "anthropic/claude-opus-4.6",
}
def _is_claude_model(model_id: str) -> bool:
return model_id in CLAUDE_MODELS or model_id.startswith("claude-")
def _provider_for(model_id: str, openrouter_key: str, ollama_url: str, lmstudio_url: str) -> str:
if _is_claude_model(model_id):
return "claude"
def _provider_for(model_id: str, openrouter_key: str) -> str:
"""Determine which OpenAI-compatible provider to route a chat model to."""
if model_id.startswith("local/ollama/"):
return "ollama"
if model_id.startswith("local/lmstudio/"):
return "lmstudio"
if openrouter_key:
return "openrouter"
# Everything else goes through OpenRouter (including mapped Claude models)
return "openrouter"
@ -70,13 +70,13 @@ class LLMAdapter:
@property
def provider(self) -> str:
return _provider_for(self.current_model, self.openrouter_key, self.ollama_url, self.lmstudio_url)
return _provider_for(self.current_model, self.openrouter_key)
def switch_model(self, model_id: str):
self.current_model = model_id
log.info("Switched to model: %s (provider: %s)", model_id, self.provider)
log.info("Switched chat model to: %s (provider: %s)", model_id, self.provider)
# ── Main entry point ──
# ── Chat Brain (OpenAI-compatible only) ──
def chat(
self,
@ -84,47 +84,66 @@ class LLMAdapter:
tools: list[dict] | None = None,
stream: bool = True,
) -> Generator[dict, None, None]:
"""Yield chunks: {"type": "text", "content": "..."} or {"type": "tool_use", ...}."""
"""Chat brain: routes through OpenAI-compatible APIs only.
Yields chunks: {"type": "text", "content": "..."} or {"type": "tool_use", ...}.
"""
provider = self.provider
if provider == "claude":
yield from self._chat_claude_sdk(messages, tools, stream)
else:
base_url, api_key = self._resolve_endpoint(provider)
model_id = self._resolve_model_id(provider)
yield from self._chat_openai_sdk(messages, tools, stream, base_url, api_key, model_id)
model_id = self._resolve_model_id(provider)
# ── Claude Code SDK (subprocess) ──
def _chat_claude_sdk(
self, messages: list[dict], tools: list[dict] | None, stream: bool
) -> Generator[dict, None, None]:
# Separate system prompt from user messages
system_prompt = ""
user_prompt_parts = []
for m in messages:
role = m.get("role", "user")
content = m.get("content", "")
if isinstance(content, list):
content = " ".join(c.get("text", "") for c in content if c.get("type") == "text")
if role == "system":
system_prompt += content + "\n"
elif role == "assistant":
user_prompt_parts.append(f"[Assistant]\n{content}")
# If a Claude model ID was selected, map it to OpenRouter equivalent
if model_id in CLAUDE_OPENROUTER_MAP:
if self.openrouter_key:
model_id = CLAUDE_OPENROUTER_MAP[model_id]
provider = "openrouter"
else:
user_prompt_parts.append(content)
user_prompt = "\n\n".join(user_prompt_parts)
yield {"type": "text", "content": (
"To chat with Claude models, you need an OpenRouter API key "
"(set OPENROUTER_API_KEY in .env). Alternatively, select a local "
"model from Ollama or LM Studio."
)}
return
# Find claude CLI - on Windows needs .cmd extension for npm-installed binaries
claude_bin = shutil.which("claude")
if not claude_bin:
yield {"type": "text", "content": "Error: `claude` CLI not found in PATH. Install Claude Code: npm install -g @anthropic-ai/claude-code"}
# Check if provider is available
if provider == "openrouter" and not self.openrouter_key:
yield {"type": "text", "content": (
"No API key configured. To use cloud models:\n"
"1. Get an OpenRouter API key at https://openrouter.ai/keys\n"
"2. Set OPENROUTER_API_KEY in your .env file\n\n"
"Or install Ollama (free, local) and pull a model:\n"
" ollama pull llama3.2"
)}
return
cmd = [claude_bin, "-p", user_prompt, "--model", self.current_model,
"--output-format", "json", "--tools", ""]
if system_prompt.strip():
cmd.extend(["--system-prompt", system_prompt.strip()])
log.debug("Claude SDK using: %s", claude_bin)
base_url, api_key = self._resolve_endpoint(provider)
yield from self._chat_openai_sdk(messages, tools, stream, base_url, api_key, model_id)
# ── Execution Brain (Claude Code CLI) ──
def execute(
self,
prompt: str,
system_prompt: str = "",
working_dir: str | None = None,
) -> str:
"""Execution brain: calls Claude Code CLI with full tool access.
Used for heartbeat checks, scheduled tasks, and delegated complex tasks.
Returns the full result string (non-streaming).
"""
claude_bin = shutil.which("claude")
if not claude_bin:
return "Error: `claude` CLI not found in PATH. Install Claude Code: npm install -g @anthropic-ai/claude-code"
cmd = [
claude_bin, "-p", prompt,
"--output-format", "json",
"--tools", "Bash,Read,Edit,Write,Glob,Grep",
]
if system_prompt:
cmd.extend(["--system-prompt", system_prompt])
log.debug("Execution brain cmd: %s", " ".join(cmd[:6]) + "...")
# Strip CLAUDECODE env var so the subprocess doesn't think it's nested
env = {k: v for k, v in os.environ.items() if k != "CLAUDECODE"}
@ -137,31 +156,35 @@ class LLMAdapter:
text=True,
encoding="utf-8",
shell=(sys.platform == "win32"),
cwd=working_dir,
env=env,
)
except FileNotFoundError:
yield {"type": "text", "content": "Error: `claude` CLI not found. Install Claude Code: npm install -g @anthropic-ai/claude-code"}
return
return "Error: `claude` CLI not found. Install Claude Code: npm install -g @anthropic-ai/claude-code"
stdout, stderr = proc.communicate(timeout=120)
try:
stdout, stderr = proc.communicate(timeout=300)
except subprocess.TimeoutExpired:
proc.kill()
return "Error: Claude Code execution timed out after 5 minutes."
if proc.returncode != 0:
yield {"type": "text", "content": f"Claude SDK error: {stderr or 'unknown error'}"}
return
return f"Execution error: {stderr or 'unknown error'}"
# --output-format json returns a single JSON object
try:
result = json.loads(stdout)
text = result.get("result", "")
if text:
yield {"type": "text", "content": text}
elif result.get("is_error"):
yield {"type": "text", "content": f"Claude error: {result.get('result', 'unknown')}"}
return
return text
if result.get("is_error"):
return f"Execution error: {result.get('result', 'unknown')}"
return "(No output from execution brain)"
except json.JSONDecodeError:
# Fallback: treat as plain text
if stdout.strip():
yield {"type": "text", "content": stdout.strip()}
return stdout.strip() if stdout.strip() else "(No output from execution brain)"
def is_execution_brain_available(self) -> bool:
"""Check if the Claude Code CLI is available."""
return shutil.which("claude") is not None
# ── OpenAI-compatible SDK (OpenRouter / Ollama / LM Studio) ──
@ -259,25 +282,6 @@ class LLMAdapter:
return model.removeprefix("local/lmstudio/")
return model
def _messages_to_prompt(self, messages: list[dict]) -> str:
"""Flatten messages into a single prompt string for Claude SDK -p flag."""
parts = []
for m in messages:
role = m.get("role", "user")
content = m.get("content", "")
if isinstance(content, list):
# multimodal - extract text parts
content = " ".join(
c.get("text", "") for c in content if c.get("type") == "text"
)
if role == "system":
parts.append(f"[System]\n{content}")
elif role == "assistant":
parts.append(f"[Assistant]\n{content}")
else:
parts.append(content)
return "\n\n".join(parts)
def _get_openai(self):
if self._openai_mod is None:
import openai
@ -314,21 +318,32 @@ class LLMAdapter:
pass
return models
def list_available_models(self) -> list[ModelInfo]:
"""Return all available models across all providers."""
models = [
ModelInfo("claude-sonnet-4-20250514", "Claude Sonnet 4", "claude"),
ModelInfo("claude-opus-4-20250514", "Claude Opus 4", "claude"),
ModelInfo("claude-haiku-4-20250514", "Claude Haiku 4", "claude"),
]
def list_chat_models(self) -> list[ModelInfo]:
"""Return models available for the chat brain (no direct Claude SDK entries)."""
models = []
if self.openrouter_key:
models.extend([
ModelInfo("openai/gpt-4o", "GPT-4o", "openrouter"),
# Anthropic (via OpenRouter — system prompts work correctly)
ModelInfo("anthropic/claude-sonnet-4.5", "Claude Sonnet 4.5", "openrouter"),
ModelInfo("anthropic/claude-opus-4.6", "Claude Opus 4.6", "openrouter"),
# Google
ModelInfo("google/gemini-3-flash-preview", "Gemini 3 Flash Preview", "openrouter"),
ModelInfo("google/gemini-2.5-flash", "Gemini 2.5 Flash", "openrouter"),
ModelInfo("google/gemini-2.5-flash-lite", "Gemini 2.5 Flash Lite", "openrouter"),
# OpenAI
ModelInfo("openai/gpt-5-nano", "GPT-5 Nano", "openrouter"),
ModelInfo("openai/gpt-4o-mini", "GPT-4o Mini", "openrouter"),
ModelInfo("google/gemini-2.0-flash-001", "Gemini 2.0 Flash", "openrouter"),
ModelInfo("google/gemini-2.5-pro-preview", "Gemini 2.5 Pro", "openrouter"),
ModelInfo("mistralai/mistral-large", "Mistral Large", "openrouter"),
ModelInfo("meta-llama/llama-3.3-70b-instruct", "Llama 3.3 70B", "openrouter"),
# DeepSeek / xAI / Others
ModelInfo("deepseek/deepseek-v3.2", "DeepSeek V3.2", "openrouter"),
ModelInfo("x-ai/grok-4.1-fast", "Grok 4.1 Fast", "openrouter"),
ModelInfo("moonshotai/kimi-k2.5", "Kimi K2.5", "openrouter"),
ModelInfo("minimax/minimax-m2.5", "MiniMax M2.5", "openrouter"),
])
models.extend(self.discover_local_models())
return models
def list_available_models(self) -> list[ModelInfo]:
"""Backwards-compatible alias for list_chat_models()."""
return self.list_chat_models()

View File

@ -60,7 +60,7 @@ class Scheduler:
for task in tasks:
try:
log.info("Running scheduled task: %s", task["name"])
result = self.agent.respond_to_prompt(task["prompt"])
result = self.agent.execute_task(task["prompt"])
self.db.log_task_run(task["id"], result=result[:2000])
# Calculate next run
@ -107,7 +107,7 @@ class Scheduler:
f"{checklist}"
)
result = self.agent.respond_to_prompt(prompt)
result = self.agent.execute_task(prompt, system_context=checklist)
if HEARTBEAT_OK in result:
log.debug("Heartbeat: all clear")

View File

@ -0,0 +1,30 @@
"""Delegate tool: bridges chat brain to execution brain.
When the chat model needs to run commands, edit files, or do anything
requiring system-level access, it calls this tool. The task is passed
to the execution brain (Claude Code CLI) which has full tool access.
"""
from __future__ import annotations
from . import tool
@tool(
    "delegate_task",
    description=(
        "Delegate a complex task to the execution brain (Claude Code CLI). "
        "Use this when you need to: run shell commands, read/write/edit files, "
        "check system status, inspect the codebase, or perform any system-level "
        "operation. Describe the task clearly and the execution brain will carry "
        "it out using its full tool suite (Bash, Read, Edit, Write, Glob, Grep)."
    ),
    category="system",
)
def delegate_task(task_description: str, ctx: dict | None = None) -> str:
    """Hand *task_description* to the execution brain via the agent.

    Parameters:
        task_description: Plain-language description of the task to run.
        ctx: Tool-invocation context; must contain an "agent" entry whose
            value exposes ``execute_task()``.

    Returns:
        The execution brain's result string, or an error message when no
        agent context was supplied.
    """
    # Guard: this tool is useless without the agent bridge in context.
    if not ctx or "agent" not in ctx:
        return "Error: delegate tool requires agent context."
    return ctx["agent"].execute_task(task_description)

View File

@ -25,12 +25,19 @@ footer { display: none !important; }
def create_ui(agent: Agent, config: Config, llm: LLMAdapter) -> gr.Blocks:
"""Build and return the Gradio app."""
available_models = llm.list_available_models()
available_models = llm.list_chat_models()
model_choices = [(m.name, m.id) for m in available_models]
current_model = llm.current_model
exec_status = "available" if llm.is_execution_brain_available() else "unavailable"
with gr.Blocks(title="CheddahBot") as app:
gr.Markdown("# CheddahBot", elem_classes=["contain"])
gr.Markdown(
f"*Chat Brain:* `{current_model}` &nbsp;|&nbsp; "
f"*Execution Brain (Claude Code CLI):* `{exec_status}`",
elem_classes=["contain"],
)
with gr.Row(elem_classes=["contain"]):
model_dropdown = gr.Dropdown(
@ -38,6 +45,7 @@ def create_ui(agent: Agent, config: Config, llm: LLMAdapter) -> gr.Blocks:
value=current_model,
label="Model",
interactive=True,
allow_custom_value=True,
scale=3,
)
refresh_btn = gr.Button("Refresh", scale=0, min_width=70)
@ -90,7 +98,7 @@ def create_ui(agent: Agent, config: Config, llm: LLMAdapter) -> gr.Blocks:
return f"Switched to {model_id}"
def on_refresh_models():
models = llm.list_available_models()
models = llm.list_chat_models()
choices = [(m.name, m.id) for m in models]
return gr.update(choices=choices, value=llm.current_model)
@ -103,6 +111,8 @@ def create_ui(agent: Agent, config: Config, llm: LLMAdapter) -> gr.Blocks:
return [[c["id"], c["title"], c["updated_at"][:19]] for c in convs]
def on_user_message(message, chat_history):
chat_history = chat_history or []
# Extract text and files from MultimodalTextbox
if isinstance(message, dict):
text = message.get("text", "")
@ -140,12 +150,22 @@ def create_ui(agent: Agent, config: Config, llm: LLMAdapter) -> gr.Blocks:
yield chat_history, gr.update(value=None)
# Stream assistant response
response_text = ""
chat_history = chat_history + [{"role": "assistant", "content": ""}]
try:
response_text = ""
chat_history = chat_history + [{"role": "assistant", "content": ""}]
for chunk in agent.respond(text, files=processed_files):
response_text += chunk
chat_history[-1] = {"role": "assistant", "content": response_text}
for chunk in agent.respond(text, files=processed_files):
response_text += chunk
chat_history[-1] = {"role": "assistant", "content": response_text}
yield chat_history, gr.update(value=None)
# If no response came through, show a fallback
if not response_text:
chat_history[-1] = {"role": "assistant", "content": "(No response received from model)"}
yield chat_history, gr.update(value=None)
except Exception as e:
log.error("Error in agent.respond: %s", e, exc_info=True)
chat_history = chat_history + [{"role": "assistant", "content": f"Error: {e}"}]
yield chat_history, gr.update(value=None)
def on_voice_chat(audio_path):

View File

@ -1,6 +1,9 @@
# CheddahBot Configuration
# Default model to use on startup
# Chat model (for UI conversations - needs OpenRouter key or local model)
chat_model: "openai/gpt-4o-mini"
# Execution model (Claude Code CLI - uses Max subscription for heartbeat/scheduler)
default_model: "claude-sonnet-4-20250514"
# Gradio server settings

View File

@ -3,6 +3,8 @@
## Identity
- Name: (your name here)
- How to address: (first name, nickname, etc.)
- Origin: Cheddah is named after the user's Xbox Live gamertag, "CheddahYetti."
- Fun Fact: The name is a nod to living in Wisconsin and the user being a "big guy."
## Context
- Technical level: (beginner/intermediate/advanced)