From af767f9684f0a9e7ee183c8f216e2cba5fc08bf4 Mon Sep 17 00:00:00 2001 From: PeninsulaInd Date: Fri, 13 Feb 2026 22:59:32 -0600 Subject: [PATCH] Implement two-brain architecture: chat brain + execution brain Chat brain uses OpenAI-compatible APIs (OpenRouter/Ollama/LM Studio) for all UI conversations, giving full control over system prompts so the Cheddah personality works correctly. Execution brain uses Claude Code CLI for heartbeat, scheduled tasks, and delegated system-level work. - Split llm.py: chat() routes through OpenAI-compat only, new execute() calls Claude CLI with Bash/Read/Edit/Write/Glob/Grep tools - Add chat_model config field (default: openai/gpt-4o-mini) - Add delegate_task tool bridging chat brain to execution brain - Scheduler/heartbeat now use execute_task() for real CLI power - UI dropdown shows chat-only models with custom value support - Updated model list to current OpenRouter top models (Feb 2026) Co-Authored-By: Claude Opus 4.6 --- cheddahbot/__main__.py | 9 +- cheddahbot/agent.py | 18 +++ cheddahbot/config.py | 5 +- cheddahbot/llm.py | 219 +++++++++++++++++++---------------- cheddahbot/scheduler.py | 4 +- cheddahbot/tools/delegate.py | 30 +++++ cheddahbot/ui.py | 34 ++++-- config.yaml | 5 +- identity/USER.md | 2 + 9 files changed, 211 insertions(+), 115 deletions(-) create mode 100644 cheddahbot/tools/delegate.py diff --git a/cheddahbot/__main__.py b/cheddahbot/__main__.py index 8cd4178..3e92962 100644 --- a/cheddahbot/__main__.py +++ b/cheddahbot/__main__.py @@ -24,13 +24,18 @@ def main(): log.info("Initializing database...") db = Database(config.db_path) - log.info("Initializing LLM adapter (default model: %s)...", config.default_model) + log.info("Chat brain model: %s", config.chat_model) + log.info("Execution brain model: %s (Claude Code CLI)", config.default_model) llm = LLMAdapter( - default_model=config.default_model, + default_model=config.chat_model, openrouter_key=config.openrouter_api_key, ollama_url=config.ollama_url, lmstudio_url=config.lmstudio_url, ) + if llm.is_execution_brain_available(): + log.info("Execution brain: Claude Code CLI found in PATH") + else: + log.warning("Execution brain: Claude Code CLI NOT found — heartbeat/scheduler tasks will fail") log.info("Creating agent...") agent = Agent(config, db, llm) diff --git a/cheddahbot/agent.py b/cheddahbot/agent.py index beec3c6..39fee24 100644 --- a/cheddahbot/agent.py +++ b/cheddahbot/agent.py @@ -132,3 +132,21 @@ class Agent: for chunk in self.respond(prompt): result_parts.append(chunk) return "".join(result_parts) + + def execute_task(self, prompt: str, system_context: str = "") -> str: + """Execute a task using the execution brain (Claude Code CLI). + + Used by heartbeat, scheduler, and the delegate tool. + Logs the result to daily memory if available. + """ + log.info("Execution brain task: %s", prompt[:100]) + result = self.llm.execute(prompt, system_prompt=system_context) + + # Log to daily memory + if self._memory: + try: + self._memory.log_daily(f"[Execution] {prompt[:200]}\n→ {result[:500]}") + except Exception as e: + log.warning("Failed to log execution to memory: %s", e) + + return result diff --git a/cheddahbot/config.py b/cheddahbot/config.py index 6644574..5387d81 100644 --- a/cheddahbot/config.py +++ b/cheddahbot/config.py @@ -35,6 +35,7 @@ class ShellConfig: @dataclass class Config: + chat_model: str = "openai/gpt-4o-mini" default_model: str = "claude-sonnet-4-20250514" host: str = "0.0.0.0" port: int = 7860 @@ -63,7 +64,7 @@ def load_config() -> Config: if yaml_path.exists(): with open(yaml_path) as f: data = yaml.safe_load(f) or {} - for key in ("default_model", "host", "port", "ollama_url", "lmstudio_url"): + for key in ("chat_model", "default_model", "host", "port", "ollama_url", "lmstudio_url"): if key in data: setattr(cfg, key, data[key]) if "memory" in data and isinstance(data["memory"], dict): @@ -81,6 +82,8 @@ def load_config() -> Config: # Env var overrides (CHEDDAH_ prefix) cfg.openrouter_api_key = os.getenv("OPENROUTER_API_KEY", "") + if cm := os.getenv("CHEDDAH_CHAT_MODEL"): + cfg.chat_model = cm if m := os.getenv("CHEDDAH_DEFAULT_MODEL"): cfg.default_model = m if h := os.getenv("CHEDDAH_HOST"): diff --git a/cheddahbot/llm.py b/cheddahbot/llm.py index 1485064..39daefc 100644 --- a/cheddahbot/llm.py +++ b/cheddahbot/llm.py @@ -1,9 +1,14 @@ -"""Model-agnostic LLM adapter. +"""Two-brain LLM adapter. -Routing: - - Claude models → Claude Code SDK (subprocess, uses Max subscription) - - Cloud models → OpenRouter (single API key, OpenAI-compatible) - - Local models → direct HTTP (Ollama / LM Studio, OpenAI-compatible) +Chat Brain: + - OpenRouter / Ollama / LM Studio (OpenAI-compatible APIs) + - Full control over system prompt — Cheddah personality works here + - Claude models available via OpenRouter mapping + +Execution Brain: + - Claude Code CLI (subprocess) + - Used for heartbeat, scheduled tasks, delegated system-level work + - Claude's built-in tools (Bash, Read, Edit, etc.) are a feature here """ from __future__ import annotations @@ -26,31 +31,26 @@ log = logging.getLogger(__name__) class ModelInfo: id: str name: str - provider: str # "claude" | "openrouter" | "ollama" | "lmstudio" + provider: str # "openrouter" | "ollama" | "lmstudio" context_length: int | None = None -# Well-known Claude models that route through the SDK -CLAUDE_MODELS = { - "claude-sonnet-4-20250514", - "claude-opus-4-20250514", - "claude-haiku-4-20250514", +# Claude model IDs → OpenRouter equivalents (for chat dropdown) +CLAUDE_OPENROUTER_MAP = { + "claude-sonnet-4-20250514": "anthropic/claude-sonnet-4.5", + "claude-sonnet-4.5": "anthropic/claude-sonnet-4.5", + "claude-opus-4-20250514": "anthropic/claude-opus-4.6", + "claude-opus-4.6": "anthropic/claude-opus-4.6", } -def _is_claude_model(model_id: str) -> bool: - return model_id in CLAUDE_MODELS or model_id.startswith("claude-") - - -def _provider_for(model_id: str, openrouter_key: str, ollama_url: str, lmstudio_url: str) -> str: - if _is_claude_model(model_id): - return "claude" +def _provider_for(model_id: str, openrouter_key: str) -> str: + """Determine which OpenAI-compatible provider to route a chat model to.""" if model_id.startswith("local/ollama/"): return "ollama" if model_id.startswith("local/lmstudio/"): return "lmstudio" - if openrouter_key: - return "openrouter" + # Everything else goes through OpenRouter (including mapped Claude models) return "openrouter" @@ -70,13 +70,13 @@ class LLMAdapter: @property def provider(self) -> str: - return _provider_for(self.current_model, self.openrouter_key, self.ollama_url, self.lmstudio_url) + return _provider_for(self.current_model, self.openrouter_key) def switch_model(self, model_id: str): self.current_model = model_id - log.info("Switched to model: %s (provider: %s)", model_id, self.provider) + log.info("Switched chat model to: %s (provider: %s)", model_id, self.provider) - # ── Main entry point ── + # ── Chat Brain (OpenAI-compatible only) ── def chat( self, @@ -84,47 +84,66 @@ class LLMAdapter: tools: list[dict] | None = None, stream: bool = True, ) -> Generator[dict, None, None]: - """Yield chunks: {"type": "text", "content": "..."} or {"type": "tool_use", ...}.""" + """Chat brain: routes through OpenAI-compatible APIs only. + + Yields chunks: {"type": "text", "content": "..."} or {"type": "tool_use", ...}. + """ provider = self.provider - if provider == "claude": - yield from self._chat_claude_sdk(messages, tools, stream) - else: - base_url, api_key = self._resolve_endpoint(provider) - model_id = self._resolve_model_id(provider) - yield from self._chat_openai_sdk(messages, tools, stream, base_url, api_key, model_id) + model_id = self._resolve_model_id(provider) - # ── Claude Code SDK (subprocess) ── - - def _chat_claude_sdk( - self, messages: list[dict], tools: list[dict] | None, stream: bool - ) -> Generator[dict, None, None]: - # Separate system prompt from user messages - system_prompt = "" - user_prompt_parts = [] - for m in messages: - role = m.get("role", "user") - content = m.get("content", "") - if isinstance(content, list): - content = " ".join(c.get("text", "") for c in content if c.get("type") == "text") - if role == "system": - system_prompt += content + "\n" - elif role == "assistant": - user_prompt_parts.append(f"[Assistant]\n{content}") + # If a Claude model ID was selected, map it to OpenRouter equivalent + if model_id in CLAUDE_OPENROUTER_MAP: + if self.openrouter_key: + model_id = CLAUDE_OPENROUTER_MAP[model_id] + provider = "openrouter" else: - user_prompt_parts.append(content) - user_prompt = "\n\n".join(user_prompt_parts) + yield {"type": "text", "content": ( + "To chat with Claude models, you need an OpenRouter API key " + "(set OPENROUTER_API_KEY in .env). Alternatively, select a local " + "model from Ollama or LM Studio." + )} + return - # Find claude CLI - on Windows needs .cmd extension for npm-installed binaries - claude_bin = shutil.which("claude") - if not claude_bin: - yield {"type": "text", "content": "Error: `claude` CLI not found in PATH. Install Claude Code: npm install -g @anthropic-ai/claude-code"} + # Check if provider is available + if provider == "openrouter" and not self.openrouter_key: + yield {"type": "text", "content": ( + "No API key configured. To use cloud models:\n" + "1. Get an OpenRouter API key at https://openrouter.ai/keys\n" + "2. Set OPENROUTER_API_KEY in your .env file\n\n" + "Or install Ollama (free, local) and pull a model:\n" + " ollama pull llama3.2" + )} return - cmd = [claude_bin, "-p", user_prompt, "--model", self.current_model, - "--output-format", "json", "--tools", ""] - if system_prompt.strip(): - cmd.extend(["--system-prompt", system_prompt.strip()]) - log.debug("Claude SDK using: %s", claude_bin) + base_url, api_key = self._resolve_endpoint(provider) + yield from self._chat_openai_sdk(messages, tools, stream, base_url, api_key, model_id) + + # ── Execution Brain (Claude Code CLI) ── + + def execute( + self, + prompt: str, + system_prompt: str = "", + working_dir: str | None = None, + ) -> str: + """Execution brain: calls Claude Code CLI with full tool access. + + Used for heartbeat checks, scheduled tasks, and delegated complex tasks. + Returns the full result string (non-streaming). + """ + claude_bin = shutil.which("claude") + if not claude_bin: + return "Error: `claude` CLI not found in PATH. Install Claude Code: npm install -g @anthropic-ai/claude-code" + + cmd = [ + claude_bin, "-p", prompt, + "--output-format", "json", + "--tools", "Bash,Read,Edit,Write,Glob,Grep", + ] + if system_prompt: + cmd.extend(["--system-prompt", system_prompt]) + + log.debug("Execution brain cmd: %s", " ".join(cmd[:6]) + "...") # Strip CLAUDECODE env var so the subprocess doesn't think it's nested env = {k: v for k, v in os.environ.items() if k != "CLAUDECODE"} @@ -137,31 +156,35 @@ class LLMAdapter: text=True, encoding="utf-8", shell=(sys.platform == "win32"), + cwd=working_dir, env=env, ) except FileNotFoundError: - yield {"type": "text", "content": "Error: `claude` CLI not found. Install Claude Code: npm install -g @anthropic-ai/claude-code"} - return + return "Error: `claude` CLI not found. Install Claude Code: npm install -g @anthropic-ai/claude-code" - stdout, stderr = proc.communicate(timeout=120) + try: + stdout, stderr = proc.communicate(timeout=300) + except subprocess.TimeoutExpired: + proc.kill() + return "Error: Claude Code execution timed out after 5 minutes." if proc.returncode != 0: - yield {"type": "text", "content": f"Claude SDK error: {stderr or 'unknown error'}"} - return + return f"Execution error: {stderr or 'unknown error'}" - # --output-format json returns a single JSON object try: result = json.loads(stdout) text = result.get("result", "") if text: - yield {"type": "text", "content": text} - elif result.get("is_error"): - yield {"type": "text", "content": f"Claude error: {result.get('result', 'unknown')}"} - return + return text + if result.get("is_error"): + return f"Execution error: {result.get('result', 'unknown')}" + return "(No output from execution brain)" except json.JSONDecodeError: - # Fallback: treat as plain text - if stdout.strip(): - yield {"type": "text", "content": stdout.strip()} + return stdout.strip() if stdout.strip() else "(No output from execution brain)" + + def is_execution_brain_available(self) -> bool: + """Check if the Claude Code CLI is available.""" + return shutil.which("claude") is not None # ── OpenAI-compatible SDK (OpenRouter / Ollama / LM Studio) ── @@ -259,25 +282,6 @@ class LLMAdapter: return model.removeprefix("local/lmstudio/") return model - def _messages_to_prompt(self, messages: list[dict]) -> str: - """Flatten messages into a single prompt string for Claude SDK -p flag.""" - parts = [] - for m in messages: - role = m.get("role", "user") - content = m.get("content", "") - if isinstance(content, list): - # multimodal - extract text parts - content = " ".join( - c.get("text", "") for c in content if c.get("type") == "text" - ) - if role == "system": - parts.append(f"[System]\n{content}") - elif role == "assistant": - parts.append(f"[Assistant]\n{content}") - else: - parts.append(content) - return "\n\n".join(parts) - def _get_openai(self): if self._openai_mod is None: import openai @@ -314,21 +318,32 @@ class LLMAdapter: pass return models - def list_available_models(self) -> list[ModelInfo]: - """Return all available models across all providers.""" - models = [ - ModelInfo("claude-sonnet-4-20250514", "Claude Sonnet 4", "claude"), - ModelInfo("claude-opus-4-20250514", "Claude Opus 4", "claude"), - ModelInfo("claude-haiku-4-20250514", "Claude Haiku 4", "claude"), - ] + def list_chat_models(self) -> list[ModelInfo]: + """Return models available for the chat brain (no direct Claude SDK entries).""" + models = [] + if self.openrouter_key: models.extend([ - ModelInfo("openai/gpt-4o", "GPT-4o", "openrouter"), + # Anthropic (via OpenRouter — system prompts work correctly) + ModelInfo("anthropic/claude-sonnet-4.5", "Claude Sonnet 4.5", "openrouter"), + ModelInfo("anthropic/claude-opus-4.6", "Claude Opus 4.6", "openrouter"), + # Google + ModelInfo("google/gemini-3-flash-preview", "Gemini 3 Flash Preview", "openrouter"), + ModelInfo("google/gemini-2.5-flash", "Gemini 2.5 Flash", "openrouter"), + ModelInfo("google/gemini-2.5-flash-lite", "Gemini 2.5 Flash Lite", "openrouter"), + # OpenAI + ModelInfo("openai/gpt-5-nano", "GPT-5 Nano", "openrouter"), ModelInfo("openai/gpt-4o-mini", "GPT-4o Mini", "openrouter"), - ModelInfo("google/gemini-2.0-flash-001", "Gemini 2.0 Flash", "openrouter"), - ModelInfo("google/gemini-2.5-pro-preview", "Gemini 2.5 Pro", "openrouter"), - ModelInfo("mistralai/mistral-large", "Mistral Large", "openrouter"), - ModelInfo("meta-llama/llama-3.3-70b-instruct", "Llama 3.3 70B", "openrouter"), + # DeepSeek / xAI / Others + ModelInfo("deepseek/deepseek-v3.2", "DeepSeek V3.2", "openrouter"), + ModelInfo("x-ai/grok-4.1-fast", "Grok 4.1 Fast", "openrouter"), + ModelInfo("moonshotai/kimi-k2.5", "Kimi K2.5", "openrouter"), + ModelInfo("minimax/minimax-m2.5", "MiniMax M2.5", "openrouter"), ]) + models.extend(self.discover_local_models()) return models + + def list_available_models(self) -> list[ModelInfo]: + """Backwards-compatible alias for list_chat_models().""" + return self.list_chat_models() diff --git a/cheddahbot/scheduler.py b/cheddahbot/scheduler.py index 4546f90..fdd9fd7 100644 --- a/cheddahbot/scheduler.py +++ b/cheddahbot/scheduler.py @@ -60,7 +60,7 @@ class Scheduler: for task in tasks: try: log.info("Running scheduled task: %s", task["name"]) - result = self.agent.respond_to_prompt(task["prompt"]) + result = self.agent.execute_task(task["prompt"]) self.db.log_task_run(task["id"], result=result[:2000]) # Calculate next run @@ -107,7 +107,7 @@ class Scheduler: f"{checklist}" ) - result = self.agent.respond_to_prompt(prompt) + result = self.agent.execute_task(prompt, system_context=checklist) if HEARTBEAT_OK in result: log.debug("Heartbeat: all clear") diff --git a/cheddahbot/tools/delegate.py b/cheddahbot/tools/delegate.py new file mode 100644 index 0000000..0fb3788 --- /dev/null +++ b/cheddahbot/tools/delegate.py @@ -0,0 +1,30 @@ +"""Delegate tool: bridges chat brain to execution brain. + +When the chat model needs to run commands, edit files, or do anything +requiring system-level access, it calls this tool. The task is passed +to the execution brain (Claude Code CLI) which has full tool access. +""" + +from __future__ import annotations + +from . import tool + + +@tool( + "delegate_task", + description=( + "Delegate a complex task to the execution brain (Claude Code CLI). " + "Use this when you need to: run shell commands, read/write/edit files, " + "check system status, inspect the codebase, or perform any system-level " + "operation. Describe the task clearly and the execution brain will carry " + "it out using its full tool suite (Bash, Read, Edit, Write, Glob, Grep)." + ), + category="system", +) +def delegate_task(task_description: str, ctx: dict = None) -> str: + """Delegate a task to the execution brain.""" + if not ctx or "agent" not in ctx: + return "Error: delegate tool requires agent context." + + agent = ctx["agent"] + return agent.execute_task(task_description) diff --git a/cheddahbot/ui.py b/cheddahbot/ui.py index 96965dd..06c50b0 100644 --- a/cheddahbot/ui.py +++ b/cheddahbot/ui.py @@ -25,12 +25,19 @@ footer { display: none !important; } def create_ui(agent: Agent, config: Config, llm: LLMAdapter) -> gr.Blocks: """Build and return the Gradio app.""" - available_models = llm.list_available_models() + available_models = llm.list_chat_models() model_choices = [(m.name, m.id) for m in available_models] current_model = llm.current_model + exec_status = "available" if llm.is_execution_brain_available() else "unavailable" + with gr.Blocks(title="CheddahBot") as app: gr.Markdown("# CheddahBot", elem_classes=["contain"]) + gr.Markdown( + f"*Chat Brain:* `{current_model}`  |  " + f"*Execution Brain (Claude Code CLI):* `{exec_status}`", + elem_classes=["contain"], + ) with gr.Row(elem_classes=["contain"]): model_dropdown = gr.Dropdown( @@ -38,6 +45,7 @@ def create_ui(agent: Agent, config: Config, llm: LLMAdapter) -> gr.Blocks: value=current_model, label="Model", interactive=True, + allow_custom_value=True, scale=3, ) refresh_btn = gr.Button("Refresh", scale=0, min_width=70) @@ -90,7 +98,7 @@ def create_ui(agent: Agent, config: Config, llm: LLMAdapter) -> gr.Blocks: return f"Switched to {model_id}" def on_refresh_models(): - models = llm.list_available_models() + models = llm.list_chat_models() choices = [(m.name, m.id) for m in models] return gr.update(choices=choices, value=llm.current_model) @@ -103,6 +111,8 @@ def create_ui(agent: Agent, config: Config, llm: LLMAdapter) -> gr.Blocks: return [[c["id"], c["title"], c["updated_at"][:19]] for c in convs] def on_user_message(message, chat_history): + chat_history = chat_history or [] + # Extract text and files from MultimodalTextbox if isinstance(message, dict): text = message.get("text", "") @@ -140,12 +150,22 @@ def create_ui(agent: Agent, config: Config, llm: LLMAdapter) -> gr.Blocks: yield chat_history, gr.update(value=None) # Stream assistant response - response_text = "" - chat_history = chat_history + [{"role": "assistant", "content": ""}] + try: + response_text = "" + chat_history = chat_history + [{"role": "assistant", "content": ""}] - for chunk in agent.respond(text, files=processed_files): - response_text += chunk - chat_history[-1] = {"role": "assistant", "content": response_text} + for chunk in agent.respond(text, files=processed_files): + response_text += chunk + chat_history[-1] = {"role": "assistant", "content": response_text} + yield chat_history, gr.update(value=None) + + # If no response came through, show a fallback + if not response_text: + chat_history[-1] = {"role": "assistant", "content": "(No response received from model)"} + yield chat_history, gr.update(value=None) + except Exception as e: + log.error("Error in agent.respond: %s", e, exc_info=True) + chat_history = chat_history + [{"role": "assistant", "content": f"Error: {e}"}] yield chat_history, gr.update(value=None) def on_voice_chat(audio_path): diff --git a/config.yaml b/config.yaml index caac083..7c31c78 100644 --- a/config.yaml +++ b/config.yaml @@ -1,6 +1,9 @@ # CheddahBot Configuration -# Default model to use on startup +# Chat model (for UI conversations - needs OpenRouter key or local model) +chat_model: "openai/gpt-4o-mini" + +# Execution model (Claude Code CLI - uses Max subscription for heartbeat/scheduler) default_model: "claude-sonnet-4-20250514" # Gradio server settings diff --git a/identity/USER.md b/identity/USER.md index 37594f8..54ea41a 100644 --- a/identity/USER.md +++ b/identity/USER.md @@ -3,6 +3,8 @@ ## Identity - Name: (your name here) - How to address: (first name, nickname, etc.) +- Origin: Cheddah is named after the user's Xbox Live gamertag, "CheddahYetti." +- Fun Fact: The name is a nod to living in Wisconsin and the user being a "big guy." ## Context - Technical level: (beginner/intermediate/advanced)