"""Two-brain LLM adapter. Chat Brain: - OpenRouter / Ollama / LM Studio (OpenAI-compatible APIs) - Full control over system prompt — Cheddah personality works here - Claude models available via OpenRouter mapping Execution Brain: - Claude Code CLI (subprocess) - Used for heartbeat, scheduled tasks, delegated system-level work - Claude's built-in tools (Bash, Read, Edit, etc.) are a feature here """ from __future__ import annotations import json import logging import os import shutil import subprocess import sys from dataclasses import dataclass from typing import Generator import httpx log = logging.getLogger(__name__) @dataclass class ModelInfo: id: str name: str provider: str # "openrouter" | "ollama" | "lmstudio" context_length: int | None = None # Claude model IDs → OpenRouter equivalents (for chat dropdown) CLAUDE_OPENROUTER_MAP = { "claude-sonnet-4.5": "anthropic/claude-sonnet-4.5", "claude-opus-4.6": "anthropic/claude-opus-4.6", "claude-haiku-4.5": "anthropic/claude-haiku-4.5", } def _provider_for(model_id: str, openrouter_key: str) -> str: """Determine which OpenAI-compatible provider to route a chat model to.""" if model_id.startswith("local/ollama/"): return "ollama" if model_id.startswith("local/lmstudio/"): return "lmstudio" # Everything else goes through OpenRouter (including mapped Claude models) return "openrouter" class LLMAdapter: def __init__( self, default_model: str = "claude-sonnet-4.5", openrouter_key: str = "", ollama_url: str = "http://localhost:11434", lmstudio_url: str = "http://localhost:1234", ): self.current_model = default_model self.openrouter_key = openrouter_key self.ollama_url = ollama_url.rstrip("/") self.lmstudio_url = lmstudio_url.rstrip("/") self._openai_mod = None # lazy import @property def provider(self) -> str: return _provider_for(self.current_model, self.openrouter_key) def switch_model(self, model_id: str): self.current_model = model_id log.info("Switched chat model to: %s (provider: %s)", model_id, self.provider) # ── Chat Brain (OpenAI-compatible only) ── def chat( self, messages: list[dict], tools: list[dict] | None = None, stream: bool = True, ) -> Generator[dict, None, None]: """Chat brain: routes through OpenAI-compatible APIs only. Yields chunks: {"type": "text", "content": "..."} or {"type": "tool_use", ...}. """ provider = self.provider model_id = self._resolve_model_id(provider) # If a Claude model ID was selected, map it to OpenRouter equivalent if model_id in CLAUDE_OPENROUTER_MAP: if self.openrouter_key: model_id = CLAUDE_OPENROUTER_MAP[model_id] provider = "openrouter" else: yield {"type": "text", "content": ( "To chat with Claude models, you need an OpenRouter API key " "(set OPENROUTER_API_KEY in .env). Alternatively, select a local " "model from Ollama or LM Studio." )} return # Check if provider is available if provider == "openrouter" and not self.openrouter_key: yield {"type": "text", "content": ( "No API key configured. To use cloud models:\n" "1. Get an OpenRouter API key at https://openrouter.ai/keys\n" "2. Set OPENROUTER_API_KEY in your .env file\n\n" "Or install Ollama (free, local) and pull a model:\n" " ollama pull llama3.2" )} return base_url, api_key = self._resolve_endpoint(provider) yield from self._chat_openai_sdk(messages, tools, stream, base_url, api_key, model_id) # ── Execution Brain (Claude Code CLI) ── def execute( self, prompt: str, system_prompt: str = "", working_dir: str | None = None, tools: str = "Bash,Read,Edit,Write,Glob,Grep", model: str | None = None, ) -> str: """Execution brain: calls Claude Code CLI with full tool access. Used for heartbeat checks, scheduled tasks, and delegated complex tasks. Returns the full result string (non-streaming). Args: tools: Comma-separated Claude Code tool names (default: standard set). model: Override the CLI model (e.g. "claude-sonnet-4.5"). """ claude_bin = shutil.which("claude") if not claude_bin: return "Error: `claude` CLI not found in PATH. Install Claude Code: npm install -g @anthropic-ai/claude-code" # Pipe prompt through stdin to avoid Windows 8191-char command-line limit. cmd = [ claude_bin, "-p", "--output-format", "json", "--tools", tools, ] if model: cmd.extend(["--model", model]) if system_prompt: cmd.extend(["--system-prompt", system_prompt]) log.debug("Execution brain cmd: %s", " ".join(cmd[:6]) + "...") # Strip CLAUDECODE env var so the subprocess doesn't think it's nested env = {k: v for k, v in os.environ.items() if k != "CLAUDECODE"} try: proc = subprocess.Popen( cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, encoding="utf-8", shell=(sys.platform == "win32"), cwd=working_dir, env=env, ) except FileNotFoundError: return "Error: `claude` CLI not found. Install Claude Code: npm install -g @anthropic-ai/claude-code" try: stdout, stderr = proc.communicate(input=prompt, timeout=300) except subprocess.TimeoutExpired: proc.kill() return "Error: Claude Code execution timed out after 5 minutes." if proc.returncode != 0: return f"Execution error: {stderr or 'unknown error'}" try: result = json.loads(stdout) text = result.get("result", "") if text: return text if result.get("is_error"): return f"Execution error: {result.get('result', 'unknown')}" return "(No output from execution brain)" except json.JSONDecodeError: return stdout.strip() if stdout.strip() else "(No output from execution brain)" def is_execution_brain_available(self) -> bool: """Check if the Claude Code CLI is available.""" return shutil.which("claude") is not None # ── OpenAI-compatible SDK (OpenRouter / Ollama / LM Studio) ── def _chat_openai_sdk( self, messages: list[dict], tools: list[dict] | None, stream: bool, base_url: str, api_key: str, model_id: str, ) -> Generator[dict, None, None]: openai = self._get_openai() client = openai.OpenAI(base_url=base_url, api_key=api_key) kwargs: dict = { "model": model_id, "messages": messages, "stream": stream, } if tools: kwargs["tools"] = tools try: if stream: response = client.chat.completions.create(**kwargs) tool_calls_accum: dict[int, dict] = {} for chunk in response: delta = chunk.choices[0].delta if chunk.choices else None if not delta: continue if delta.content: yield {"type": "text", "content": delta.content} if delta.tool_calls: for tc in delta.tool_calls: idx = tc.index if idx not in tool_calls_accum: tool_calls_accum[idx] = { "id": tc.id or "", "name": tc.function.name if tc.function and tc.function.name else "", "arguments": "", } if tc.function and tc.function.arguments: tool_calls_accum[idx]["arguments"] += tc.function.arguments if tc.id: tool_calls_accum[idx]["id"] = tc.id for _, tc in sorted(tool_calls_accum.items()): try: args = json.loads(tc["arguments"]) except json.JSONDecodeError: args = {} yield { "type": "tool_use", "id": tc["id"], "name": tc["name"], "input": args, } else: response = client.chat.completions.create(**kwargs) msg = response.choices[0].message if msg.content: yield {"type": "text", "content": msg.content} if msg.tool_calls: for tc in msg.tool_calls: try: args = json.loads(tc.function.arguments) except json.JSONDecodeError: args = {} yield { "type": "tool_use", "id": tc.id, "name": tc.function.name, "input": args, } except Exception as e: yield {"type": "text", "content": f"LLM error ({self.provider}): {e}"} # ── Helpers ── def _resolve_endpoint(self, provider: str) -> tuple[str, str]: if provider == "openrouter": return "https://openrouter.ai/api/v1", self.openrouter_key or "sk-placeholder" elif provider == "ollama": return f"{self.ollama_url}/v1", "ollama" elif provider == "lmstudio": return f"{self.lmstudio_url}/v1", "lm-studio" return "https://openrouter.ai/api/v1", self.openrouter_key or "sk-placeholder" def _resolve_model_id(self, provider: str) -> str: model = self.current_model if provider == "ollama" and model.startswith("local/ollama/"): return model.removeprefix("local/ollama/") if provider == "lmstudio" and model.startswith("local/lmstudio/"): return model.removeprefix("local/lmstudio/") return model def _get_openai(self): if self._openai_mod is None: import openai self._openai_mod = openai return self._openai_mod # ── Model Discovery ── def discover_local_models(self) -> list[ModelInfo]: models = [] # Ollama try: r = httpx.get(f"{self.ollama_url}/api/tags", timeout=3) if r.status_code == 200: for m in r.json().get("models", []): models.append(ModelInfo( id=f"local/ollama/{m['name']}", name=f"[Ollama] {m['name']}", provider="ollama", )) except Exception: pass # LM Studio try: r = httpx.get(f"{self.lmstudio_url}/v1/models", timeout=3) if r.status_code == 200: for m in r.json().get("data", []): models.append(ModelInfo( id=f"local/lmstudio/{m['id']}", name=f"[LM Studio] {m['id']}", provider="lmstudio", )) except Exception: pass return models def list_chat_models(self) -> list[ModelInfo]: """Return models available for the chat brain (no direct Claude SDK entries).""" models = [] if self.openrouter_key: models.extend([ # Anthropic (via OpenRouter — system prompts work correctly) ModelInfo("anthropic/claude-sonnet-4.5", "Claude Sonnet 4.5", "openrouter"), ModelInfo("anthropic/claude-opus-4.6", "Claude Opus 4.6", "openrouter"), # Google ModelInfo("google/gemini-3-flash-preview", "Gemini 3 Flash Preview", "openrouter"), ModelInfo("google/gemini-2.5-flash", "Gemini 2.5 Flash", "openrouter"), ModelInfo("google/gemini-2.5-flash-lite", "Gemini 2.5 Flash Lite", "openrouter"), # OpenAI ModelInfo("openai/gpt-5-nano", "GPT-5 Nano", "openrouter"), ModelInfo("openai/gpt-4o-mini", "GPT-4o Mini", "openrouter"), # DeepSeek / xAI / Others ModelInfo("deepseek/deepseek-v3.2", "DeepSeek V3.2", "openrouter"), ModelInfo("x-ai/grok-4.1-fast", "Grok 4.1 Fast", "openrouter"), ModelInfo("moonshotai/kimi-k2.5", "Kimi K2.5", "openrouter"), ModelInfo("minimax/minimax-m2.5", "MiniMax M2.5", "openrouter"), ]) models.extend(self.discover_local_models()) return models def list_available_models(self) -> list[ModelInfo]: """Backwards-compatible alias for list_chat_models().""" return self.list_chat_models()