# CheddahBot/cheddahbot/llm.py
"""Two-brain LLM adapter.
Chat Brain:
- OpenRouter / Ollama / LM Studio (OpenAI-compatible APIs)
- Full control over system prompt — Cheddah personality works here
- Claude models available via OpenRouter mapping
Execution Brain:
- Claude Code CLI (subprocess)
- Used for heartbeat, scheduled tasks, delegated system-level work
- Claude's built-in tools (Bash, Read, Edit, etc.) are a feature here
"""
from __future__ import annotations
import json
import logging
import os
import shutil
import subprocess
import sys
from dataclasses import dataclass
from typing import Generator
import httpx
log = logging.getLogger(__name__)
@dataclass
class ModelInfo:
    """A selectable chat model, as listed by the model-discovery helpers."""

    id: str  # routing ID, e.g. "anthropic/claude-sonnet-4.5" or "local/ollama/llama3.2"
    name: str  # human-readable display name, e.g. "[Ollama] llama3.2"
    provider: str  # "openrouter" | "ollama" | "lmstudio"
    context_length: int | None = None  # optional context size; never populated in this module
# Claude model IDs → OpenRouter equivalents (for chat dropdown).
# When a bare Claude ID is the current model, chat() rewrites it to the
# OpenRouter form — this requires an OpenRouter API key to be configured.
CLAUDE_OPENROUTER_MAP = {
    "claude-sonnet-4.5": "anthropic/claude-sonnet-4.5",
    "claude-opus-4.6": "anthropic/claude-opus-4.6",
    "claude-haiku-4.5": "anthropic/claude-haiku-4.5",
}
def _provider_for(model_id: str, openrouter_key: str) -> str:
    """Determine which OpenAI-compatible provider to route a chat model to.

    Local models carry a "local/<provider>/" ID prefix; everything else —
    including mapped Claude models — goes through OpenRouter.
    (`openrouter_key` is accepted but not consulted here; kept for interface
    stability with callers.)
    """
    local_prefixes = (
        ("local/ollama/", "ollama"),
        ("local/lmstudio/", "lmstudio"),
    )
    for prefix, provider_name in local_prefixes:
        if model_id.startswith(prefix):
            return provider_name
    return "openrouter"
class LLMAdapter:
    """Two-brain LLM adapter.

    Chat brain — OpenAI-compatible HTTP APIs (OpenRouter / Ollama / LM Studio),
    with full control over the system prompt.
    Execution brain — the Claude Code CLI run as a subprocess, used for
    heartbeat checks, scheduled tasks, and delegated system-level work.
    """

    def __init__(
        self,
        default_model: str = "claude-sonnet-4.5",
        openrouter_key: str = "",
        ollama_url: str = "http://localhost:11434",
        lmstudio_url: str = "http://localhost:1234",
    ):
        """Store chat-brain routing configuration.

        Args:
            default_model: Initial chat model ID.
            openrouter_key: OpenRouter API key; "" disables cloud models.
            ollama_url: Base URL of a local Ollama server.
            lmstudio_url: Base URL of a local LM Studio server.
        """
        self.current_model = default_model
        self.openrouter_key = openrouter_key
        # Normalize base URLs so paths can be appended with a single "/".
        self.ollama_url = ollama_url.rstrip("/")
        self.lmstudio_url = lmstudio_url.rstrip("/")
        self._openai_mod = None  # lazily imported `openai` module (see _get_openai)

    @property
    def provider(self) -> str:
        """Provider the current chat model routes to: "openrouter" | "ollama" | "lmstudio"."""
        return _provider_for(self.current_model, self.openrouter_key)

    def switch_model(self, model_id: str):
        """Point the chat brain at a different model ID."""
        self.current_model = model_id
        log.info("Switched chat model to: %s (provider: %s)", model_id, self.provider)

    # ── Chat Brain (OpenAI-compatible only) ──
    def chat(
        self,
        messages: list[dict],
        tools: list[dict] | None = None,
        stream: bool = True,
    ) -> Generator[dict, None, None]:
        """Chat brain: routes through OpenAI-compatible APIs only.

        Yields chunks: {"type": "text", "content": "..."} or {"type": "tool_use", ...}.
        When no usable provider is configured, yields a single explanatory
        text chunk instead of raising.
        """
        provider = self.provider
        model_id = self._resolve_model_id(provider)
        # If a Claude model ID was selected, map it to OpenRouter equivalent.
        if model_id in CLAUDE_OPENROUTER_MAP:
            if self.openrouter_key:
                model_id = CLAUDE_OPENROUTER_MAP[model_id]
                provider = "openrouter"
            else:
                yield {"type": "text", "content": (
                    "To chat with Claude models, you need an OpenRouter API key "
                    "(set OPENROUTER_API_KEY in .env). Alternatively, select a local "
                    "model from Ollama or LM Studio."
                )}
                return
        # Check if provider is available.
        if provider == "openrouter" and not self.openrouter_key:
            yield {"type": "text", "content": (
                "No API key configured. To use cloud models:\n"
                "1. Get an OpenRouter API key at https://openrouter.ai/keys\n"
                "2. Set OPENROUTER_API_KEY in your .env file\n\n"
                "Or install Ollama (free, local) and pull a model:\n"
                " ollama pull llama3.2"
            )}
            return
        base_url, api_key = self._resolve_endpoint(provider)
        yield from self._chat_openai_sdk(messages, tools, stream, base_url, api_key, model_id)

    # ── Execution Brain (Claude Code CLI) ──
    def execute(
        self,
        prompt: str,
        system_prompt: str = "",
        working_dir: str | None = None,
        tools: str = "Bash,Read,Edit,Write,Glob,Grep",
        model: str | None = None,
    ) -> str:
        """Execution brain: calls Claude Code CLI with full tool access.

        Used for heartbeat checks, scheduled tasks, and delegated complex tasks.
        Returns the full result string (non-streaming); errors are reported as
        human-readable strings rather than raised.

        Args:
            prompt: Task prompt, piped to the CLI via stdin.
            system_prompt: Optional system prompt override.
            working_dir: Directory the CLI runs in (None = inherit).
            tools: Comma-separated Claude Code tool names (default: standard set).
            model: Override the CLI model (e.g. "claude-sonnet-4.5").
        """
        claude_bin = shutil.which("claude")
        if not claude_bin:
            return "Error: `claude` CLI not found in PATH. Install Claude Code: npm install -g @anthropic-ai/claude-code"
        # Pipe prompt through stdin to avoid Windows 8191-char command-line limit.
        cmd = [
            claude_bin, "-p",
            "--output-format", "json",
            "--tools", tools,
        ]
        if model:
            cmd.extend(["--model", model])
        if system_prompt:
            cmd.extend(["--system-prompt", system_prompt])
        log.debug("Execution brain cmd: %s", " ".join(cmd[:6]) + "...")
        # Strip CLAUDECODE env var so the subprocess doesn't think it's nested.
        env = {k: v for k, v in os.environ.items() if k != "CLAUDECODE"}
        try:
            # NOTE(review): shell=True on Windows presumably exists to run npm
            # ".cmd" shims; combined with a list argv it relies on list2cmdline
            # quoting — confirm this is intentional before changing.
            proc = subprocess.Popen(
                cmd,
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                encoding="utf-8",
                shell=(sys.platform == "win32"),
                cwd=working_dir,
                env=env,
            )
        except FileNotFoundError:
            return "Error: `claude` CLI not found. Install Claude Code: npm install -g @anthropic-ai/claude-code"
        try:
            stdout, stderr = proc.communicate(input=prompt, timeout=300)
        except subprocess.TimeoutExpired:
            proc.kill()
            proc.communicate()  # drain pipes / reap the killed process (per subprocess docs)
            return "Error: Claude Code execution timed out after 5 minutes."
        if proc.returncode != 0:
            return f"Execution error: {stderr or 'unknown error'}"
        try:
            result = json.loads(stdout)
            # Bug fix: consult the CLI's error flag BEFORE returning result text;
            # previously an is_error payload with non-empty text was treated as success.
            if result.get("is_error"):
                return f"Execution error: {result.get('result', 'unknown')}"
            text = result.get("result", "")
            if text:
                return text
            return "(No output from execution brain)"
        except json.JSONDecodeError:
            # CLI emitted non-JSON output: pass it through verbatim.
            return stdout.strip() if stdout.strip() else "(No output from execution brain)"

    def is_execution_brain_available(self) -> bool:
        """Check if the Claude Code CLI is available."""
        return shutil.which("claude") is not None

    # ── OpenAI-compatible SDK (OpenRouter / Ollama / LM Studio) ──
    def _chat_openai_sdk(
        self,
        messages: list[dict],
        tools: list[dict] | None,
        stream: bool,
        base_url: str,
        api_key: str,
        model_id: str,
    ) -> Generator[dict, None, None]:
        """Drive one chat completion through the OpenAI SDK.

        Yields {"type": "text", ...} chunks as content arrives and, once the
        response is complete, one {"type": "tool_use", ...} chunk per tool call.
        SDK/network failures are yielded as a text chunk, never raised.
        """
        openai = self._get_openai()
        client = openai.OpenAI(base_url=base_url, api_key=api_key)
        kwargs: dict = {
            "model": model_id,
            "messages": messages,
            "stream": stream,
        }
        if tools:
            kwargs["tools"] = tools
        try:
            if stream:
                response = client.chat.completions.create(**kwargs)
                # Streaming deltas deliver tool-call fragments; accumulate them
                # by index until the stream ends, then emit complete calls.
                tool_calls_accum: dict[int, dict] = {}
                for chunk in response:
                    delta = chunk.choices[0].delta if chunk.choices else None
                    if not delta:
                        continue
                    if delta.content:
                        yield {"type": "text", "content": delta.content}
                    if delta.tool_calls:
                        for tc in delta.tool_calls:
                            idx = tc.index
                            if idx not in tool_calls_accum:
                                tool_calls_accum[idx] = {
                                    "id": tc.id or "",
                                    "name": tc.function.name if tc.function and tc.function.name else "",
                                    "arguments": "",
                                }
                            if tc.function and tc.function.arguments:
                                tool_calls_accum[idx]["arguments"] += tc.function.arguments
                            if tc.id:
                                tool_calls_accum[idx]["id"] = tc.id
                # Emit accumulated tool calls in index order.
                for _, tc in sorted(tool_calls_accum.items()):
                    try:
                        args = json.loads(tc["arguments"])
                    except json.JSONDecodeError:
                        args = {}  # malformed/truncated arguments: fall back to empty input
                    yield {
                        "type": "tool_use",
                        "id": tc["id"],
                        "name": tc["name"],
                        "input": args,
                    }
            else:
                response = client.chat.completions.create(**kwargs)
                msg = response.choices[0].message
                if msg.content:
                    yield {"type": "text", "content": msg.content}
                if msg.tool_calls:
                    for tc in msg.tool_calls:
                        try:
                            args = json.loads(tc.function.arguments)
                        except json.JSONDecodeError:
                            args = {}
                        yield {
                            "type": "tool_use",
                            "id": tc.id,
                            "name": tc.function.name,
                            "input": args,
                        }
        except Exception as e:
            yield {"type": "text", "content": f"LLM error ({self.provider}): {e}"}

    # ── Helpers ──
    def _resolve_endpoint(self, provider: str) -> tuple[str, str]:
        """Return (base_url, api_key) for an OpenAI-compatible provider.

        Local providers take fixed dummy keys; unknown providers fall back
        to OpenRouter.
        """
        if provider == "openrouter":
            return "https://openrouter.ai/api/v1", self.openrouter_key or "sk-placeholder"
        elif provider == "ollama":
            return f"{self.ollama_url}/v1", "ollama"
        elif provider == "lmstudio":
            return f"{self.lmstudio_url}/v1", "lm-studio"
        return "https://openrouter.ai/api/v1", self.openrouter_key or "sk-placeholder"

    def _resolve_model_id(self, provider: str) -> str:
        """Strip the "local/<provider>/" prefix for local providers; otherwise pass through."""
        model = self.current_model
        if provider == "ollama" and model.startswith("local/ollama/"):
            return model.removeprefix("local/ollama/")
        if provider == "lmstudio" and model.startswith("local/lmstudio/"):
            return model.removeprefix("local/lmstudio/")
        return model

    def _get_openai(self):
        """Import the `openai` SDK lazily and cache the module."""
        if self._openai_mod is None:
            import openai
            self._openai_mod = openai
        return self._openai_mod

    # ── Model Discovery ──
    def discover_local_models(self) -> list[ModelInfo]:
        """Probe local Ollama / LM Studio servers; unreachable servers are skipped silently."""
        models = []
        # Ollama
        try:
            r = httpx.get(f"{self.ollama_url}/api/tags", timeout=3)
            if r.status_code == 200:
                for m in r.json().get("models", []):
                    models.append(ModelInfo(
                        id=f"local/ollama/{m['name']}",
                        name=f"[Ollama] {m['name']}",
                        provider="ollama",
                    ))
        except Exception:
            pass  # server not running / unreachable — best-effort discovery
        # LM Studio
        try:
            r = httpx.get(f"{self.lmstudio_url}/v1/models", timeout=3)
            if r.status_code == 200:
                for m in r.json().get("data", []):
                    models.append(ModelInfo(
                        id=f"local/lmstudio/{m['id']}",
                        name=f"[LM Studio] {m['id']}",
                        provider="lmstudio",
                    ))
        except Exception:
            pass
        return models

    def list_chat_models(self) -> list[ModelInfo]:
        """Return models available for the chat brain (no direct Claude SDK entries).

        Cloud entries appear only when an OpenRouter key is configured;
        local models are appended from live discovery.
        """
        models = []
        if self.openrouter_key:
            models.extend([
                # Anthropic (via OpenRouter — system prompts work correctly)
                ModelInfo("anthropic/claude-sonnet-4.5", "Claude Sonnet 4.5", "openrouter"),
                ModelInfo("anthropic/claude-opus-4.6", "Claude Opus 4.6", "openrouter"),
                # Google
                ModelInfo("google/gemini-3-flash-preview", "Gemini 3 Flash Preview", "openrouter"),
                ModelInfo("google/gemini-2.5-flash", "Gemini 2.5 Flash", "openrouter"),
                ModelInfo("google/gemini-2.5-flash-lite", "Gemini 2.5 Flash Lite", "openrouter"),
                # OpenAI
                ModelInfo("openai/gpt-5-nano", "GPT-5 Nano", "openrouter"),
                ModelInfo("openai/gpt-4o-mini", "GPT-4o Mini", "openrouter"),
                # DeepSeek / xAI / Others
                ModelInfo("deepseek/deepseek-v3.2", "DeepSeek V3.2", "openrouter"),
                ModelInfo("x-ai/grok-4.1-fast", "Grok 4.1 Fast", "openrouter"),
                ModelInfo("moonshotai/kimi-k2.5", "Kimi K2.5", "openrouter"),
                ModelInfo("minimax/minimax-m2.5", "MiniMax M2.5", "openrouter"),
            ])
        models.extend(self.discover_local_models())
        return models

    def list_available_models(self) -> list[ModelInfo]:
        """Backwards-compatible alias for list_chat_models()."""
        return self.list_chat_models()