CheddahBot/cheddahbot/llm.py

"""Model-agnostic LLM adapter.

Routing:
  - Claude models → Claude Code SDK (subprocess, uses Max subscription)
  - Cloud models  → OpenRouter (single API key, OpenAI-compatible)
  - Local models  → direct HTTP (Ollama / LM Studio, OpenAI-compatible)
"""

from __future__ import annotations

import json
import logging
import os
import shutil
import subprocess
import sys
from dataclasses import dataclass
from typing import Generator

import httpx

log = logging.getLogger(__name__)


@dataclass
class ModelInfo:
    id: str
    name: str
    provider: str  # "claude" | "openrouter" | "ollama" | "lmstudio"
    context_length: int | None = None


# Well-known Claude models that route through the SDK
CLAUDE_MODELS = {
    "claude-sonnet-4-20250514",
    "claude-opus-4-20250514",
    "claude-haiku-4-20250514",
}


def _is_claude_model(model_id: str) -> bool:
    return model_id in CLAUDE_MODELS or model_id.startswith("claude-")


def _provider_for(model_id: str, openrouter_key: str, ollama_url: str, lmstudio_url: str) -> str:
    if _is_claude_model(model_id):
        return "claude"
    if model_id.startswith("local/ollama/"):
        return "ollama"
    if model_id.startswith("local/lmstudio/"):
        return "lmstudio"
    if openrouter_key:
        return "openrouter"
    return "openrouter"


class LLMAdapter:
    def __init__(
        self,
        default_model: str = "claude-sonnet-4-20250514",
        openrouter_key: str = "",
        ollama_url: str = "http://localhost:11434",
        lmstudio_url: str = "http://localhost:1234",
    ):
        self.current_model = default_model
        self.openrouter_key = openrouter_key
        self.ollama_url = ollama_url.rstrip("/")
        self.lmstudio_url = lmstudio_url.rstrip("/")
        self._openai_mod = None  # lazy import

    @property
    def provider(self) -> str:
        return _provider_for(self.current_model, self.openrouter_key, self.ollama_url, self.lmstudio_url)

    def switch_model(self, model_id: str):
        self.current_model = model_id
        log.info("Switched to model: %s (provider: %s)", model_id, self.provider)

    # ── Main entry point ──

    def chat(
        self,
        messages: list[dict],
        tools: list[dict] | None = None,
        stream: bool = True,
    ) -> Generator[dict, None, None]:
        """Yield chunks: {"type": "text", "content": "..."} or {"type": "tool_use", ...}."""
        provider = self.provider
        if provider == "claude":
            yield from self._chat_claude_sdk(messages, tools, stream)
        else:
            base_url, api_key = self._resolve_endpoint(provider)
            model_id = self._resolve_model_id(provider)
            yield from self._chat_openai_sdk(messages, tools, stream, base_url, api_key, model_id)

    # ── Claude Code SDK (subprocess) ──

    def _chat_claude_sdk(
        self, messages: list[dict], tools: list[dict] | None, stream: bool
    ) -> Generator[dict, None, None]:
        # Separate system prompt from user messages
        system_prompt = ""
        user_prompt_parts = []
        for m in messages:
            role = m.get("role", "user")
            content = m.get("content", "")
            if isinstance(content, list):
                content = " ".join(c.get("text", "") for c in content if c.get("type") == "text")
            if role == "system":
                system_prompt += content + "\n"
            elif role == "assistant":
                user_prompt_parts.append(f"[Assistant]\n{content}")
            else:
                user_prompt_parts.append(content)
        user_prompt = "\n\n".join(user_prompt_parts)

        # Find claude CLI - on Windows needs .cmd extension for npm-installed binaries
        claude_bin = shutil.which("claude")
        if not claude_bin:
            yield {"type": "text", "content": "Error: `claude` CLI not found in PATH. Install Claude Code: npm install -g @anthropic-ai/claude-code"}
            return

        cmd = [claude_bin, "-p", user_prompt, "--model", self.current_model,
               "--output-format", "json", "--tools", ""]
        if system_prompt.strip():
            cmd.extend(["--system-prompt", system_prompt.strip()])
        log.debug("Claude SDK using: %s", claude_bin)

        # Strip CLAUDECODE env var so the subprocess doesn't think it's nested
        env = {k: v for k, v in os.environ.items() if k != "CLAUDECODE"}

        try:
            proc = subprocess.Popen(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                encoding="utf-8",
                shell=(sys.platform == "win32"),
                env=env,
            )
        except FileNotFoundError:
            yield {"type": "text", "content": "Error: `claude` CLI not found. Install Claude Code: npm install -g @anthropic-ai/claude-code"}
            return

        stdout, stderr = proc.communicate(timeout=120)

        if proc.returncode != 0:
            yield {"type": "text", "content": f"Claude SDK error: {stderr or 'unknown error'}"}
            return

        # --output-format json returns a single JSON object
        try:
            result = json.loads(stdout)
            text = result.get("result", "")
            if text:
                yield {"type": "text", "content": text}
            elif result.get("is_error"):
                yield {"type": "text", "content": f"Claude error: {result.get('result', 'unknown')}"}
            return
        except json.JSONDecodeError:
            # Fallback: treat as plain text
            if stdout.strip():
                yield {"type": "text", "content": stdout.strip()}

    # ── OpenAI-compatible SDK (OpenRouter / Ollama / LM Studio) ──

    def _chat_openai_sdk(
        self,
        messages: list[dict],
        tools: list[dict] | None,
        stream: bool,
        base_url: str,
        api_key: str,
        model_id: str,
    ) -> Generator[dict, None, None]:
        openai = self._get_openai()
        client = openai.OpenAI(base_url=base_url, api_key=api_key)

        kwargs: dict = {
            "model": model_id,
            "messages": messages,
            "stream": stream,
        }
        if tools:
            kwargs["tools"] = tools

        try:
            if stream:
                response = client.chat.completions.create(**kwargs)
                tool_calls_accum: dict[int, dict] = {}
                for chunk in response:
                    delta = chunk.choices[0].delta if chunk.choices else None
                    if not delta:
                        continue
                    if delta.content:
                        yield {"type": "text", "content": delta.content}
                    if delta.tool_calls:
                        for tc in delta.tool_calls:
                            idx = tc.index
                            if idx not in tool_calls_accum:
                                tool_calls_accum[idx] = {
                                    "id": tc.id or "",
                                    "name": tc.function.name if tc.function and tc.function.name else "",
                                    "arguments": "",
                                }
                            if tc.function and tc.function.arguments:
                                tool_calls_accum[idx]["arguments"] += tc.function.arguments
                            if tc.id:
                                tool_calls_accum[idx]["id"] = tc.id

                for _, tc in sorted(tool_calls_accum.items()):
                    try:
                        args = json.loads(tc["arguments"])
                    except json.JSONDecodeError:
                        args = {}
                    yield {
                        "type": "tool_use",
                        "id": tc["id"],
                        "name": tc["name"],
                        "input": args,
                    }
            else:
                response = client.chat.completions.create(**kwargs)
                msg = response.choices[0].message
                if msg.content:
                    yield {"type": "text", "content": msg.content}
                if msg.tool_calls:
                    for tc in msg.tool_calls:
                        try:
                            args = json.loads(tc.function.arguments)
                        except json.JSONDecodeError:
                            args = {}
                        yield {
                            "type": "tool_use",
                            "id": tc.id,
                            "name": tc.function.name,
                            "input": args,
                        }
        except Exception as e:
            yield {"type": "text", "content": f"LLM error ({self.provider}): {e}"}

    # ── Helpers ──

    def _resolve_endpoint(self, provider: str) -> tuple[str, str]:
        if provider == "openrouter":
            return "https://openrouter.ai/api/v1", self.openrouter_key or "sk-placeholder"
        elif provider == "ollama":
            return f"{self.ollama_url}/v1", "ollama"
        elif provider == "lmstudio":
            return f"{self.lmstudio_url}/v1", "lm-studio"
        return "https://openrouter.ai/api/v1", self.openrouter_key or "sk-placeholder"

    def _resolve_model_id(self, provider: str) -> str:
        model = self.current_model
        if provider == "ollama" and model.startswith("local/ollama/"):
            return model.removeprefix("local/ollama/")
        if provider == "lmstudio" and model.startswith("local/lmstudio/"):
            return model.removeprefix("local/lmstudio/")
        return model

    def _messages_to_prompt(self, messages: list[dict]) -> str:
        """Flatten messages into a single prompt string for Claude SDK -p flag."""
        parts = []
        for m in messages:
            role = m.get("role", "user")
            content = m.get("content", "")
            if isinstance(content, list):
                # multimodal - extract text parts
                content = " ".join(
                    c.get("text", "") for c in content if c.get("type") == "text"
                )
            if role == "system":
                parts.append(f"[System]\n{content}")
            elif role == "assistant":
                parts.append(f"[Assistant]\n{content}")
            else:
                parts.append(content)
        return "\n\n".join(parts)

    def _get_openai(self):
        if self._openai_mod is None:
            import openai
            self._openai_mod = openai
        return self._openai_mod

    # ── Model Discovery ──

    def discover_local_models(self) -> list[ModelInfo]:
        models = []
        # Ollama
        try:
            r = httpx.get(f"{self.ollama_url}/api/tags", timeout=3)
            if r.status_code == 200:
                for m in r.json().get("models", []):
                    models.append(ModelInfo(
                        id=f"local/ollama/{m['name']}",
                        name=f"[Ollama] {m['name']}",
                        provider="ollama",
                    ))
        except Exception:
            pass
        # LM Studio
        try:
            r = httpx.get(f"{self.lmstudio_url}/v1/models", timeout=3)
            if r.status_code == 200:
                for m in r.json().get("data", []):
                    models.append(ModelInfo(
                        id=f"local/lmstudio/{m['id']}",
                        name=f"[LM Studio] {m['id']}",
                        provider="lmstudio",
                    ))
        except Exception:
            pass
        return models

    def list_available_models(self) -> list[ModelInfo]:
        """Return all available models across all providers."""
        models = [
            ModelInfo("claude-sonnet-4-20250514", "Claude Sonnet 4", "claude"),
            ModelInfo("claude-opus-4-20250514", "Claude Opus 4", "claude"),
            ModelInfo("claude-haiku-4-20250514", "Claude Haiku 4", "claude"),
        ]
        if self.openrouter_key:
            models.extend([
                ModelInfo("openai/gpt-4o", "GPT-4o", "openrouter"),
                ModelInfo("openai/gpt-4o-mini", "GPT-4o Mini", "openrouter"),
                ModelInfo("google/gemini-2.0-flash-001", "Gemini 2.0 Flash", "openrouter"),
                ModelInfo("google/gemini-2.5-pro-preview", "Gemini 2.5 Pro", "openrouter"),
                ModelInfo("mistralai/mistral-large", "Mistral Large", "openrouter"),
                ModelInfo("meta-llama/llama-3.3-70b-instruct", "Llama 3.3 70B", "openrouter"),
            ])
        models.extend(self.discover_local_models())
        return models