Add API cost tracking and switch planner to Grok 4.1 Fast

Track per-call token usage and estimated costs across all OpenRouter models.
Switch planner agent from Claude Sonnet 4.6 ($3/$15 per M) to Grok 4.1 Fast
($0.20/$0.50 per M) for ~25x cost reduction. Add budget alerts, a dashboard
card, and a check_api_usage tool for visibility into spending.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
cora-start
PeninsulaInd 2026-02-23 18:00:11 -06:00
parent ab2c313baa
commit 0b3ab904de
8 changed files with 264 additions and 1 deletion

View File

@ -186,6 +186,18 @@ class Agent:
yield chunk["content"]
elif chunk["type"] == "tool_use":
tool_calls.append(chunk)
elif chunk["type"] == "usage":
if self.db:
self.db.log_api_usage(
model=chunk["model"],
provider="openrouter",
prompt_tokens=chunk["prompt_tokens"],
completion_tokens=chunk["completion_tokens"],
total_tokens=chunk["total_tokens"],
estimated_cost=chunk["estimated_cost"],
conv_id=conv_id,
agent_name=self.agent_config.name if self.agent_config else "default",
)
# If no tool calls, we're done
if not tool_calls:

View File

@ -76,6 +76,12 @@ class LinkBuildingConfig:
default_branded_plus_ratio: float = 0.7
@dataclass
class ApiBudgetConfig:
    """Settings for monthly API spend alerts (consumed by check_api_usage and the dashboard)."""

    monthly_limit: float = 20.00  # USD - alert when exceeded
    alert_threshold: float = 0.8  # alert at 80% of limit
@dataclass
class AgentConfig:
"""Per-agent configuration for multi-agent support."""
@ -105,6 +111,7 @@ class Config:
press_advantage: PressAdvantageConfig = field(default_factory=PressAdvantageConfig)
email: EmailConfig = field(default_factory=EmailConfig)
link_building: LinkBuildingConfig = field(default_factory=LinkBuildingConfig)
api_budget: ApiBudgetConfig = field(default_factory=ApiBudgetConfig)
agents: list[AgentConfig] = field(default_factory=lambda: [AgentConfig()])
# Derived paths
@ -156,6 +163,10 @@ def load_config() -> Config:
for k, v in data["link_building"].items():
if hasattr(cfg.link_building, k):
setattr(cfg.link_building, k, v)
if "api_budget" in data and isinstance(data["api_budget"], dict):
for k, v in data["api_budget"].items():
if hasattr(cfg.api_budget, k):
setattr(cfg.api_budget, k, v)
# Multi-agent configs
if "agents" in data and isinstance(data["agents"], list):

View File

@ -72,6 +72,18 @@ class Database:
category TEXT NOT NULL DEFAULT 'clickup',
created_at TEXT NOT NULL
);
CREATE TABLE IF NOT EXISTS api_usage (
id INTEGER PRIMARY KEY AUTOINCREMENT,
timestamp TEXT NOT NULL,
model TEXT NOT NULL,
provider TEXT NOT NULL,
prompt_tokens INTEGER NOT NULL DEFAULT 0,
completion_tokens INTEGER NOT NULL DEFAULT 0,
total_tokens INTEGER NOT NULL DEFAULT 0,
estimated_cost REAL NOT NULL DEFAULT 0.0,
conv_id TEXT,
agent_name TEXT
);
""")
# Migration: add agent_name column to conversations (idempotent)
with contextlib.suppress(sqlite3.OperationalError):
@ -275,6 +287,84 @@ class Database:
).fetchall()
return [dict(r) for r in rows]
# -- API Usage --
def log_api_usage(
    self,
    model: str,
    provider: str,
    prompt_tokens: int,
    completion_tokens: int,
    total_tokens: int,
    estimated_cost: float,
    conv_id: str | None = None,
    agent_name: str | None = None,
):
    """Record one API call in the api_usage table, stamped with the current UTC time.

    Commits immediately so usage data survives even if the process dies
    mid-conversation.
    """
    values = (
        _now(),
        model,
        provider,
        prompt_tokens,
        completion_tokens,
        total_tokens,
        estimated_cost,
        conv_id,
        agent_name,
    )
    self._conn.execute(
        """INSERT INTO api_usage
            (timestamp, model, provider, prompt_tokens, completion_tokens,
             total_tokens, estimated_cost, conv_id, agent_name)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
        values,
    )
    self._conn.commit()
def get_api_usage_summary(self, days: int = 30) -> dict:
"""Return total tokens, total cost, and per-model breakdown for the period."""
cutoff = datetime.now(UTC).isoformat()[:10] # today
# Compute cutoff date
from datetime import timedelta
cutoff_dt = datetime.now(UTC) - timedelta(days=days)
cutoff = cutoff_dt.isoformat()
row = self._conn.execute(
"SELECT COALESCE(SUM(prompt_tokens), 0) as prompt_tokens,"
" COALESCE(SUM(completion_tokens), 0) as completion_tokens,"
" COALESCE(SUM(total_tokens), 0) as total_tokens,"
" COALESCE(SUM(estimated_cost), 0.0) as total_cost"
" FROM api_usage WHERE timestamp >= ?",
(cutoff,),
).fetchone()
model_rows = self._conn.execute(
"SELECT model,"
" COALESCE(SUM(prompt_tokens), 0) as prompt_tokens,"
" COALESCE(SUM(completion_tokens), 0) as completion_tokens,"
" COALESCE(SUM(total_tokens), 0) as total_tokens,"
" COALESCE(SUM(estimated_cost), 0.0) as total_cost,"
" COUNT(*) as call_count"
" FROM api_usage WHERE timestamp >= ?"
" GROUP BY model ORDER BY total_cost DESC",
(cutoff,),
).fetchall()
return {
"prompt_tokens": row["prompt_tokens"],
"completion_tokens": row["completion_tokens"],
"total_tokens": row["total_tokens"],
"total_cost": row["total_cost"],
"by_model": [dict(r) for r in model_rows],
}
def get_api_usage_daily(self, days: int = 7) -> list[dict]:
"""Return daily totals for trending."""
from datetime import timedelta
cutoff_dt = datetime.now(UTC) - timedelta(days=days)
cutoff = cutoff_dt.isoformat()
rows = self._conn.execute(
"SELECT DATE(timestamp) as day,"
" COALESCE(SUM(total_tokens), 0) as total_tokens,"
" COALESCE(SUM(estimated_cost), 0.0) as total_cost,"
" COUNT(*) as call_count"
" FROM api_usage WHERE timestamp >= ?"
" GROUP BY DATE(timestamp) ORDER BY day ASC",
(cutoff,),
).fetchall()
return [dict(r) for r in rows]
def _now() -> str:
return datetime.now(UTC).isoformat()

View File

@ -27,6 +27,29 @@ import httpx
log = logging.getLogger(__name__)
# Pricing per million tokens: (input_cost, output_cost) in USD
MODEL_PRICING: dict[str, tuple[float, float]] = {
"anthropic/claude-sonnet": (3.00, 15.00),
"anthropic/claude-opus": (5.00, 25.00),
"anthropic/claude-haiku": (0.80, 4.00),
"x-ai/grok-4.1-fast": (0.20, 0.50),
"google/gemini-3-flash": (0.50, 3.00),
"google/gemini-2.5-flash": (0.15, 0.60),
"openai/gpt-4o-mini": (0.15, 0.60),
"openai/gpt-5-nano": (0.10, 0.40),
"deepseek/deepseek-v3": (0.24, 0.38),
"minimax/minimax-m2.5": (0.30, 1.20),
"moonshotai/kimi-k2.5": (0.45, 2.20),
}
def _estimate_cost(model: str, prompt_tokens: int, completion_tokens: int) -> float:
"""Estimate cost in USD using prefix matching against MODEL_PRICING."""
for prefix, (input_rate, output_rate) in MODEL_PRICING.items():
if model.startswith(prefix):
return (prompt_tokens * input_rate + completion_tokens * output_rate) / 1_000_000
return 0.0
@dataclass
class ModelInfo:
@ -232,6 +255,8 @@ class LLMAdapter:
"messages": messages,
"stream": stream,
}
if stream:
kwargs["stream_options"] = {"include_usage": True}
if tools:
kwargs["tools"] = tools
@ -243,7 +268,11 @@ class LLMAdapter:
if stream:
response = client.chat.completions.create(**kwargs)
tool_calls_accum: dict[int, dict] = {}
stream_usage = None
for chunk in response:
# Capture usage from the final stream chunk
if hasattr(chunk, "usage") and chunk.usage:
stream_usage = chunk.usage
delta = chunk.choices[0].delta if chunk.choices else None
if not delta:
continue
@ -277,6 +306,19 @@ class LLMAdapter:
"name": tc["name"],
"input": args,
}
# Yield usage chunk if available
if stream_usage:
pt = getattr(stream_usage, "prompt_tokens", 0) or 0
ct = getattr(stream_usage, "completion_tokens", 0) or 0
yield {
"type": "usage",
"model": model_id,
"prompt_tokens": pt,
"completion_tokens": ct,
"total_tokens": pt + ct,
"estimated_cost": _estimate_cost(model_id, pt, ct),
}
else:
response = client.chat.completions.create(**kwargs)
msg = response.choices[0].message
@ -295,6 +337,19 @@ class LLMAdapter:
"name": tc.function.name,
"input": args,
}
# Yield usage chunk for non-streaming
if hasattr(response, "usage") and response.usage:
pt = response.usage.prompt_tokens or 0
ct = response.usage.completion_tokens or 0
yield {
"type": "usage",
"model": model_id,
"prompt_tokens": pt,
"completion_tokens": ct,
"total_tokens": pt + ct,
"estimated_cost": _estimate_cost(model_id, pt, ct),
}
# Success — break out of retry loop
return

View File

@ -62,3 +62,54 @@ def report_issue(
log.info("Logged improvement request: %s", title)
return f"Logged improvement request: **{title}**. Bryan will see it on the next heartbeat."
@tool(
    "check_api_usage",
    "Check API token usage and estimated costs for the last N days",
    category="system",
)
def check_api_usage(days: int = 30, ctx: dict | None = None) -> str:
    """Build a markdown report: totals, budget status, per-model and daily breakdowns."""
    db = ctx.get("db") if ctx else None
    if not db:
        return "Error: database not available."

    usage = db.get_api_usage_summary(days)
    trend = db.get_api_usage_daily(min(days, 7))
    tokens_total = usage["total_tokens"]
    cost_total = usage["total_cost"]

    report = [
        f"## API Usage Report ({days}-day window)\n",
        f"**Total tokens:** {tokens_total:,}",
        f"**Estimated cost:** ${cost_total:.4f}",
    ]

    # Budget section — compares the window total against the configured
    # monthly limit and warns past the alert threshold.
    cfg = ctx.get("config") if ctx else None
    if cfg and hasattr(cfg, "api_budget"):
        limit = cfg.api_budget.monthly_limit
        pct = (cost_total / limit * 100) if limit > 0 else 0
        report.append(f"**Budget:** ${cost_total:.2f} / ${limit:.2f} ({pct:.1f}%)")
        if pct >= cfg.api_budget.alert_threshold * 100:
            report.append(f"\n**WARNING:** Spending is at {pct:.1f}% of monthly budget!")

    if usage["by_model"]:
        report.append("\n### By Model")
        report.extend(
            f"- **{m['model']}**: {m['total_tokens']:,} tokens, "
            f"${m['total_cost']:.4f}, {m['call_count']} calls"
            for m in usage["by_model"]
        )

    if trend:
        report.append("\n### Daily Trend (last 7 days)")
        report.extend(
            f"- {d['day']}: {d['total_tokens']:,} tokens, "
            f"${d['total_cost']:.4f}, {d['call_count']} calls"
            for d in trend
        )

    return "\n".join(report)

View File

@ -214,6 +214,12 @@ def create_ui(
elem_classes=["contain"],
)
# -- API Usage card --
api_usage_display = gr.Markdown(
value="*API Usage (30d):* loading...",
elem_classes=["contain"],
)
# -- Notification banner --
notification_display = gr.Markdown(
value="",
@ -505,6 +511,34 @@ def create_ui(
value="*Recent System events* | System Loop: waiting for first run..."
)
def poll_api_usage():
    """Refresh the dashboard card with 30-day API usage totals."""
    try:
        db = registry.default.db if registry.default else None
        if not db:
            return gr.update()
        stats = db.get_api_usage_summary(30)
        tokens = stats["total_tokens"]
        cost = stats["total_cost"]
        # Human-readable token count: 1.2M / 3.4K / 999
        if tokens >= 1_000_000:
            tok_str = f"{tokens / 1_000_000:.1f}M"
        elif tokens >= 1_000:
            tok_str = f"{tokens / 1_000:.1f}K"
        else:
            tok_str = str(tokens)
        budget_str = ""
        if hasattr(config, "api_budget"):
            budget_str = f" | Budget: ${cost:.2f} / ${config.api_budget.monthly_limit:.2f}"
        return gr.update(
            value=f"*API Usage (30d):* {tok_str} tokens | ${cost:.2f} est.{budget_str}"
        )
    except Exception:
        # Best-effort polling: a transient DB/UI error must not kill the
        # timer callback, so leave the card unchanged.
        return gr.update()
def on_force_pulse():
if not scheduler:
return gr.update(
@ -563,4 +597,8 @@ def create_ui(
loop_timer = gr.Timer(30)
loop_timer.tick(poll_loop_status, None, [loop_status])
# API usage polling timer (every 60 seconds)
api_timer = gr.Timer(60)
api_timer.tick(poll_api_usage, None, [api_usage_display])
return app

View File

@ -115,6 +115,11 @@ agents:
- name: planner
display_name: Planner
model: "anthropic/claude-sonnet-4.6"
model: "x-ai/grok-4.1-fast"
tools: [delegate_task, remember, search_memory, report_issue, web_search]
memory_scope: ""
# API budget alerts
api_budget:
monthly_limit: 20.00 # USD - alert when exceeded
alert_threshold: 0.8 # alert at 80% of limit

View File

@ -6,3 +6,4 @@ Things to proactively check on each heartbeat cycle:
- Review memory for any pending reminders that are due
- Check disk space (warn if < 10% free)
- Check memory/improvement_requests.md for pending items and notify Bryan with a summary
- Check API usage costs (check_api_usage tool) and alert Bryan if monthly spend exceeds budget threshold