From d9e0020b673134f74425b07a066c9a7b0196539d Mon Sep 17 00:00:00 2001 From: PeninsulaInd Date: Thu, 19 Feb 2026 21:34:02 -0600 Subject: [PATCH] Fix UI errors, message formatting, scheduler auto_execute, and LLM retry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Core fixes: - Rewrite router.py format_messages_for_llm() to properly handle tool call/result message pairs in OpenAI format instead of faking them as user messages — root cause of most LLM API errors - Fix scheduler ignoring auto_execute:false flag, which caused all Link Building tasks to be incorrectly executed and moved to internal review - Add safety check so Skipped/Error tool results don't get marked as completed in ClickUp Additional improvements: - Add LLM retry logic (2 retries on transient 5xx/timeout/rate-limit) - Replace raw LLM tracebacks with friendly error messages - Fix ghost assistant bubble in UI by deferring append to first chunk - Auto-title conversations from first user message - Consistent tool_call_id generation (resolve once, reuse everywhere) - Reduce pipeline status polling from 3s to 10s - Update CLAUDE.md: remove stale watchdog/autostart docs Co-Authored-By: Claude Opus 4.6 --- CLAUDE.md | 2 +- cheddahbot/agent.py | 32 +++++++-- cheddahbot/db.py | 6 ++ cheddahbot/llm.py | 144 ++++++++++++++++++++++++++-------------- cheddahbot/router.py | 99 ++++++++++++++++++++++++--- cheddahbot/scheduler.py | 31 +++++++++ cheddahbot/ui.py | 43 ++++++++---- 7 files changed, 282 insertions(+), 75 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 4cc0797..6b05849 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -202,4 +202,4 @@ Fixtures in `conftest.py`: `tmp_db` (fresh SQLite), `sample_clickup_task_data` ( - Don't add to `requirements.txt` — use `uv add` (pyproject.toml) - Don't call tools directly from UI code — go through `NotificationBus` for scheduler events - Don't store ClickUp state outside of `kv_store` — it's the single source of truth -- **Don't try to restart CheddahBot by killing and relaunching** — there is an autostart/watchdog that respawns the process automatically after a kill. If you need to restart, just kill the old PID and the autostart will handle it. Do NOT launch a second instance or you'll get port conflicts. +- **Restarting CheddahBot** — there is no watchdog/autostart. Kill the old process, then relaunch with `uv run python -m cheddahbot` (or `bash start.sh`). Do NOT launch a second instance without killing the first or you'll get port conflicts. diff --git a/cheddahbot/agent.py b/cheddahbot/agent.py index 421853a..4bbd86f 100644 --- a/cheddahbot/agent.py +++ b/cheddahbot/agent.py @@ -223,10 +223,16 @@ class Agent: # Execute tools if self._tools: + # Resolve IDs once so assistant message and tool responses match + resolved_ids = [ + tc.get("id") or f"call_{tc['name']}_{i}" + for i, tc in enumerate(unique_tool_calls) + ] + # Build OpenAI-format assistant message with tool_calls openai_tool_calls = [ { - "id": tc.get("id", f"call_{tc['name']}_{i}"), + "id": resolved_ids[i], "type": "function", "function": { "name": tc["name"], @@ -243,19 +249,19 @@ class Agent: } ) - for tc in unique_tool_calls: - yield f"\n\n**Using tool: {tc['name']}**\n" + for i, tc in enumerate(unique_tool_calls): + yield f"\n\n*Calling {tc['name']}...*\n" try: result = self._tools.execute(tc["name"], tc.get("input", {})) except Exception as e: result = f"Tool error: {e}" - yield f"```\n{result[:2000]}\n```\n\n" + log.info("Tool %s result: %s", tc["name"], result[:500]) self.db.add_message(conv_id, "tool", result, tool_result=tc["name"]) messages.append( { "role": "tool", - "tool_call_id": tc.get("id", f"call_{tc['name']}"), + "tool_call_id": resolved_ids[i], "content": result, } ) @@ -271,12 +277,28 @@ class Agent: else: yield "\n(Reached maximum tool iterations)" + # Auto-title the conversation from the first user message + self._maybe_set_title(conv_id, user_input) + # Check if memory flush is needed if self._memory: msg_count = self.db.count_messages(conv_id) if msg_count > self.config.memory.flush_threshold: self._memory.auto_flush(conv_id) + def _maybe_set_title(self, conv_id: str, user_input: str): + """Set conversation title from first user message if still untitled.""" + try: + if self.db.count_messages(conv_id) > 3: + return + title = user_input.split("\n", 1)[0].strip() + if len(title) > 50: + title = title[:47] + "..." + if title: + self.db.update_conversation_title(conv_id, title) + except Exception as e: + log.warning("Failed to set conversation title: %s", e) + def respond_to_prompt(self, prompt: str) -> str: """Non-streaming response for scheduled tasks / internal use.""" result_parts = [] diff --git a/cheddahbot/db.py b/cheddahbot/db.py index 141e7fd..b5af9aa 100644 --- a/cheddahbot/db.py +++ b/cheddahbot/db.py @@ -109,6 +109,12 @@ class Database: ).fetchall() return [dict(r) for r in rows] + def update_conversation_title(self, conv_id: str, title: str): + self._conn.execute( + "UPDATE conversations SET title = ? WHERE id = ?", (title, conv_id) + ) + self._conn.commit() + # -- Messages -- def add_message( diff --git a/cheddahbot/llm.py b/cheddahbot/llm.py index e36089d..1c1c59c 100644 --- a/cheddahbot/llm.py +++ b/cheddahbot/llm.py @@ -19,6 +19,7 @@ import os import shutil import subprocess import sys +import time from collections.abc import Generator from dataclasses import dataclass @@ -234,62 +235,77 @@ class LLMAdapter: if tools: kwargs["tools"] = tools - try: - if stream: - response = client.chat.completions.create(**kwargs) - tool_calls_accum: dict[int, dict] = {} - for chunk in response: - delta = chunk.choices[0].delta if chunk.choices else None - if not delta: - continue - if delta.content: - yield {"type": "text", "content": delta.content} - if delta.tool_calls: - for tc in delta.tool_calls: - idx = tc.index - if idx not in tool_calls_accum: - tool_calls_accum[idx] = { - "id": tc.id or "", - "name": tc.function.name - if tc.function and tc.function.name - else "", - "arguments": "", - } - if tc.function and tc.function.arguments: - tool_calls_accum[idx]["arguments"] += tc.function.arguments - if tc.id: - tool_calls_accum[idx]["id"] = tc.id + max_retries = 2 + has_yielded = False - for _, tc in sorted(tool_calls_accum.items()): - try: - args = json.loads(tc["arguments"]) - except json.JSONDecodeError: - args = {} - yield { - "type": "tool_use", - "id": tc["id"], - "name": tc["name"], - "input": args, - } - else: - response = client.chat.completions.create(**kwargs) - msg = response.choices[0].message - if msg.content: - yield {"type": "text", "content": msg.content} - if msg.tool_calls: - for tc in msg.tool_calls: + for attempt in range(max_retries + 1): + try: + if stream: + response = client.chat.completions.create(**kwargs) + tool_calls_accum: dict[int, dict] = {} + for chunk in response: + delta = chunk.choices[0].delta if chunk.choices else None + if not delta: + continue + if delta.content: + has_yielded = True + yield {"type": "text", "content": delta.content} + if delta.tool_calls: + for tc in delta.tool_calls: + idx = tc.index + if idx not in tool_calls_accum: + tool_calls_accum[idx] = { + "id": tc.id or "", + "name": tc.function.name + if tc.function and tc.function.name + else "", + "arguments": "", + } + if tc.function and tc.function.arguments: + tool_calls_accum[idx]["arguments"] += tc.function.arguments + if tc.id: + tool_calls_accum[idx]["id"] = tc.id + + for _, tc in sorted(tool_calls_accum.items()): try: - args = json.loads(tc.function.arguments) + args = json.loads(tc["arguments"]) except json.JSONDecodeError: args = {} yield { "type": "tool_use", - "id": tc.id, - "name": tc.function.name, + "id": tc["id"], + "name": tc["name"], "input": args, } - except Exception as e: - yield {"type": "text", "content": f"LLM error ({self.provider}): {e}"} + else: + response = client.chat.completions.create(**kwargs) + msg = response.choices[0].message + if msg.content: + has_yielded = True + yield {"type": "text", "content": msg.content} + if msg.tool_calls: + for tc in msg.tool_calls: + try: + args = json.loads(tc.function.arguments) + except json.JSONDecodeError: + args = {} + yield { + "type": "tool_use", + "id": tc.id, + "name": tc.function.name, + "input": args, + } + # Success — break out of retry loop + return + + except Exception as e: + if not has_yielded and attempt < max_retries and _is_retryable_error(e): + wait = 2 ** attempt + log.warning("Retryable LLM error (attempt %d/%d), retrying in %ds: %s", + attempt + 1, max_retries + 1, wait, e) + time.sleep(wait) + continue + yield {"type": "text", "content": _friendly_error(e, self.provider)} # ── Helpers ── @@ -386,3 +402,35 @@ class LLMAdapter: def list_available_models(self) -> list[ModelInfo]: """Backwards-compatible alias for list_chat_models().""" return self.list_chat_models() + + +def _is_retryable_error(e: Exception) -> bool: + """Return True for transient errors worth retrying (5xx, timeout, rate limit).""" + name = type(e).__name__ + # openai library exceptions + if name in ("APITimeoutError", "InternalServerError", "RateLimitError", "APIConnectionError"): + return True + # Status-code based (works with openai.APIStatusError subclasses) + status = getattr(e, "status_code", None) + if status and status >= 500: + return True + return status == 429 + + +def _friendly_error(e: Exception, provider: str) -> str: + """Map common LLM exceptions to plain-English messages.""" + name = type(e).__name__ + if name == "AuthenticationError" or "401" in str(e): + return f"Authentication failed for {provider}. Please check your API key." + if name == "RateLimitError" or "429" in str(e): + return f"Rate limited by {provider}. Please wait a moment and try again." + if name in ("APITimeoutError", "APIConnectionError") or "timeout" in str(e).lower(): + return ( + f"Could not reach {provider} — the service may be down " + "or your connection is interrupted." + ) + if name == "InternalServerError" or (getattr(e, "status_code", None) or 0) >= 500: + return f"{provider} returned a server error. Please try again shortly." + # Generic fallback — still friendlier than a raw traceback + log.error("LLM error (%s): %s", provider, e, exc_info=True) + return f"Something went wrong talking to {provider}. Check the logs for details." diff --git a/cheddahbot/router.py b/cheddahbot/router.py index df239db..43d8d9e 100644 --- a/cheddahbot/router.py +++ b/cheddahbot/router.py @@ -2,6 +2,7 @@ from __future__ import annotations +import json from pathlib import Path @@ -63,20 +64,100 @@ def format_messages_for_llm( history: list[dict], max_messages: int = 50, ) -> list[dict]: - """Format conversation history into LLM message format.""" + """Format conversation history into LLM message format. + + Handles three message types from the DB: + - user: passed through as role=user + - assistant: reconstructed with tool_calls in OpenAI format when present; + skipped if empty content AND no tool_calls + - tool: kept as role=tool with a tool_call_id linking back to the + assistant message that requested it + """ messages = [{"role": "system", "content": system_prompt}] - # Take the most recent messages up to the limit recent = history[-max_messages:] if len(history) > max_messages else history for msg in recent: role = msg.get("role", "user") content = msg.get("content", "") - if role in ("user", "assistant", "system"): - messages.append({"role": role, "content": content}) - elif role == "tool": - # Tool results go as a user message with context - tool_name = msg.get("tool_result", "unknown") - messages.append({"role": "user", "content": f'[Tool "{tool_name}" result]\n{content}'}) + tool_calls = msg.get("tool_calls") # list or None - return messages + if role == "user": + messages.append({"role": "user", "content": content}) + + elif role == "assistant": + # Skip completely empty assistant messages (no text, no tool_calls) + if not content and not tool_calls: + continue + + entry: dict = {"role": "assistant", "content": content or None} + + if tool_calls: + openai_tcs = [] + for i, tc in enumerate(tool_calls): + tc_id = tc.get("id") or f"call_{tc.get('name', 'unknown')}_{i}" + openai_tcs.append( + { + "id": tc_id, + "type": "function", + "function": { + "name": tc.get("name", "unknown"), + "arguments": json.dumps(tc.get("input", {})), + }, + } + ) + entry["tool_calls"] = openai_tcs + + messages.append(entry) + + elif role == "tool": + tool_name = msg.get("tool_result", "unknown") + tc_id = _find_tool_call_id(messages, tool_name) + messages.append( + { + "role": "tool", + "tool_call_id": tc_id, + "content": content, + } + ) + + return _merge_consecutive(messages) + + +def _find_tool_call_id(messages: list[dict], tool_name: str) -> str: + """Walk backwards through messages to find the tool_call_id for a tool result.""" + for msg in reversed(messages): + if msg.get("role") != "assistant" or "tool_calls" not in msg: + continue + for tc in msg["tool_calls"]: + fn = tc.get("function", {}) + if fn.get("name") == tool_name: + return tc["id"] + # Fallback: generate a deterministic ID so the API doesn't reject it + return f"call_{tool_name}_0" + + +def _merge_consecutive(messages: list[dict]) -> list[dict]: + """Merge back-to-back messages with the same role to avoid API rejection. + + Only merges user or assistant messages (not system or tool). + """ + if not messages: + return messages + + merged: list[dict] = [messages[0]] + for msg in messages[1:]: + prev = merged[-1] + if ( + msg["role"] == prev["role"] + and msg["role"] in ("user", "assistant") + and "tool_calls" not in prev + and "tool_calls" not in msg + ): + # Merge text content + prev_text = prev.get("content") or "" + new_text = msg.get("content") or "" + prev["content"] = f"{prev_text}\n\n{new_text}".strip() + else: + merged.append(msg) + return merged diff --git a/cheddahbot/scheduler.py b/cheddahbot/scheduler.py index 12f1405..556475d 100644 --- a/cheddahbot/scheduler.py +++ b/cheddahbot/scheduler.py @@ -288,6 +288,16 @@ class Scheduler: if task.task_type not in skill_map: continue + # Respect auto_execute flag — skip tasks that require manual trigger + mapping = skill_map[task.task_type] + if not mapping.get("auto_execute", False): + log.debug( + "Skipping task '%s' (type=%s): auto_execute is false", + task.name, + task.task_type, + ) + continue + # Client-side verify: due_date must exist and be within window if not task.due_date: continue @@ -350,6 +360,27 @@ class Scheduler: f"Task description: {state.get('custom_fields', {})}" ) + # Check if the tool skipped or reported an error without doing work + if result.startswith("Skipped:") or result.startswith("Error:"): + state["state"] = "failed" + state["error"] = result[:500] + state["completed_at"] = datetime.now(UTC).isoformat() + self.db.kv_set(kv_key, json.dumps(state)) + + client.add_comment( + task_id, + f"⚠️ CheddahBot could not execute this task.\n\n{result[:2000]}", + ) + # Move back to "to do" so it can be retried + client.update_task_status(task_id, "to do") + + self._notify( + f"ClickUp task skipped: **{task.name}**\n" + f"Reason: {result[:200]}" + ) + log.info("ClickUp task skipped: %s — %s", task.name, result[:200]) + return + # Check if the tool already handled ClickUp sync internally tool_handled_sync = "## ClickUp Sync" in result diff --git a/cheddahbot/ui.py b/cheddahbot/ui.py index 69c8482..955fb30 100644 --- a/cheddahbot/ui.py +++ b/cheddahbot/ui.py @@ -360,26 +360,45 @@ def create_ui( agent = _get_agent(agent_name) try: response_text = "" - chat_history = [*chat_history, {"role": "assistant", "content": ""}] + assistant_added = False for chunk in agent.respond(text, files=processed_files): response_text += chunk - chat_history[-1] = {"role": "assistant", "content": response_text} + if not assistant_added: + chat_history = [ + *chat_history, + {"role": "assistant", "content": response_text}, + ] + assistant_added = True + else: + chat_history[-1] = {"role": "assistant", "content": response_text} yield chat_history, gr.update(value=None), gr.update() # If no response came through, show a fallback if not response_text: - chat_history[-1] = { - "role": "assistant", - "content": "(No response received from model)", - } + chat_history = [ + *chat_history, + { + "role": "assistant", + "content": "(No response received from model)", + }, + ] yield chat_history, gr.update(value=None), gr.update() except Exception as e: log.error("Error in agent.respond: %s", e, exc_info=True) - chat_history = [ - *chat_history, - {"role": "assistant", "content": f"Error: {e}"}, - ] + error_msg = f"Sorry, something went wrong: {e}" + # If last message is an empty assistant bubble, replace it + if ( + chat_history + and chat_history[-1].get("role") == "assistant" + and not chat_history[-1].get("content") + ): + chat_history[-1] = {"role": "assistant", "content": error_msg} + else: + chat_history = [ + *chat_history, + {"role": "assistant", "content": error_msg}, + ] yield chat_history, gr.update(value=None), gr.update() # Refresh conversation list after message (title/updated_at may have changed) @@ -466,8 +485,8 @@ def create_ui( outputs=[active_agent_name, agent_selector, chatbot, conv_list_state, browser_state], ) - # Pipeline status polling timer (every 3 seconds) - status_timer = gr.Timer(3) + # Pipeline status polling timer (every 10 seconds) + status_timer = gr.Timer(10) status_timer.tick(poll_pipeline_status, [active_agent_name], [pipeline_status]) # Notification polling timer (every 10 seconds)