Fix UI errors, message formatting, scheduler auto_execute, and LLM retry

Core fixes:
- Rewrite router.py format_messages_for_llm() to properly handle tool
  call/result message pairs in OpenAI format instead of faking them as
  user messages — root cause of most LLM API errors
- Fix scheduler ignoring auto_execute:false flag, which caused all Link
  Building tasks to be incorrectly executed and moved to internal review
- Add safety check so Skipped/Error tool results don't get marked as
  completed in ClickUp

Additional improvements:
- Add LLM retry logic (2 retries on transient 5xx/timeout/rate-limit)
- Replace raw LLM tracebacks with friendly error messages
- Fix ghost assistant bubble in UI by deferring append to first chunk
- Auto-title conversations from first user message
- Consistent tool_call_id generation (resolve once, reuse everywhere)
- Reduce pipeline status polling from 3s to 10s
- Update CLAUDE.md: remove stale watchdog/autostart docs

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
cora-start
PeninsulaInd 2026-02-19 21:34:02 -06:00
parent 916bec8c0e
commit d9e0020b67
7 changed files with 282 additions and 75 deletions

View File

@ -202,4 +202,4 @@ Fixtures in `conftest.py`: `tmp_db` (fresh SQLite), `sample_clickup_task_data` (
- Don't add to `requirements.txt` — use `uv add` (pyproject.toml) - Don't add to `requirements.txt` — use `uv add` (pyproject.toml)
- Don't call tools directly from UI code — go through `NotificationBus` for scheduler events - Don't call tools directly from UI code — go through `NotificationBus` for scheduler events
- Don't store ClickUp state outside of `kv_store` — it's the single source of truth - Don't store ClickUp state outside of `kv_store` — it's the single source of truth
- **Don't try to restart CheddahBot by killing and relaunching** — there is an autostart/watchdog that respawns the process automatically after a kill. If you need to restart, just kill the old PID and the autostart will handle it. Do NOT launch a second instance or you'll get port conflicts. - **Restarting CheddahBot** — there is no watchdog/autostart. Kill the old process, then relaunch with `uv run python -m cheddahbot` (or `bash start.sh`). Do NOT launch a second instance without killing the first or you'll get port conflicts.

View File

@ -223,10 +223,16 @@ class Agent:
# Execute tools # Execute tools
if self._tools: if self._tools:
# Resolve IDs once so assistant message and tool responses match
resolved_ids = [
tc.get("id") or f"call_{tc['name']}_{i}"
for i, tc in enumerate(unique_tool_calls)
]
# Build OpenAI-format assistant message with tool_calls # Build OpenAI-format assistant message with tool_calls
openai_tool_calls = [ openai_tool_calls = [
{ {
"id": tc.get("id", f"call_{tc['name']}_{i}"), "id": resolved_ids[i],
"type": "function", "type": "function",
"function": { "function": {
"name": tc["name"], "name": tc["name"],
@ -243,19 +249,19 @@ class Agent:
} }
) )
for tc in unique_tool_calls: for i, tc in enumerate(unique_tool_calls):
yield f"\n\n**Using tool: {tc['name']}**\n" yield f"\n\n*Calling {tc['name']}...*\n"
try: try:
result = self._tools.execute(tc["name"], tc.get("input", {})) result = self._tools.execute(tc["name"], tc.get("input", {}))
except Exception as e: except Exception as e:
result = f"Tool error: {e}" result = f"Tool error: {e}"
yield f"```\n{result[:2000]}\n```\n\n" log.info("Tool %s result: %s", tc["name"], result[:500])
self.db.add_message(conv_id, "tool", result, tool_result=tc["name"]) self.db.add_message(conv_id, "tool", result, tool_result=tc["name"])
messages.append( messages.append(
{ {
"role": "tool", "role": "tool",
"tool_call_id": tc.get("id", f"call_{tc['name']}"), "tool_call_id": resolved_ids[i],
"content": result, "content": result,
} }
) )
@ -271,12 +277,28 @@ class Agent:
else: else:
yield "\n(Reached maximum tool iterations)" yield "\n(Reached maximum tool iterations)"
# Auto-title the conversation from the first user message
self._maybe_set_title(conv_id, user_input)
# Check if memory flush is needed # Check if memory flush is needed
if self._memory: if self._memory:
msg_count = self.db.count_messages(conv_id) msg_count = self.db.count_messages(conv_id)
if msg_count > self.config.memory.flush_threshold: if msg_count > self.config.memory.flush_threshold:
self._memory.auto_flush(conv_id) self._memory.auto_flush(conv_id)
def _maybe_set_title(self, conv_id: str, user_input: str):
"""Set conversation title from first user message if still untitled."""
try:
if self.db.count_messages(conv_id) > 3:
return
title = user_input.split("\n", 1)[0].strip()
if len(title) > 50:
title = title[:47] + "..."
if title:
self.db.update_conversation_title(conv_id, title)
except Exception as e:
log.warning("Failed to set conversation title: %s", e)
def respond_to_prompt(self, prompt: str) -> str: def respond_to_prompt(self, prompt: str) -> str:
"""Non-streaming response for scheduled tasks / internal use.""" """Non-streaming response for scheduled tasks / internal use."""
result_parts = [] result_parts = []

View File

@ -109,6 +109,12 @@ class Database:
).fetchall() ).fetchall()
return [dict(r) for r in rows] return [dict(r) for r in rows]
def update_conversation_title(self, conv_id: str, title: str):
    """Persist a new title for the conversation identified by ``conv_id``."""
    sql = "UPDATE conversations SET title = ? WHERE id = ?"
    self._conn.execute(sql, (title, conv_id))
    self._conn.commit()
# -- Messages -- # -- Messages --
def add_message( def add_message(

View File

@ -19,6 +19,7 @@ import os
import shutil import shutil
import subprocess import subprocess
import sys import sys
import time
from collections.abc import Generator from collections.abc import Generator
from dataclasses import dataclass from dataclasses import dataclass
@ -234,62 +235,77 @@ class LLMAdapter:
if tools: if tools:
kwargs["tools"] = tools kwargs["tools"] = tools
try: max_retries = 2
if stream: has_yielded = False
response = client.chat.completions.create(**kwargs)
tool_calls_accum: dict[int, dict] = {}
for chunk in response:
delta = chunk.choices[0].delta if chunk.choices else None
if not delta:
continue
if delta.content:
yield {"type": "text", "content": delta.content}
if delta.tool_calls:
for tc in delta.tool_calls:
idx = tc.index
if idx not in tool_calls_accum:
tool_calls_accum[idx] = {
"id": tc.id or "",
"name": tc.function.name
if tc.function and tc.function.name
else "",
"arguments": "",
}
if tc.function and tc.function.arguments:
tool_calls_accum[idx]["arguments"] += tc.function.arguments
if tc.id:
tool_calls_accum[idx]["id"] = tc.id
for _, tc in sorted(tool_calls_accum.items()): for attempt in range(max_retries + 1):
try: try:
args = json.loads(tc["arguments"]) if stream:
except json.JSONDecodeError: response = client.chat.completions.create(**kwargs)
args = {} tool_calls_accum: dict[int, dict] = {}
yield { for chunk in response:
"type": "tool_use", delta = chunk.choices[0].delta if chunk.choices else None
"id": tc["id"], if not delta:
"name": tc["name"], continue
"input": args, if delta.content:
} has_yielded = True
else: yield {"type": "text", "content": delta.content}
response = client.chat.completions.create(**kwargs) if delta.tool_calls:
msg = response.choices[0].message for tc in delta.tool_calls:
if msg.content: idx = tc.index
yield {"type": "text", "content": msg.content} if idx not in tool_calls_accum:
if msg.tool_calls: tool_calls_accum[idx] = {
for tc in msg.tool_calls: "id": tc.id or "",
"name": tc.function.name
if tc.function and tc.function.name
else "",
"arguments": "",
}
if tc.function and tc.function.arguments:
tool_calls_accum[idx]["arguments"] += tc.function.arguments
if tc.id:
tool_calls_accum[idx]["id"] = tc.id
for _, tc in sorted(tool_calls_accum.items()):
try: try:
args = json.loads(tc.function.arguments) args = json.loads(tc["arguments"])
except json.JSONDecodeError: except json.JSONDecodeError:
args = {} args = {}
yield { yield {
"type": "tool_use", "type": "tool_use",
"id": tc.id, "id": tc["id"],
"name": tc.function.name, "name": tc["name"],
"input": args, "input": args,
} }
except Exception as e: else:
yield {"type": "text", "content": f"LLM error ({self.provider}): {e}"} response = client.chat.completions.create(**kwargs)
msg = response.choices[0].message
if msg.content:
has_yielded = True
yield {"type": "text", "content": msg.content}
if msg.tool_calls:
for tc in msg.tool_calls:
try:
args = json.loads(tc.function.arguments)
except json.JSONDecodeError:
args = {}
yield {
"type": "tool_use",
"id": tc.id,
"name": tc.function.name,
"input": args,
}
# Success — break out of retry loop
return
except Exception as e:
if not has_yielded and attempt < max_retries and _is_retryable_error(e):
wait = 2 ** attempt
log.warning("Retryable LLM error (attempt %d/%d), retrying in %ds: %s",
attempt + 1, max_retries + 1, wait, e)
time.sleep(wait)
continue
yield {"type": "text", "content": _friendly_error(e, self.provider)}
# ── Helpers ── # ── Helpers ──
@ -386,3 +402,35 @@ class LLMAdapter:
def list_available_models(self) -> list[ModelInfo]: def list_available_models(self) -> list[ModelInfo]:
"""Backwards-compatible alias for list_chat_models().""" """Backwards-compatible alias for list_chat_models()."""
return self.list_chat_models() return self.list_chat_models()
def _is_retryable_error(e: Exception) -> bool:
"""Return True for transient errors worth retrying (5xx, timeout, rate limit)."""
name = type(e).__name__
# openai library exceptions
if name in ("APITimeoutError", "InternalServerError", "RateLimitError", "APIConnectionError"):
return True
# Status-code based (works with openai.APIStatusError subclasses)
status = getattr(e, "status_code", None)
if status and status >= 500:
return True
return status == 429
def _friendly_error(e: Exception, provider: str) -> str:
"""Map common LLM exceptions to plain-English messages."""
name = type(e).__name__
if name == "AuthenticationError" or "401" in str(e):
return f"Authentication failed for {provider}. Please check your API key."
if name == "RateLimitError" or "429" in str(e):
return f"Rate limited by {provider}. Please wait a moment and try again."
if name in ("APITimeoutError", "APIConnectionError") or "timeout" in str(e).lower():
return (
f"Could not reach {provider} — the service may be down "
"or your connection is interrupted."
)
if name == "InternalServerError" or (getattr(e, "status_code", None) or 0) >= 500:
return f"{provider} returned a server error. Please try again shortly."
# Generic fallback — still friendlier than a raw traceback
log.error("LLM error (%s): %s", provider, e, exc_info=True)
return f"Something went wrong talking to {provider}. Check the logs for details."

View File

@ -2,6 +2,7 @@
from __future__ import annotations from __future__ import annotations
import json
from pathlib import Path from pathlib import Path
@ -63,20 +64,100 @@ def format_messages_for_llm(
history: list[dict], history: list[dict],
max_messages: int = 50, max_messages: int = 50,
) -> list[dict]: ) -> list[dict]:
"""Format conversation history into LLM message format.""" """Format conversation history into LLM message format.
Handles three message types from the DB:
- user: passed through as role=user
- assistant: reconstructed with tool_calls in OpenAI format when present;
skipped if empty content AND no tool_calls
- tool: kept as role=tool with a tool_call_id linking back to the
assistant message that requested it
"""
messages = [{"role": "system", "content": system_prompt}] messages = [{"role": "system", "content": system_prompt}]
# Take the most recent messages up to the limit
recent = history[-max_messages:] if len(history) > max_messages else history recent = history[-max_messages:] if len(history) > max_messages else history
for msg in recent: for msg in recent:
role = msg.get("role", "user") role = msg.get("role", "user")
content = msg.get("content", "") content = msg.get("content", "")
if role in ("user", "assistant", "system"): tool_calls = msg.get("tool_calls") # list or None
messages.append({"role": role, "content": content})
elif role == "tool":
# Tool results go as a user message with context
tool_name = msg.get("tool_result", "unknown")
messages.append({"role": "user", "content": f'[Tool "{tool_name}" result]\n{content}'})
return messages if role == "user":
messages.append({"role": "user", "content": content})
elif role == "assistant":
# Skip completely empty assistant messages (no text, no tool_calls)
if not content and not tool_calls:
continue
entry: dict = {"role": "assistant", "content": content or None}
if tool_calls:
openai_tcs = []
for i, tc in enumerate(tool_calls):
tc_id = tc.get("id") or f"call_{tc.get('name', 'unknown')}_{i}"
openai_tcs.append(
{
"id": tc_id,
"type": "function",
"function": {
"name": tc.get("name", "unknown"),
"arguments": json.dumps(tc.get("input", {})),
},
}
)
entry["tool_calls"] = openai_tcs
messages.append(entry)
elif role == "tool":
tool_name = msg.get("tool_result", "unknown")
tc_id = _find_tool_call_id(messages, tool_name)
messages.append(
{
"role": "tool",
"tool_call_id": tc_id,
"content": content,
}
)
return _merge_consecutive(messages)
def _find_tool_call_id(messages: list[dict], tool_name: str) -> str:
"""Walk backwards through messages to find the tool_call_id for a tool result."""
for msg in reversed(messages):
if msg.get("role") != "assistant" or "tool_calls" not in msg:
continue
for tc in msg["tool_calls"]:
fn = tc.get("function", {})
if fn.get("name") == tool_name:
return tc["id"]
# Fallback: generate a deterministic ID so the API doesn't reject it
return f"call_{tool_name}_0"
def _merge_consecutive(messages: list[dict]) -> list[dict]:
"""Merge back-to-back messages with the same role to avoid API rejection.
Only merges user or assistant messages (not system or tool).
"""
if not messages:
return messages
merged: list[dict] = [messages[0]]
for msg in messages[1:]:
prev = merged[-1]
if (
msg["role"] == prev["role"]
and msg["role"] in ("user", "assistant")
and "tool_calls" not in prev
and "tool_calls" not in msg
):
# Merge text content
prev_text = prev.get("content") or ""
new_text = msg.get("content") or ""
prev["content"] = f"{prev_text}\n\n{new_text}".strip()
else:
merged.append(msg)
return merged

View File

@ -288,6 +288,16 @@ class Scheduler:
if task.task_type not in skill_map: if task.task_type not in skill_map:
continue continue
# Respect auto_execute flag — skip tasks that require manual trigger
mapping = skill_map[task.task_type]
if not mapping.get("auto_execute", False):
log.debug(
"Skipping task '%s' (type=%s): auto_execute is false",
task.name,
task.task_type,
)
continue
# Client-side verify: due_date must exist and be within window # Client-side verify: due_date must exist and be within window
if not task.due_date: if not task.due_date:
continue continue
@ -350,6 +360,27 @@ class Scheduler:
f"Task description: {state.get('custom_fields', {})}" f"Task description: {state.get('custom_fields', {})}"
) )
# Check if the tool skipped or reported an error without doing work
if result.startswith("Skipped:") or result.startswith("Error:"):
state["state"] = "failed"
state["error"] = result[:500]
state["completed_at"] = datetime.now(UTC).isoformat()
self.db.kv_set(kv_key, json.dumps(state))
client.add_comment(
task_id,
f"⚠️ CheddahBot could not execute this task.\n\n{result[:2000]}",
)
# Move back to "to do" so it can be retried
client.update_task_status(task_id, "to do")
self._notify(
f"ClickUp task skipped: **{task.name}**\n"
f"Reason: {result[:200]}"
)
log.info("ClickUp task skipped: %s%s", task.name, result[:200])
return
# Check if the tool already handled ClickUp sync internally # Check if the tool already handled ClickUp sync internally
tool_handled_sync = "## ClickUp Sync" in result tool_handled_sync = "## ClickUp Sync" in result

View File

@ -360,26 +360,45 @@ def create_ui(
agent = _get_agent(agent_name) agent = _get_agent(agent_name)
try: try:
response_text = "" response_text = ""
chat_history = [*chat_history, {"role": "assistant", "content": ""}] assistant_added = False
for chunk in agent.respond(text, files=processed_files): for chunk in agent.respond(text, files=processed_files):
response_text += chunk response_text += chunk
chat_history[-1] = {"role": "assistant", "content": response_text} if not assistant_added:
chat_history = [
*chat_history,
{"role": "assistant", "content": response_text},
]
assistant_added = True
else:
chat_history[-1] = {"role": "assistant", "content": response_text}
yield chat_history, gr.update(value=None), gr.update() yield chat_history, gr.update(value=None), gr.update()
# If no response came through, show a fallback # If no response came through, show a fallback
if not response_text: if not response_text:
chat_history[-1] = { chat_history = [
"role": "assistant", *chat_history,
"content": "(No response received from model)", {
} "role": "assistant",
"content": "(No response received from model)",
},
]
yield chat_history, gr.update(value=None), gr.update() yield chat_history, gr.update(value=None), gr.update()
except Exception as e: except Exception as e:
log.error("Error in agent.respond: %s", e, exc_info=True) log.error("Error in agent.respond: %s", e, exc_info=True)
chat_history = [ error_msg = f"Sorry, something went wrong: {e}"
*chat_history, # If last message is an empty assistant bubble, replace it
{"role": "assistant", "content": f"Error: {e}"}, if (
] chat_history
and chat_history[-1].get("role") == "assistant"
and not chat_history[-1].get("content")
):
chat_history[-1] = {"role": "assistant", "content": error_msg}
else:
chat_history = [
*chat_history,
{"role": "assistant", "content": error_msg},
]
yield chat_history, gr.update(value=None), gr.update() yield chat_history, gr.update(value=None), gr.update()
# Refresh conversation list after message (title/updated_at may have changed) # Refresh conversation list after message (title/updated_at may have changed)
@ -466,8 +485,8 @@ def create_ui(
outputs=[active_agent_name, agent_selector, chatbot, conv_list_state, browser_state], outputs=[active_agent_name, agent_selector, chatbot, conv_list_state, browser_state],
) )
# Pipeline status polling timer (every 3 seconds) # Pipeline status polling timer (every 10 seconds)
status_timer = gr.Timer(3) status_timer = gr.Timer(10)
status_timer.tick(poll_pipeline_status, [active_agent_name], [pipeline_status]) status_timer.tick(poll_pipeline_status, [active_agent_name], [pipeline_status])
# Notification polling timer (every 10 seconds) # Notification polling timer (every 10 seconds)