Fix UI errors, message formatting, scheduler auto_execute, and LLM retry

Core fixes:
- Rewrite router.py format_messages_for_llm() to properly handle tool
  call/result message pairs in OpenAI format instead of faking them as
  user messages — root cause of most LLM API errors
- Fix scheduler ignoring auto_execute:false flag, which caused all Link
  Building tasks to be incorrectly executed and moved to internal review
- Add safety check so Skipped/Error tool results don't get marked as
  completed in ClickUp

Additional improvements:
- Add LLM retry logic (2 retries on transient 5xx/timeout/rate-limit)
- Replace raw LLM tracebacks with friendly error messages
- Fix ghost assistant bubble in UI by deferring append to first chunk
- Auto-title conversations from first user message
- Consistent tool_call_id generation (resolve once, reuse everywhere)
- Reduce pipeline status polling from 3s to 10s
- Update CLAUDE.md: remove stale watchdog/autostart docs

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
cora-start
PeninsulaInd 2026-02-19 21:34:02 -06:00
parent 916bec8c0e
commit d9e0020b67
7 changed files with 282 additions and 75 deletions

View File

@ -202,4 +202,4 @@ Fixtures in `conftest.py`: `tmp_db` (fresh SQLite), `sample_clickup_task_data` (
- Don't add to `requirements.txt` — use `uv add` (pyproject.toml)
- Don't call tools directly from UI code — go through `NotificationBus` for scheduler events
- Don't store ClickUp state outside of `kv_store` — it's the single source of truth
- **Don't try to restart CheddahBot by killing and relaunching** — there is an autostart/watchdog that respawns the process automatically after a kill. If you need to restart, just kill the old PID and the autostart will handle it. Do NOT launch a second instance or you'll get port conflicts.
- **Restarting CheddahBot** — there is no watchdog/autostart. Kill the old process, then relaunch with `uv run python -m cheddahbot` (or `bash start.sh`). Do NOT launch a second instance without killing the first or you'll get port conflicts.

View File

@ -223,10 +223,16 @@ class Agent:
# Execute tools
if self._tools:
# Resolve IDs once so assistant message and tool responses match
resolved_ids = [
tc.get("id") or f"call_{tc['name']}_{i}"
for i, tc in enumerate(unique_tool_calls)
]
# Build OpenAI-format assistant message with tool_calls
openai_tool_calls = [
{
"id": tc.get("id", f"call_{tc['name']}_{i}"),
"id": resolved_ids[i],
"type": "function",
"function": {
"name": tc["name"],
@ -243,19 +249,19 @@ class Agent:
}
)
for tc in unique_tool_calls:
yield f"\n\n**Using tool: {tc['name']}**\n"
for i, tc in enumerate(unique_tool_calls):
yield f"\n\n*Calling {tc['name']}...*\n"
try:
result = self._tools.execute(tc["name"], tc.get("input", {}))
except Exception as e:
result = f"Tool error: {e}"
yield f"```\n{result[:2000]}\n```\n\n"
log.info("Tool %s result: %s", tc["name"], result[:500])
self.db.add_message(conv_id, "tool", result, tool_result=tc["name"])
messages.append(
{
"role": "tool",
"tool_call_id": tc.get("id", f"call_{tc['name']}"),
"tool_call_id": resolved_ids[i],
"content": result,
}
)
@ -271,12 +277,28 @@ class Agent:
else:
yield "\n(Reached maximum tool iterations)"
# Auto-title the conversation from the first user message
self._maybe_set_title(conv_id, user_input)
# Check if memory flush is needed
if self._memory:
msg_count = self.db.count_messages(conv_id)
if msg_count > self.config.memory.flush_threshold:
self._memory.auto_flush(conv_id)
def _maybe_set_title(self, conv_id: str, user_input: str):
    """Set the conversation title from the first user message if still untitled.

    Best-effort: any failure is logged and swallowed so titling can never
    break the response flow.

    Args:
        conv_id: Conversation to (maybe) retitle.
        user_input: Raw text of the user's message.
    """
    try:
        # Only title young conversations; later messages shouldn't rename.
        if self.db.count_messages(conv_id) > 3:
            return
        # Use the first non-blank line so a leading newline in the input
        # doesn't yield an empty title and silently skip titling.
        title = next(
            (line.strip() for line in user_input.splitlines() if line.strip()),
            "",
        )
        if len(title) > 50:
            title = title[:47] + "..."
        if title:
            self.db.update_conversation_title(conv_id, title)
    except Exception as e:
        # Deliberate broad catch: titling is cosmetic, never fatal.
        log.warning("Failed to set conversation title: %s", e)
def respond_to_prompt(self, prompt: str) -> str:
"""Non-streaming response for scheduled tasks / internal use."""
result_parts = []

View File

@ -109,6 +109,12 @@ class Database:
).fetchall()
return [dict(r) for r in rows]
def update_conversation_title(self, conv_id: str, title: str):
self._conn.execute(
"UPDATE conversations SET title = ? WHERE id = ?", (title, conv_id)
)
self._conn.commit()
# -- Messages --
def add_message(

View File

@ -19,6 +19,7 @@ import os
import shutil
import subprocess
import sys
import time
from collections.abc import Generator
from dataclasses import dataclass
@ -234,62 +235,77 @@ class LLMAdapter:
if tools:
kwargs["tools"] = tools
try:
if stream:
response = client.chat.completions.create(**kwargs)
tool_calls_accum: dict[int, dict] = {}
for chunk in response:
delta = chunk.choices[0].delta if chunk.choices else None
if not delta:
continue
if delta.content:
yield {"type": "text", "content": delta.content}
if delta.tool_calls:
for tc in delta.tool_calls:
idx = tc.index
if idx not in tool_calls_accum:
tool_calls_accum[idx] = {
"id": tc.id or "",
"name": tc.function.name
if tc.function and tc.function.name
else "",
"arguments": "",
}
if tc.function and tc.function.arguments:
tool_calls_accum[idx]["arguments"] += tc.function.arguments
if tc.id:
tool_calls_accum[idx]["id"] = tc.id
max_retries = 2
has_yielded = False
for _, tc in sorted(tool_calls_accum.items()):
try:
args = json.loads(tc["arguments"])
except json.JSONDecodeError:
args = {}
yield {
"type": "tool_use",
"id": tc["id"],
"name": tc["name"],
"input": args,
}
else:
response = client.chat.completions.create(**kwargs)
msg = response.choices[0].message
if msg.content:
yield {"type": "text", "content": msg.content}
if msg.tool_calls:
for tc in msg.tool_calls:
for attempt in range(max_retries + 1):
try:
if stream:
response = client.chat.completions.create(**kwargs)
tool_calls_accum: dict[int, dict] = {}
for chunk in response:
delta = chunk.choices[0].delta if chunk.choices else None
if not delta:
continue
if delta.content:
has_yielded = True
yield {"type": "text", "content": delta.content}
if delta.tool_calls:
for tc in delta.tool_calls:
idx = tc.index
if idx not in tool_calls_accum:
tool_calls_accum[idx] = {
"id": tc.id or "",
"name": tc.function.name
if tc.function and tc.function.name
else "",
"arguments": "",
}
if tc.function and tc.function.arguments:
tool_calls_accum[idx]["arguments"] += tc.function.arguments
if tc.id:
tool_calls_accum[idx]["id"] = tc.id
for _, tc in sorted(tool_calls_accum.items()):
try:
args = json.loads(tc.function.arguments)
args = json.loads(tc["arguments"])
except json.JSONDecodeError:
args = {}
yield {
"type": "tool_use",
"id": tc.id,
"name": tc.function.name,
"id": tc["id"],
"name": tc["name"],
"input": args,
}
except Exception as e:
yield {"type": "text", "content": f"LLM error ({self.provider}): {e}"}
else:
response = client.chat.completions.create(**kwargs)
msg = response.choices[0].message
if msg.content:
has_yielded = True
yield {"type": "text", "content": msg.content}
if msg.tool_calls:
for tc in msg.tool_calls:
try:
args = json.loads(tc.function.arguments)
except json.JSONDecodeError:
args = {}
yield {
"type": "tool_use",
"id": tc.id,
"name": tc.function.name,
"input": args,
}
# Success — break out of retry loop
return
except Exception as e:
if not has_yielded and attempt < max_retries and _is_retryable_error(e):
wait = 2 ** attempt
log.warning("Retryable LLM error (attempt %d/%d), retrying in %ds: %s",
attempt + 1, max_retries + 1, wait, e)
time.sleep(wait)
continue
yield {"type": "text", "content": _friendly_error(e, self.provider)}
# ── Helpers ──
@ -386,3 +402,35 @@ class LLMAdapter:
def list_available_models(self) -> list[ModelInfo]:
    """Backwards-compatible alias for list_chat_models()."""
    models = self.list_chat_models()
    return models
def _is_retryable_error(e: Exception) -> bool:
"""Return True for transient errors worth retrying (5xx, timeout, rate limit)."""
name = type(e).__name__
# openai library exceptions
if name in ("APITimeoutError", "InternalServerError", "RateLimitError", "APIConnectionError"):
return True
# Status-code based (works with openai.APIStatusError subclasses)
status = getattr(e, "status_code", None)
if status and status >= 500:
return True
return status == 429
def _friendly_error(e: Exception, provider: str) -> str:
"""Map common LLM exceptions to plain-English messages."""
name = type(e).__name__
if name == "AuthenticationError" or "401" in str(e):
return f"Authentication failed for {provider}. Please check your API key."
if name == "RateLimitError" or "429" in str(e):
return f"Rate limited by {provider}. Please wait a moment and try again."
if name in ("APITimeoutError", "APIConnectionError") or "timeout" in str(e).lower():
return (
f"Could not reach {provider} — the service may be down "
"or your connection is interrupted."
)
if name == "InternalServerError" or (getattr(e, "status_code", None) or 0) >= 500:
return f"{provider} returned a server error. Please try again shortly."
# Generic fallback — still friendlier than a raw traceback
log.error("LLM error (%s): %s", provider, e, exc_info=True)
return f"Something went wrong talking to {provider}. Check the logs for details."

View File

@ -2,6 +2,7 @@
from __future__ import annotations
import json
from pathlib import Path
@ -63,20 +64,100 @@ def format_messages_for_llm(
history: list[dict],
max_messages: int = 50,
) -> list[dict]:
"""Format conversation history into LLM message format."""
"""Format conversation history into LLM message format.
Handles three message types from the DB:
- user: passed through as role=user
- assistant: reconstructed with tool_calls in OpenAI format when present;
skipped if empty content AND no tool_calls
- tool: kept as role=tool with a tool_call_id linking back to the
assistant message that requested it
"""
messages = [{"role": "system", "content": system_prompt}]
# Take the most recent messages up to the limit
recent = history[-max_messages:] if len(history) > max_messages else history
for msg in recent:
role = msg.get("role", "user")
content = msg.get("content", "")
if role in ("user", "assistant", "system"):
messages.append({"role": role, "content": content})
elif role == "tool":
# Tool results go as a user message with context
tool_name = msg.get("tool_result", "unknown")
messages.append({"role": "user", "content": f'[Tool "{tool_name}" result]\n{content}'})
tool_calls = msg.get("tool_calls") # list or None
return messages
if role == "user":
messages.append({"role": "user", "content": content})
elif role == "assistant":
# Skip completely empty assistant messages (no text, no tool_calls)
if not content and not tool_calls:
continue
entry: dict = {"role": "assistant", "content": content or None}
if tool_calls:
openai_tcs = []
for i, tc in enumerate(tool_calls):
tc_id = tc.get("id") or f"call_{tc.get('name', 'unknown')}_{i}"
openai_tcs.append(
{
"id": tc_id,
"type": "function",
"function": {
"name": tc.get("name", "unknown"),
"arguments": json.dumps(tc.get("input", {})),
},
}
)
entry["tool_calls"] = openai_tcs
messages.append(entry)
elif role == "tool":
tool_name = msg.get("tool_result", "unknown")
tc_id = _find_tool_call_id(messages, tool_name)
messages.append(
{
"role": "tool",
"tool_call_id": tc_id,
"content": content,
}
)
return _merge_consecutive(messages)
def _find_tool_call_id(messages: list[dict], tool_name: str) -> str:
"""Walk backwards through messages to find the tool_call_id for a tool result."""
for msg in reversed(messages):
if msg.get("role") != "assistant" or "tool_calls" not in msg:
continue
for tc in msg["tool_calls"]:
fn = tc.get("function", {})
if fn.get("name") == tool_name:
return tc["id"]
# Fallback: generate a deterministic ID so the API doesn't reject it
return f"call_{tool_name}_0"
def _merge_consecutive(messages: list[dict]) -> list[dict]:
"""Merge back-to-back messages with the same role to avoid API rejection.
Only merges user or assistant messages (not system or tool).
"""
if not messages:
return messages
merged: list[dict] = [messages[0]]
for msg in messages[1:]:
prev = merged[-1]
if (
msg["role"] == prev["role"]
and msg["role"] in ("user", "assistant")
and "tool_calls" not in prev
and "tool_calls" not in msg
):
# Merge text content
prev_text = prev.get("content") or ""
new_text = msg.get("content") or ""
prev["content"] = f"{prev_text}\n\n{new_text}".strip()
else:
merged.append(msg)
return merged

View File

@ -288,6 +288,16 @@ class Scheduler:
if task.task_type not in skill_map:
continue
# Respect auto_execute flag — skip tasks that require manual trigger
mapping = skill_map[task.task_type]
if not mapping.get("auto_execute", False):
log.debug(
"Skipping task '%s' (type=%s): auto_execute is false",
task.name,
task.task_type,
)
continue
# Client-side verify: due_date must exist and be within window
if not task.due_date:
continue
@ -350,6 +360,27 @@ class Scheduler:
f"Task description: {state.get('custom_fields', {})}"
)
# Check if the tool skipped or reported an error without doing work
if result.startswith("Skipped:") or result.startswith("Error:"):
state["state"] = "failed"
state["error"] = result[:500]
state["completed_at"] = datetime.now(UTC).isoformat()
self.db.kv_set(kv_key, json.dumps(state))
client.add_comment(
task_id,
f"⚠️ CheddahBot could not execute this task.\n\n{result[:2000]}",
)
# Move back to "to do" so it can be retried
client.update_task_status(task_id, "to do")
self._notify(
f"ClickUp task skipped: **{task.name}**\n"
f"Reason: {result[:200]}"
)
log.info("ClickUp task skipped: %s%s", task.name, result[:200])
return
# Check if the tool already handled ClickUp sync internally
tool_handled_sync = "## ClickUp Sync" in result

View File

@ -360,26 +360,45 @@ def create_ui(
agent = _get_agent(agent_name)
try:
response_text = ""
chat_history = [*chat_history, {"role": "assistant", "content": ""}]
assistant_added = False
for chunk in agent.respond(text, files=processed_files):
response_text += chunk
chat_history[-1] = {"role": "assistant", "content": response_text}
if not assistant_added:
chat_history = [
*chat_history,
{"role": "assistant", "content": response_text},
]
assistant_added = True
else:
chat_history[-1] = {"role": "assistant", "content": response_text}
yield chat_history, gr.update(value=None), gr.update()
# If no response came through, show a fallback
if not response_text:
chat_history[-1] = {
"role": "assistant",
"content": "(No response received from model)",
}
chat_history = [
*chat_history,
{
"role": "assistant",
"content": "(No response received from model)",
},
]
yield chat_history, gr.update(value=None), gr.update()
except Exception as e:
log.error("Error in agent.respond: %s", e, exc_info=True)
chat_history = [
*chat_history,
{"role": "assistant", "content": f"Error: {e}"},
]
error_msg = f"Sorry, something went wrong: {e}"
# If last message is an empty assistant bubble, replace it
if (
chat_history
and chat_history[-1].get("role") == "assistant"
and not chat_history[-1].get("content")
):
chat_history[-1] = {"role": "assistant", "content": error_msg}
else:
chat_history = [
*chat_history,
{"role": "assistant", "content": error_msg},
]
yield chat_history, gr.update(value=None), gr.update()
# Refresh conversation list after message (title/updated_at may have changed)
@ -466,8 +485,8 @@ def create_ui(
outputs=[active_agent_name, agent_selector, chatbot, conv_list_state, browser_state],
)
# Pipeline status polling timer (every 3 seconds)
status_timer = gr.Timer(3)
# Pipeline status polling timer (every 10 seconds)
status_timer = gr.Timer(10)
status_timer.tick(poll_pipeline_status, [active_agent_name], [pipeline_status])
# Notification polling timer (every 10 seconds)