From d9e0020b673134f74425b07a066c9a7b0196539d Mon Sep 17 00:00:00 2001
From: PeninsulaInd <bryan@peninsulaindustries.com>
Date: Thu, 19 Feb 2026 21:34:02 -0600
Subject: [PATCH] Fix UI errors, message formatting, scheduler auto_execute,
 and LLM retry
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Core fixes:
- Rewrite router.py format_messages_for_llm() to properly handle tool
  call/result message pairs in OpenAI format instead of faking them as
  user messages — root cause of most LLM API errors
- Fix scheduler ignoring auto_execute:false flag, which caused all Link
  Building tasks to be incorrectly executed and moved to internal review
- Add safety check so Skipped/Error tool results don't get marked as
  completed in ClickUp

Additional improvements:
- Add LLM retry logic (2 retries on transient 5xx/timeout/rate-limit)
- Replace raw LLM tracebacks with friendly error messages
- Fix ghost assistant bubble in UI by deferring append to first chunk
- Auto-title conversations from first user message
- Consistent tool_call_id generation (resolve once, reuse everywhere)
- Reduce pipeline status polling from 3s to 10s
- Update CLAUDE.md: remove stale watchdog/autostart docs

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 CLAUDE.md               |   2 +-
 cheddahbot/agent.py     |  32 +++++++--
 cheddahbot/db.py        |   6 ++
 cheddahbot/llm.py       | 144 ++++++++++++++++++++++++++--------------
 cheddahbot/router.py    |  99 ++++++++++++++++++++++++---
 cheddahbot/scheduler.py |  31 +++++++++
 cheddahbot/ui.py        |  43 ++++++++----
 7 files changed, 282 insertions(+), 75 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index 4cc0797..6b05849 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -202,4 +202,4 @@ Fixtures in `conftest.py`: `tmp_db` (fresh SQLite), `sample_clickup_task_data` (
 - Don't add to `requirements.txt` — use `uv add` (pyproject.toml)
 - Don't call tools directly from UI code — go through `NotificationBus` for scheduler events
 - Don't store ClickUp state outside of `kv_store` — it's the single source of truth
-- **Don't try to restart CheddahBot by killing and relaunching** — there is an autostart/watchdog that respawns the process automatically after a kill. If you need to restart, just kill the old PID and the autostart will handle it. Do NOT launch a second instance or you'll get port conflicts.
+- **Restarting CheddahBot** — there is no watchdog/autostart. Kill the old process, then relaunch with `uv run python -m cheddahbot` (or `bash start.sh`). Do NOT launch a second instance without killing the first or you'll get port conflicts.
diff --git a/cheddahbot/agent.py b/cheddahbot/agent.py
index 421853a..4bbd86f 100644
--- a/cheddahbot/agent.py
+++ b/cheddahbot/agent.py
@@ -223,10 +223,16 @@ class Agent:
 
             # Execute tools
             if self._tools:
+                # Resolve IDs once so assistant message and tool responses match
+                resolved_ids = [
+                    tc.get("id") or f"call_{tc['name']}_{i}"
+                    for i, tc in enumerate(unique_tool_calls)
+                ]
+
                 # Build OpenAI-format assistant message with tool_calls
                 openai_tool_calls = [
                     {
-                        "id": tc.get("id", f"call_{tc['name']}_{i}"),
+                        "id": resolved_ids[i],
                         "type": "function",
                         "function": {
                             "name": tc["name"],
@@ -243,19 +249,19 @@ class Agent:
                     }
                 )
 
-                for tc in unique_tool_calls:
-                    yield f"\n\n**Using tool: {tc['name']}**\n"
+                for i, tc in enumerate(unique_tool_calls):
+                    yield f"\n\n*Calling {tc['name']}...*\n"
                     try:
                         result = self._tools.execute(tc["name"], tc.get("input", {}))
                     except Exception as e:
                         result = f"Tool error: {e}"
-                    yield f"```\n{result[:2000]}\n```\n\n"
+                    log.info("Tool %s result: %s", tc["name"], result[:500])
 
                     self.db.add_message(conv_id, "tool", result, tool_result=tc["name"])
                     messages.append(
                         {
                             "role": "tool",
-                            "tool_call_id": tc.get("id", f"call_{tc['name']}"),
+                            "tool_call_id": resolved_ids[i],
                             "content": result,
                         }
                     )
@@ -271,12 +277,28 @@ class Agent:
         else:
             yield "\n(Reached maximum tool iterations)"
 
+        # Auto-title the conversation from the first user message
+        self._maybe_set_title(conv_id, user_input)
+
         # Check if memory flush is needed
         if self._memory:
             msg_count = self.db.count_messages(conv_id)
             if msg_count > self.config.memory.flush_threshold:
                 self._memory.auto_flush(conv_id)
 
+    def _maybe_set_title(self, conv_id: str, user_input: str):
+        """Set conversation title from first user message if still untitled."""
+        try:
+            if self.db.count_messages(conv_id) > 3:
+                return
+            title = user_input.split("\n", 1)[0].strip()
+            if len(title) > 50:
+                title = title[:47] + "..."
+            if title:
+                self.db.update_conversation_title(conv_id, title)
+        except Exception as e:
+            log.warning("Failed to set conversation title: %s", e)
+
     def respond_to_prompt(self, prompt: str) -> str:
         """Non-streaming response for scheduled tasks / internal use."""
         result_parts = []
diff --git a/cheddahbot/db.py b/cheddahbot/db.py
index 141e7fd..b5af9aa 100644
--- a/cheddahbot/db.py
+++ b/cheddahbot/db.py
@@ -109,6 +109,12 @@ class Database:
             ).fetchall()
         return [dict(r) for r in rows]
 
+    def update_conversation_title(self, conv_id: str, title: str):
+        self._conn.execute(
+            "UPDATE conversations SET title = ? WHERE id = ?", (title, conv_id)
+        )
+        self._conn.commit()
+
     # -- Messages --
 
     def add_message(
diff --git a/cheddahbot/llm.py b/cheddahbot/llm.py
index e36089d..1c1c59c 100644
--- a/cheddahbot/llm.py
+++ b/cheddahbot/llm.py
@@ -19,6 +19,7 @@ import os
 import shutil
 import subprocess
 import sys
+import time
 from collections.abc import Generator
 from dataclasses import dataclass
 
@@ -234,62 +235,77 @@ class LLMAdapter:
         if tools:
             kwargs["tools"] = tools
 
-        try:
-            if stream:
-                response = client.chat.completions.create(**kwargs)
-                tool_calls_accum: dict[int, dict] = {}
-                for chunk in response:
-                    delta = chunk.choices[0].delta if chunk.choices else None
-                    if not delta:
-                        continue
-                    if delta.content:
-                        yield {"type": "text", "content": delta.content}
-                    if delta.tool_calls:
-                        for tc in delta.tool_calls:
-                            idx = tc.index
-                            if idx not in tool_calls_accum:
-                                tool_calls_accum[idx] = {
-                                    "id": tc.id or "",
-                                    "name": tc.function.name
-                                    if tc.function and tc.function.name
-                                    else "",
-                                    "arguments": "",
-                                }
-                            if tc.function and tc.function.arguments:
-                                tool_calls_accum[idx]["arguments"] += tc.function.arguments
-                            if tc.id:
-                                tool_calls_accum[idx]["id"] = tc.id
+        max_retries = 2
+        has_yielded = False
 
-                for _, tc in sorted(tool_calls_accum.items()):
-                    try:
-                        args = json.loads(tc["arguments"])
-                    except json.JSONDecodeError:
-                        args = {}
-                    yield {
-                        "type": "tool_use",
-                        "id": tc["id"],
-                        "name": tc["name"],
-                        "input": args,
-                    }
-            else:
-                response = client.chat.completions.create(**kwargs)
-                msg = response.choices[0].message
-                if msg.content:
-                    yield {"type": "text", "content": msg.content}
-                if msg.tool_calls:
-                    for tc in msg.tool_calls:
+        for attempt in range(max_retries + 1):
+            try:
+                if stream:
+                    response = client.chat.completions.create(**kwargs)
+                    tool_calls_accum: dict[int, dict] = {}
+                    for chunk in response:
+                        delta = chunk.choices[0].delta if chunk.choices else None
+                        if not delta:
+                            continue
+                        if delta.content:
+                            has_yielded = True
+                            yield {"type": "text", "content": delta.content}
+                        if delta.tool_calls:
+                            for tc in delta.tool_calls:
+                                idx = tc.index
+                                if idx not in tool_calls_accum:
+                                    tool_calls_accum[idx] = {
+                                        "id": tc.id or "",
+                                        "name": tc.function.name
+                                        if tc.function and tc.function.name
+                                        else "",
+                                        "arguments": "",
+                                    }
+                                if tc.function and tc.function.arguments:
+                                    tool_calls_accum[idx]["arguments"] += tc.function.arguments
+                                if tc.id:
+                                    tool_calls_accum[idx]["id"] = tc.id
+
+                    for _, tc in sorted(tool_calls_accum.items()):
                         try:
-                            args = json.loads(tc.function.arguments)
+                            args = json.loads(tc["arguments"])
                         except json.JSONDecodeError:
                             args = {}
                         yield {
                             "type": "tool_use",
-                            "id": tc.id,
-                            "name": tc.function.name,
+                            "id": tc["id"],
+                            "name": tc["name"],
                             "input": args,
                         }
-        except Exception as e:
-            yield {"type": "text", "content": f"LLM error ({self.provider}): {e}"}
+                else:
+                    response = client.chat.completions.create(**kwargs)
+                    msg = response.choices[0].message
+                    if msg.content:
+                        has_yielded = True
+                        yield {"type": "text", "content": msg.content}
+                    if msg.tool_calls:
+                        for tc in msg.tool_calls:
+                            try:
+                                args = json.loads(tc.function.arguments)
+                            except json.JSONDecodeError:
+                                args = {}
+                            yield {
+                                "type": "tool_use",
+                                "id": tc.id,
+                                "name": tc.function.name,
+                                "input": args,
+                            }
+                # Success — break out of retry loop
+                return
+
+            except Exception as e:
+                if not has_yielded and attempt < max_retries and _is_retryable_error(e):
+                    wait = 2 ** attempt
+                    log.warning("Retryable LLM error (attempt %d/%d), retrying in %ds: %s",
+                                attempt + 1, max_retries + 1, wait, e)
+                    time.sleep(wait)
+                    continue
+                yield {"type": "text", "content": _friendly_error(e, self.provider)}
 
     # ── Helpers ──
 
@@ -386,3 +402,35 @@ class LLMAdapter:
     def list_available_models(self) -> list[ModelInfo]:
         """Backwards-compatible alias for list_chat_models()."""
         return self.list_chat_models()
+
+
+def _is_retryable_error(e: Exception) -> bool:
+    """Return True for transient errors worth retrying (5xx, timeout, rate limit)."""
+    name = type(e).__name__
+    # openai library exceptions
+    if name in ("APITimeoutError", "InternalServerError", "RateLimitError", "APIConnectionError"):
+        return True
+    # Status-code based (works with openai.APIStatusError subclasses)
+    status = getattr(e, "status_code", None)
+    if status and status >= 500:
+        return True
+    return status == 429
+
+
+def _friendly_error(e: Exception, provider: str) -> str:
+    """Map common LLM exceptions to plain-English messages."""
+    name = type(e).__name__
+    if name == "AuthenticationError" or "401" in str(e):
+        return f"Authentication failed for {provider}. Please check your API key."
+    if name == "RateLimitError" or "429" in str(e):
+        return f"Rate limited by {provider}. Please wait a moment and try again."
+    if name in ("APITimeoutError", "APIConnectionError") or "timeout" in str(e).lower():
+        return (
+            f"Could not reach {provider} — the service may be down "
+            "or your connection is interrupted."
+        )
+    if name == "InternalServerError" or (getattr(e, "status_code", None) or 0) >= 500:
+        return f"{provider} returned a server error. Please try again shortly."
+    # Generic fallback — still friendlier than a raw traceback
+    log.error("LLM error (%s): %s", provider, e, exc_info=True)
+    return f"Something went wrong talking to {provider}. Check the logs for details."
diff --git a/cheddahbot/router.py b/cheddahbot/router.py
index df239db..43d8d9e 100644
--- a/cheddahbot/router.py
+++ b/cheddahbot/router.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import json
 from pathlib import Path
 
 
@@ -63,20 +64,100 @@ def format_messages_for_llm(
     history: list[dict],
     max_messages: int = 50,
 ) -> list[dict]:
-    """Format conversation history into LLM message format."""
+    """Format conversation history into LLM message format.
+
+    Handles three message types from the DB:
+    - user: passed through as role=user
+    - assistant: reconstructed with tool_calls in OpenAI format when present;
+      skipped if empty content AND no tool_calls
+    - tool: kept as role=tool with a tool_call_id linking back to the
+      assistant message that requested it
+    """
     messages = [{"role": "system", "content": system_prompt}]
 
-    # Take the most recent messages up to the limit
     recent = history[-max_messages:] if len(history) > max_messages else history
 
     for msg in recent:
         role = msg.get("role", "user")
         content = msg.get("content", "")
-        if role in ("user", "assistant", "system"):
-            messages.append({"role": role, "content": content})
-        elif role == "tool":
-            # Tool results go as a user message with context
-            tool_name = msg.get("tool_result", "unknown")
-            messages.append({"role": "user", "content": f'[Tool "{tool_name}" result]\n{content}'})
+        tool_calls = msg.get("tool_calls")  # list or None
 
-    return messages
+        if role == "user":
+            messages.append({"role": "user", "content": content})
+
+        elif role == "assistant":
+            # Skip completely empty assistant messages (no text, no tool_calls)
+            if not content and not tool_calls:
+                continue
+
+            entry: dict = {"role": "assistant", "content": content or None}
+
+            if tool_calls:
+                openai_tcs = []
+                for i, tc in enumerate(tool_calls):
+                    tc_id = tc.get("id") or f"call_{tc.get('name', 'unknown')}_{i}"
+                    openai_tcs.append(
+                        {
+                            "id": tc_id,
+                            "type": "function",
+                            "function": {
+                                "name": tc.get("name", "unknown"),
+                                "arguments": json.dumps(tc.get("input", {})),
+                            },
+                        }
+                    )
+                entry["tool_calls"] = openai_tcs
+
+            messages.append(entry)
+
+        elif role == "tool":
+            tool_name = msg.get("tool_result", "unknown")
+            tc_id = _find_tool_call_id(messages, tool_name)
+            messages.append(
+                {
+                    "role": "tool",
+                    "tool_call_id": tc_id,
+                    "content": content,
+                }
+            )
+
+    return _merge_consecutive(messages)
+
+
+def _find_tool_call_id(messages: list[dict], tool_name: str) -> str:
+    """Walk backwards through messages to find the tool_call_id for a tool result."""
+    for msg in reversed(messages):
+        if msg.get("role") != "assistant" or "tool_calls" not in msg:
+            continue
+        for tc in msg["tool_calls"]:
+            fn = tc.get("function", {})
+            if fn.get("name") == tool_name:
+                return tc["id"]
+    # Fallback: generate a deterministic ID so the API doesn't reject it
+    return f"call_{tool_name}_0"
+
+
+def _merge_consecutive(messages: list[dict]) -> list[dict]:
+    """Merge back-to-back messages with the same role to avoid API rejection.
+
+    Only merges user or assistant messages (not system or tool).
+    """
+    if not messages:
+        return messages
+
+    merged: list[dict] = [messages[0]]
+    for msg in messages[1:]:
+        prev = merged[-1]
+        if (
+            msg["role"] == prev["role"]
+            and msg["role"] in ("user", "assistant")
+            and "tool_calls" not in prev
+            and "tool_calls" not in msg
+        ):
+            # Merge text content
+            prev_text = prev.get("content") or ""
+            new_text = msg.get("content") or ""
+            prev["content"] = f"{prev_text}\n\n{new_text}".strip()
+        else:
+            merged.append(msg)
+    return merged
diff --git a/cheddahbot/scheduler.py b/cheddahbot/scheduler.py
index 12f1405..556475d 100644
--- a/cheddahbot/scheduler.py
+++ b/cheddahbot/scheduler.py
@@ -288,6 +288,16 @@ class Scheduler:
             if task.task_type not in skill_map:
                 continue
 
+            # Respect auto_execute flag — skip tasks that require manual trigger
+            mapping = skill_map[task.task_type]
+            if not mapping.get("auto_execute", False):
+                log.debug(
+                    "Skipping task '%s' (type=%s): auto_execute is false",
+                    task.name,
+                    task.task_type,
+                )
+                continue
+
             # Client-side verify: due_date must exist and be within window
             if not task.due_date:
                 continue
@@ -350,6 +360,27 @@ class Scheduler:
                     f"Task description: {state.get('custom_fields', {})}"
                 )
 
+            # Check if the tool skipped or reported an error without doing work
+            if result.startswith("Skipped:") or result.startswith("Error:"):
+                state["state"] = "failed"
+                state["error"] = result[:500]
+                state["completed_at"] = datetime.now(UTC).isoformat()
+                self.db.kv_set(kv_key, json.dumps(state))
+
+                client.add_comment(
+                    task_id,
+                    f"⚠️ CheddahBot could not execute this task.\n\n{result[:2000]}",
+                )
+                # Move back to "to do" so it can be retried
+                client.update_task_status(task_id, "to do")
+
+                self._notify(
+                    f"ClickUp task skipped: **{task.name}**\n"
+                    f"Reason: {result[:200]}"
+                )
+                log.info("ClickUp task skipped: %s — %s", task.name, result[:200])
+                return
+
             # Check if the tool already handled ClickUp sync internally
             tool_handled_sync = "## ClickUp Sync" in result
 
diff --git a/cheddahbot/ui.py b/cheddahbot/ui.py
index 69c8482..955fb30 100644
--- a/cheddahbot/ui.py
+++ b/cheddahbot/ui.py
@@ -360,26 +360,45 @@ def create_ui(
             agent = _get_agent(agent_name)
             try:
                 response_text = ""
-                chat_history = [*chat_history, {"role": "assistant", "content": ""}]
+                assistant_added = False
 
                 for chunk in agent.respond(text, files=processed_files):
                     response_text += chunk
-                    chat_history[-1] = {"role": "assistant", "content": response_text}
+                    if not assistant_added:
+                        chat_history = [
+                            *chat_history,
+                            {"role": "assistant", "content": response_text},
+                        ]
+                        assistant_added = True
+                    else:
+                        chat_history[-1] = {"role": "assistant", "content": response_text}
                     yield chat_history, gr.update(value=None), gr.update()
 
                 # If no response came through, show a fallback
                 if not response_text:
-                    chat_history[-1] = {
-                        "role": "assistant",
-                        "content": "(No response received from model)",
-                    }
+                    chat_history = [
+                        *chat_history,
+                        {
+                            "role": "assistant",
+                            "content": "(No response received from model)",
+                        },
+                    ]
                     yield chat_history, gr.update(value=None), gr.update()
             except Exception as e:
                 log.error("Error in agent.respond: %s", e, exc_info=True)
-                chat_history = [
-                    *chat_history,
-                    {"role": "assistant", "content": f"Error: {e}"},
-                ]
+                error_msg = f"Sorry, something went wrong: {e}"
+                # If last message is an empty assistant bubble, replace it
+                if (
+                    chat_history
+                    and chat_history[-1].get("role") == "assistant"
+                    and not chat_history[-1].get("content")
+                ):
+                    chat_history[-1] = {"role": "assistant", "content": error_msg}
+                else:
+                    chat_history = [
+                        *chat_history,
+                        {"role": "assistant", "content": error_msg},
+                    ]
                 yield chat_history, gr.update(value=None), gr.update()
 
             # Refresh conversation list after message (title/updated_at may have changed)
@@ -466,8 +485,8 @@ def create_ui(
             outputs=[active_agent_name, agent_selector, chatbot, conv_list_state, browser_state],
         )
 
-        # Pipeline status polling timer (every 3 seconds)
-        status_timer = gr.Timer(3)
+        # Pipeline status polling timer (every 10 seconds)
+        status_timer = gr.Timer(10)
         status_timer.tick(poll_pipeline_status, [active_agent_name], [pipeline_status])
 
         # Notification polling timer (every 10 seconds)