def _recover_stale_tasks(self):
    """Reset tasks stuck in the automation status for too long.

    Fetches every task in the configured space whose status equals
    ``config.clickup.automation_status``. A task counts as stale when its
    ``date_updated`` value (parsed as an integer and compared against the
    current time in epoch milliseconds) is more than
    ``STALE_TASK_THRESHOLD_SECONDS`` in the past. Each stale task is moved
    to the first entry of ``config.clickup.poll_statuses`` (``"to do"``
    when that list is empty), gets an explanatory comment, and is
    reported through ``self._notify``.

    Tasks with a missing or non-numeric ``date_updated`` are skipped.
    The method is best-effort: it returns silently when no space is
    configured or the task query fails, and a failure while recovering
    one task is logged without stopping recovery of the others.
    """
    client = self._get_clickup_client()
    space_id = self.config.clickup.space_id
    if not space_id:
        return

    automation_status = self.config.clickup.automation_status
    try:
        stale_tasks = client.get_tasks_from_space(
            space_id, statuses=[automation_status]
        )
    except Exception as e:
        log.warning("Failed to query stale tasks: %s", e)
        return

    # Loop-invariant: resolve the retry status once instead of per task.
    poll_statuses = self.config.clickup.poll_statuses
    reset_status = poll_statuses[0] if poll_statuses else "to do"

    # NOTE(review): date_updated is assumed to be epoch milliseconds,
    # hence the millisecond units on both sides -- confirm against the API.
    now_ms = int(datetime.now(UTC).timestamp() * 1000)
    threshold_ms = self.STALE_TASK_THRESHOLD_SECONDS * 1000

    for task in stale_tasks:
        if not task.date_updated:
            continue
        try:
            updated_ms = int(task.date_updated)
        except (ValueError, TypeError):
            continue

        age_ms = now_ms - updated_ms
        if age_ms <= threshold_ms:
            continue

        age_hours = age_ms / 3_600_000
        log.warning(
            "Recovering stale task %s (%s) — stuck in '%s' for %.1f hours",
            task.id, task.name, automation_status, age_hours,
        )

        # Isolate failures per task so one bad task cannot block the
        # recovery of the rest or abort the surrounding poll cycle.
        try:
            client.update_task_status(task.id, reset_status)
        except Exception as e:
            log.warning("Failed to reset stale task %s: %s", task.id, e)
            continue

        # The comment is informational only; the task has already been
        # reset, so a failure here must not suppress the notification.
        try:
            client.add_comment(
                task.id,
                f"⚠️ CheddahBot auto-recovered this task. It was stuck in "
                f"'{automation_status}' for {age_hours:.1f} hours. "
                f"Reset to '{reset_status}' for retry.",
            )
        except Exception as e:
            log.warning(
                "Failed to comment on recovered task %s: %s", task.id, e
            )

        self._notify(
            f"Recovered stale task: **{task.name}** — "
            f"reset from '{automation_status}' to '{reset_status}'",
            category="clickup",
        )