1560 lines
62 KiB
Python
1560 lines
62 KiB
Python
"""Task scheduler with heartbeat, ClickUp polling, and folder watch support."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import contextlib
|
|
import json
|
|
import logging
|
|
import re
|
|
import shutil
|
|
import threading
|
|
|
|
import httpx
|
|
from datetime import UTC, datetime
|
|
from pathlib import Path
|
|
from typing import TYPE_CHECKING
|
|
from zoneinfo import ZoneInfo
|
|
|
|
from croniter import croniter
|
|
|
|
if TYPE_CHECKING:
|
|
from .agent import Agent
|
|
from .config import Config
|
|
from .db import Database
|
|
from .notifications import NotificationBus
|
|
|
|
# Module-wide logger for ordinary scheduler diagnostics.
log = logging.getLogger(__name__)

# Marker string the heartbeat prompt asks the agent to answer with when
# nothing needs attention.
HEARTBEAT_OK = "HEARTBEAT_OK"

# Only tasks in these statuses are eligible for xlsx -> ClickUp matching.
# "to do" is excluded to prevent accidental matches and AutoCora race conditions.
# To force-reuse an xlsx for a "to do" task, set status to "running cora" first.
_CORA_ELIGIBLE_STATUSES = frozenset(("running cora", "error"))

# Dedicated logger for "tool returned error but likely handled it" cases.
# Writes to logs/pipeline_errors.log (two directories above this file) for
# manual review and does not propagate to the root logger.
_pe_dir = Path(__file__).resolve().parents[1] / "logs"
_pe_dir.mkdir(exist_ok=True)

_pe_handler = logging.FileHandler(_pe_dir / "pipeline_errors.log", encoding="utf-8")
_pe_handler.setFormatter(logging.Formatter("%(asctime)s | %(message)s"))

_pipeline_err_log = logging.getLogger("cheddahbot.pipeline_errors")
_pipeline_err_log.propagate = False
_pipeline_err_log.setLevel(logging.INFO)
_pipeline_err_log.addHandler(_pe_handler)
|
|
|
|
|
|
class Scheduler:
|
|
# Tasks due within this window are eligible for execution
|
|
DUE_DATE_WINDOW_WEEKS = 3
|
|
|
|
def __init__(
|
|
self,
|
|
config: Config,
|
|
db: Database,
|
|
agent: Agent,
|
|
notification_bus: NotificationBus | None = None,
|
|
):
|
|
self.config = config
|
|
self.db = db
|
|
self.agent = agent
|
|
self.notification_bus = notification_bus
|
|
self._stop_event = threading.Event()
|
|
self._force_heartbeat = threading.Event()
|
|
self._force_poll = threading.Event()
|
|
self._thread: threading.Thread | None = None
|
|
self._heartbeat_thread: threading.Thread | None = None
|
|
self._clickup_thread: threading.Thread | None = None
|
|
self._folder_watch_thread: threading.Thread | None = None
|
|
self._autocora_thread: threading.Thread | None = None
|
|
self._content_watch_thread: threading.Thread | None = None
|
|
self._cora_distribute_thread: threading.Thread | None = None
|
|
self._briefing_thread: threading.Thread | None = None
|
|
self._force_autocora = threading.Event()
|
|
self._force_briefing = threading.Event()
|
|
self._last_briefing_date: str | None = None
|
|
self._clickup_client = None
|
|
self._field_filter_cache: dict | None = None
|
|
self._loop_timestamps: dict[str, str | None] = {
|
|
"heartbeat": None,
|
|
"poll": None,
|
|
"clickup": None,
|
|
"folder_watch": None,
|
|
"autocora": None,
|
|
"content_watch": None,
|
|
"cora_distribute": None,
|
|
"briefing": None,
|
|
}
|
|
self._active_executions: dict[str, dict] = {}
|
|
self._active_lock = threading.Lock()
|
|
self._plural_cache: dict[tuple[str, str], bool] = {}
|
|
|
|
def _llm_plural_check(self, a: str, b: str) -> bool:
|
|
"""Ask the chat brain if two keywords are the same aside from plural form.
|
|
|
|
Uses OpenRouter with the configured CHEDDAH_CHAT_MODEL. Results are
|
|
cached for the session to avoid repeat calls.
|
|
"""
|
|
key = (a, b) if a <= b else (b, a)
|
|
if key in self._plural_cache:
|
|
return self._plural_cache[key]
|
|
|
|
api_key = self.config.openrouter_api_key
|
|
model = self.config.chat_model
|
|
if not api_key:
|
|
log.warning("LLM plural check: no OpenRouter API key, returning False")
|
|
return False
|
|
|
|
try:
|
|
resp = httpx.post(
|
|
"https://openrouter.ai/api/v1/chat/completions",
|
|
headers={"Authorization": f"Bearer {api_key}"},
|
|
json={
|
|
"model": model,
|
|
"max_tokens": 5,
|
|
"messages": [
|
|
{
|
|
"role": "system",
|
|
"content": (
|
|
"You compare SEO keywords. Reply with ONLY 'YES' or 'NO'. "
|
|
"Answer YES only if the two keywords are identical except for "
|
|
"singular vs plural word forms (e.g. 'shaft' vs 'shafts', "
|
|
"'company' vs 'companies'). Answer NO if they differ in any "
|
|
"other way (extra words, different words, different meaning)."
|
|
),
|
|
},
|
|
{
|
|
"role": "user",
|
|
"content": f'Keyword A: "{a}"\nKeyword B: "{b}"',
|
|
},
|
|
],
|
|
},
|
|
timeout=15,
|
|
)
|
|
resp.raise_for_status()
|
|
answer = resp.json()["choices"][0]["message"]["content"].strip()
|
|
result = "YES" in answer.upper()
|
|
log.debug("LLM plural check: '%s' vs '%s' ->%s (%s)", a, b, result, answer)
|
|
except Exception as e:
|
|
log.warning("LLM plural check failed for '%s' vs '%s': %s", a, b, e)
|
|
result = False
|
|
|
|
self._plural_cache[key] = result
|
|
return result
|
|
|
|
def start(self):
    """Launch the scheduler's background threads.

    The poll and heartbeat threads always start. The ClickUp poller, folder
    watcher, AutoCora poller, content watcher, Cora distribution watcher and
    morning briefing loop start only when their configuration enables them;
    otherwise an info line records why each was skipped.
    """

    def _daemon(loop, label: str) -> threading.Thread:
        # Build (but do not start) a named daemon thread for one loop.
        return threading.Thread(target=loop, daemon=True, name=label)

    self._thread = _daemon(self._poll_loop, "scheduler")
    self._thread.start()

    self._heartbeat_thread = _daemon(self._heartbeat_loop, "heartbeat")
    self._heartbeat_thread.start()

    # ClickUp polling
    if not self.config.clickup.enabled:
        log.info("ClickUp integration disabled (no API token)")
    else:
        self._clickup_thread = _daemon(self._clickup_loop, "clickup")
        self._clickup_thread.start()
        log.info(
            "ClickUp polling started (interval=%dm)", self.config.clickup.poll_interval_minutes
        )

    # Link-building folder watcher
    watch_folder = self.config.link_building.watch_folder
    if not watch_folder:
        log.info("Folder watcher disabled (no watch_folder configured)")
    else:
        self._folder_watch_thread = _daemon(self._folder_watch_loop, "folder-watch")
        self._folder_watch_thread.start()
        log.info(
            "Folder watcher started (folder=%s, interval=%dm)",
            watch_folder,
            self.config.link_building.watch_interval_minutes,
        )

    # AutoCora result polling
    if not self.config.autocora.enabled:
        log.info("AutoCora polling disabled")
    else:
        self._autocora_thread = _daemon(self._autocora_loop, "autocora")
        self._autocora_thread.start()
        log.info(
            "AutoCora polling started (interval=%dm)",
            self.config.autocora.poll_interval_minutes,
        )

    # Content folder watcher
    content_inbox = self.config.content.cora_inbox
    if not content_inbox:
        log.info("Content folder watcher disabled (no cora_inbox configured)")
    else:
        self._content_watch_thread = _daemon(self._content_watch_loop, "content-watch")
        self._content_watch_thread.start()
        log.info(
            "Content folder watcher started (folder=%s, interval=%dm)",
            content_inbox,
            self.config.link_building.watch_interval_minutes,
        )

    # Cora distribution watcher
    cora_human_inbox = self.config.autocora.cora_human_inbox
    if not cora_human_inbox:
        log.info("Cora distribution watcher disabled (no cora_human_inbox configured)")
    else:
        self._cora_distribute_thread = _daemon(self._cora_distribute_loop, "cora-distribute")
        self._cora_distribute_thread.start()
        log.info(
            "Cora distribution watcher started (folder=%s, interval=%dm)",
            cora_human_inbox,
            self.config.link_building.watch_interval_minutes,
        )

    # Morning briefing (requires ClickUp)
    if not self.config.clickup.enabled:
        log.info("Morning briefing disabled (ClickUp not configured)")
    else:
        self._briefing_thread = _daemon(self._briefing_loop, "briefing")
        self._briefing_thread.start()
        log.info("Morning briefing loop started")

    log.info(
        "Scheduler started (poll=%ds, heartbeat=%dm)",
        self.config.scheduler.poll_interval_seconds,
        self.config.scheduler.heartbeat_interval_minutes,
    )
|
|
|
|
def stop(self):
|
|
self._stop_event.set()
|
|
if self._clickup_client:
|
|
self._clickup_client.close()
|
|
|
|
def _notify(self, message: str, category: str = "clickup"):
|
|
"""Push a notification through the bus if available."""
|
|
if self.notification_bus:
|
|
self.notification_bus.push(message, category)
|
|
else:
|
|
log.info("Notification [%s]: %s", category, message)
|
|
|
|
# ── Loop control ──
|
|
|
|
def _interruptible_wait(self, seconds: float, force_event: threading.Event | None = None):
|
|
"""Wait for *seconds*, returning early if stop or force event fires."""
|
|
remaining = seconds
|
|
while remaining > 0 and not self._stop_event.is_set():
|
|
if force_event and force_event.is_set():
|
|
force_event.clear()
|
|
return
|
|
self._stop_event.wait(min(5, remaining))
|
|
remaining -= 5
|
|
|
|
def force_heartbeat(self):
|
|
"""Wake the heartbeat loop immediately."""
|
|
self._force_heartbeat.set()
|
|
|
|
def force_poll(self):
|
|
"""Wake the scheduler poll loop immediately."""
|
|
self._force_poll.set()
|
|
|
|
def force_autocora(self):
|
|
"""Wake the AutoCora poll loop immediately."""
|
|
self._force_autocora.set()
|
|
|
|
def force_briefing(self):
|
|
"""Force the morning briefing to send now (ignores schedule/dedup)."""
|
|
self._last_briefing_date = None
|
|
self._force_briefing.set()
|
|
|
|
def get_loop_timestamps(self) -> dict[str, str | None]:
|
|
"""Return last_run timestamps for all loops (in-memory)."""
|
|
return dict(self._loop_timestamps)
|
|
|
|
def _register_execution(self, task_id: str, name: str, tool_name: str) -> None:
|
|
"""Register a task as actively executing."""
|
|
with self._active_lock:
|
|
self._active_executions[task_id] = {
|
|
"name": name,
|
|
"tool": tool_name,
|
|
"started_at": datetime.now(UTC),
|
|
"thread": threading.current_thread().name,
|
|
}
|
|
|
|
def _unregister_execution(self, task_id: str) -> None:
|
|
"""Remove a task from the active executions registry."""
|
|
with self._active_lock:
|
|
self._active_executions.pop(task_id, None)
|
|
|
|
def get_active_executions(self) -> dict[str, dict]:
|
|
"""Return a snapshot of currently executing tasks."""
|
|
with self._active_lock:
|
|
return dict(self._active_executions)
|
|
|
|
# ── Scheduled Tasks ──
|
|
|
|
def _poll_loop(self):
|
|
while not self._stop_event.is_set():
|
|
try:
|
|
self._run_due_tasks()
|
|
self._loop_timestamps["poll"] = datetime.now(UTC).isoformat()
|
|
except Exception as e:
|
|
log.error("Scheduler poll error: %s", e)
|
|
self._interruptible_wait(self.config.scheduler.poll_interval_seconds, self._force_poll)
|
|
|
|
def _run_due_tasks(self):
|
|
tasks = self.db.get_due_tasks()
|
|
for task in tasks:
|
|
try:
|
|
log.info("Running scheduled task: %s", task["name"])
|
|
result = self.agent.execute_task(task["prompt"])
|
|
self.db.log_task_run(task["id"], result=result[:2000])
|
|
|
|
# Calculate next run
|
|
schedule = task["schedule"]
|
|
if schedule.startswith("once:"):
|
|
# One-time task, disable it
|
|
self.db.disable_task(task["id"])
|
|
else:
|
|
# Cron schedule - calculate next run
|
|
now = datetime.now(UTC)
|
|
cron = croniter(schedule, now)
|
|
next_run = cron.get_next(datetime)
|
|
self.db.update_task_next_run(task["id"], next_run.isoformat())
|
|
except Exception as e:
|
|
log.error("Task '%s' failed: %s", task["name"], e)
|
|
self.db.log_task_run(task["id"], error=str(e))
|
|
|
|
# ── Heartbeat ──
|
|
|
|
def _heartbeat_loop(self):
|
|
interval = self.config.scheduler.heartbeat_interval_minutes * 60
|
|
# Wait a bit before first heartbeat
|
|
self._stop_event.wait(60)
|
|
|
|
while not self._stop_event.is_set():
|
|
try:
|
|
self._run_heartbeat()
|
|
self._loop_timestamps["heartbeat"] = datetime.now(UTC).isoformat()
|
|
except Exception as e:
|
|
log.error("Heartbeat error: %s", e)
|
|
self._interruptible_wait(interval, self._force_heartbeat)
|
|
|
|
def _run_heartbeat(self):
|
|
heartbeat_path = self.config.identity_dir / "HEARTBEAT.md"
|
|
if not heartbeat_path.exists():
|
|
return
|
|
|
|
checklist = heartbeat_path.read_text(encoding="utf-8")
|
|
prompt = (
|
|
f"HEARTBEAT CHECK. Review this checklist and take action if needed.\n"
|
|
f"If nothing needs attention, respond with exactly: {HEARTBEAT_OK}\n\n"
|
|
f"{checklist}"
|
|
)
|
|
|
|
result = self.agent.execute_task(prompt, system_context=checklist)
|
|
|
|
if HEARTBEAT_OK in result:
|
|
log.debug("Heartbeat: all clear")
|
|
else:
|
|
log.info("Heartbeat action taken: %s", result[:200])
|
|
|
|
# ── ClickUp Integration ──
|
|
|
|
def _get_clickup_client(self):
|
|
"""Lazy-init the ClickUp API client."""
|
|
if self._clickup_client is None:
|
|
from .clickup import ClickUpClient
|
|
|
|
self._clickup_client = ClickUpClient(
|
|
api_token=self.config.clickup.api_token,
|
|
workspace_id=self.config.clickup.workspace_id,
|
|
task_type_field_name=self.config.clickup.task_type_field_name,
|
|
)
|
|
return self._clickup_client
|
|
|
|
# Maximum time a task can stay in "automation underway" before recovery (seconds)
|
|
STALE_TASK_THRESHOLD_SECONDS = 6 * 60 * 60 # 6 hours
|
|
|
|
def _clickup_loop(self):
|
|
"""Poll ClickUp for tasks on a regular interval."""
|
|
interval = self.config.clickup.poll_interval_minutes * 60
|
|
|
|
# Wait before first poll to let other systems initialize
|
|
self._stop_event.wait(30)
|
|
|
|
while not self._stop_event.is_set():
|
|
try:
|
|
self._poll_clickup()
|
|
self._recover_stale_tasks()
|
|
self._loop_timestamps["clickup"] = datetime.now(UTC).isoformat()
|
|
except Exception as e:
|
|
log.error("ClickUp poll error: %s", e)
|
|
self._interruptible_wait(interval)
|
|
|
|
def _discover_field_filter(self, client):
    """Look up the task-type custom field's id and option map via *client*.

    The field metadata is read from the first list of the configured space.

    Returns:
        Whatever ``client.discover_field_filter`` returns (a mapping with
        ``field_id`` and ``options`` when found), or ``None`` when the
        space has no lists.
    """
    space_id = self.config.clickup.space_id
    list_ids = client.get_list_ids_from_space(space_id)
    if not list_ids:
        log.warning("No lists found in space %s — cannot discover field filter", space_id)
        return None

    # Any list will do for metadata discovery; take the first one.
    first_list = next(iter(list_ids))
    field_name = self.config.clickup.task_type_field_name
    found = client.discover_field_filter(first_list, field_name)

    if not found:
        log.warning(
            "Field '%s' not found in list %s — falling back to client-side filtering",
            field_name,
            first_list,
        )
        return found

    log.info(
        "Discovered field filter for '%s': field_id=%s, options=%s",
        field_name,
        found["field_id"],
        list(found["options"].keys()),
    )
    return found
|
|
|
|
def _poll_clickup(self):
|
|
"""Poll ClickUp for eligible tasks and execute them immediately."""
|
|
client = self._get_clickup_client()
|
|
space_id = self.config.clickup.space_id
|
|
if not space_id:
|
|
log.warning("ClickUp space_id not configured, skipping poll")
|
|
return
|
|
|
|
skill_map = self.config.clickup.skill_map
|
|
if not skill_map:
|
|
log.debug("No skill_map configured, skipping ClickUp poll")
|
|
return
|
|
|
|
# Discover field filter on first poll
|
|
if self._field_filter_cache is None:
|
|
self._field_filter_cache = self._discover_field_filter(client) or {}
|
|
|
|
# Build API filters
|
|
now_ms = int(datetime.now(UTC).timestamp() * 1000)
|
|
due_date_lt = now_ms + (self.DUE_DATE_WINDOW_WEEKS * 7 * 24 * 60 * 60 * 1000)
|
|
|
|
# Explicit allowlist of Work Category values to poll
|
|
allowed_types = self.config.clickup.poll_task_types or list(skill_map.keys())
|
|
|
|
custom_fields_filter = None
|
|
if self._field_filter_cache and self._field_filter_cache.get("options"):
|
|
field_id = self._field_filter_cache["field_id"]
|
|
options = self._field_filter_cache["options"]
|
|
matching_opt_ids = [options[name] for name in allowed_types if name in options]
|
|
if matching_opt_ids:
|
|
import json as _json
|
|
|
|
custom_fields_filter = _json.dumps(
|
|
[{"field_id": field_id, "operator": "ANY", "value": matching_opt_ids}]
|
|
)
|
|
|
|
tasks = client.get_tasks_from_space(
|
|
space_id,
|
|
statuses=self.config.clickup.poll_statuses,
|
|
due_date_lt=due_date_lt,
|
|
custom_fields=custom_fields_filter,
|
|
)
|
|
|
|
for task in tasks:
|
|
# Client-side verify: Work Category must be in allowed types AND skill_map
|
|
if task.task_type not in allowed_types or task.task_type not in skill_map:
|
|
continue
|
|
|
|
# Respect auto_execute flag — skip tasks that require manual trigger
|
|
# Unless the task status matches an auto_execute_on_status entry
|
|
mapping = skill_map[task.task_type]
|
|
if not mapping.get("auto_execute", False):
|
|
status_triggers = mapping.get("auto_execute_on_status", [])
|
|
if task.status.lower() not in [s.lower() for s in status_triggers]:
|
|
hint = mapping.get("trigger_hint", "manual trigger only")
|
|
log.debug(
|
|
"Skipping task '%s' (type=%s): auto_execute is false (%s)",
|
|
task.name,
|
|
task.task_type,
|
|
hint,
|
|
)
|
|
continue
|
|
|
|
# Client-side verify: due_date must exist and be within window
|
|
if not task.due_date:
|
|
continue
|
|
try:
|
|
task_due_ms = int(task.due_date)
|
|
if task_due_ms > due_date_lt:
|
|
continue
|
|
except (ValueError, TypeError):
|
|
continue
|
|
|
|
self._execute_task(task)
|
|
|
|
def _execute_task(self, task):
    """Execute a single ClickUp task immediately.

    Tools own their own ClickUp sync (status, comments, attachments).
    The scheduler just calls the tool and handles errors.

    Flow:
        1. Resolve the tool from ``skill_map``; return if there is none.
        2. Build the tool arguments and return (with a notification) if a
           required field is empty.
        3. Move the task to the automation status and run the tool.
        4. A result starting with ``Skipped:`` or ``Error:``, or any
           exception, is reported on the task and moves it to the error
           status.

    Failure reporting to ClickUp is best-effort: the error is logged first
    and a ClickUp failure inside the handler is logged instead of raised,
    so the original error and the notification are never lost.
    """
    skill_map = self.config.clickup.skill_map
    mapping = skill_map.get(task.task_type, {})
    tool_name = mapping.get("tool", "")
    if not tool_name:
        log.warning("No tool in skill_map for type '%s'", task.task_type)
        return

    task_id = task.id
    client = self._get_clickup_client()

    # Validate required fields before starting
    args = self._build_tool_args_from_task(task, mapping)
    missing = [f for f in mapping.get("required_fields", []) if not args.get(f)]
    if missing:
        # Report the ClickUp field names, not the tool parameter names.
        field_mapping = mapping.get("field_mapping", {})
        missing_clickup = [field_mapping.get(f, f) for f in missing]
        msg = f"Skipped: missing required field(s): {', '.join(missing_clickup)}"
        log.debug("Skipping ClickUp task %s (%s) — %s", task_id, task.name, msg)
        self._notify(
            f"Skipped ClickUp task: **{task.name}**\n{msg}",
            category="clickup",
        )
        return

    # Move to "automation underway" on ClickUp immediately
    client.update_task_status(task_id, self.config.clickup.automation_status)

    log.info("Executing ClickUp task: %s ->%s", task.name, tool_name)
    self._notify(f"Executing ClickUp task: **{task.name}** ->Skill: `{tool_name}`")

    self._register_execution(task_id, task.name, tool_name)
    try:
        # args already built during validation above
        args["clickup_task_id"] = task_id
        args["clickup_task_status"] = task.status

        # Map Work Category to content_type so create_content routes correctly
        if tool_name == "create_content":
            content_types = {
                "On Page Optimization": "on page optimization",
                "Content Creation": "new content",
            }
            if task.task_type in content_types:
                args["content_type"] = content_types[task.task_type]

        # Execute the skill via the tool registry; fall back to a prompt
        # when the agent exposes no registry.
        tools = getattr(self.agent, "_tools", None)
        if tools:
            result = tools.execute(tool_name, args)
        else:
            result = self.agent.execute_task(
                f"Execute the '{tool_name}' tool for ClickUp task '{task.name}'. "
                f"Task description: {task.custom_fields}"
            )

        # Check if the tool skipped or reported an error without doing work
        if result.startswith(("Skipped:", "Error:")):
            client.add_comment(
                task_id,
                f"⚠️ CheddahBot could not execute this task.\n\n{result[:2000]}",
            )
            client.update_task_status(task_id, self.config.clickup.error_status)

            self._notify(f"ClickUp task skipped: **{task.name}**\nReason: {result[:200]}")
            log.debug("ClickUp task skipped: %s — %s", task.name, result[:200])
            return

        # Tool handled its own ClickUp sync — just log success
        self._notify(f"**{task.name}** done — ran `{tool_name}` successfully.")
        log.info("ClickUp task completed: %s", task.name)

    except Exception as e:
        # Log first so the root cause survives even if ClickUp is unreachable.
        log.error("ClickUp task failed: %s — %s", task.name, e)
        try:
            client.add_comment(
                task_id, f"❌ CheddahBot failed to complete this task.\n\nError: {str(e)[:2000]}"
            )
            client.update_task_status(task_id, self.config.clickup.error_status)
        except Exception as report_err:
            log.warning(
                "Could not report failure of task %s to ClickUp: %s", task_id, report_err
            )

        self._notify(
            f"ClickUp task failed: **{task.name}**\n"
            f"Skill: `{tool_name}` | Error: {str(e)[:200]}"
        )
    finally:
        self._unregister_execution(task_id)
|
|
|
|
def _recover_stale_tasks(self):
|
|
"""Reset tasks stuck in 'automation underway' for too long.
|
|
|
|
If a task has been in the automation status for more than
|
|
STALE_TASK_THRESHOLD_SECONDS (default 2 hours), reset it to
|
|
the first poll status (usually 'to do') so it gets retried.
|
|
"""
|
|
client = self._get_clickup_client()
|
|
space_id = self.config.clickup.space_id
|
|
if not space_id:
|
|
return
|
|
|
|
automation_status = self.config.clickup.automation_status
|
|
try:
|
|
stale_tasks = client.get_tasks_from_space(space_id, statuses=[automation_status])
|
|
except Exception as e:
|
|
log.warning("Failed to query stale tasks: %s", e)
|
|
return
|
|
|
|
now_ms = int(datetime.now(UTC).timestamp() * 1000)
|
|
threshold_ms = self.STALE_TASK_THRESHOLD_SECONDS * 1000
|
|
|
|
for task in stale_tasks:
|
|
if not task.date_updated:
|
|
continue
|
|
try:
|
|
updated_ms = int(task.date_updated)
|
|
except (ValueError, TypeError):
|
|
continue
|
|
|
|
age_ms = now_ms - updated_ms
|
|
if age_ms > threshold_ms:
|
|
poll_sts = self.config.clickup.poll_statuses
|
|
reset_status = poll_sts[0] if poll_sts else "to do"
|
|
log.warning(
|
|
"Recovering stale task %s (%s) — stuck in '%s' for %.1f hours",
|
|
task.id,
|
|
task.name,
|
|
automation_status,
|
|
age_ms / 3_600_000,
|
|
)
|
|
client.update_task_status(task.id, reset_status)
|
|
client.add_comment(
|
|
task.id,
|
|
f"⚠️ CheddahBot auto-recovered this task. It was stuck in "
|
|
f"'{automation_status}' for {age_ms / 3_600_000:.1f} hours. "
|
|
f"Reset to '{reset_status}' for retry.",
|
|
)
|
|
self._notify(
|
|
f"Recovered stale task: **{task.name}** — "
|
|
f"reset from '{automation_status}' to '{reset_status}'",
|
|
category="clickup",
|
|
)
|
|
|
|
def _build_tool_args_from_task(self, task, mapping: dict) -> dict:
|
|
"""Build tool arguments from a ClickUp task using the field mapping."""
|
|
field_mapping = mapping.get("field_mapping", {})
|
|
|
|
args = {}
|
|
for tool_param, source in field_mapping.items():
|
|
if source == "task_name":
|
|
args[tool_param] = task.name
|
|
elif source == "task_description":
|
|
args[tool_param] = task.custom_fields.get("description", "")
|
|
else:
|
|
# Look up custom field by name
|
|
args[tool_param] = task.custom_fields.get(source, "")
|
|
|
|
return args
|
|
|
|
# ── AutoCora Result Polling ──
|
|
|
|
def _autocora_loop(self):
|
|
"""Auto-submit jobs for today's tasks, then poll for results."""
|
|
interval = self.config.autocora.poll_interval_minutes * 60
|
|
|
|
# Wait before first poll
|
|
self._stop_event.wait(30)
|
|
|
|
while not self._stop_event.is_set():
|
|
try:
|
|
self._auto_submit_cora_jobs()
|
|
self._poll_autocora_results()
|
|
self._loop_timestamps["autocora"] = datetime.now(UTC).isoformat()
|
|
except Exception as e:
|
|
log.error("AutoCora poll error: %s", e)
|
|
self._interruptible_wait(interval, self._force_autocora)
|
|
|
|
def _auto_submit_cora_jobs(self):
    """Call ``submit_autocora_jobs`` with a config/db/agent context and log the result.

    No explicit date is passed. Does nothing when no ClickUp API token is
    configured.
    """
    from .tools.autocora import submit_autocora_jobs

    if self.config.clickup.api_token:
        outcome = submit_autocora_jobs(
            ctx={"config": self.config, "db": self.db, "agent": self.agent}
        )
        log.info("AutoCora auto-submit (sweep): %s", outcome)
|
|
|
|
def _poll_autocora_results(self):
    """Check for completed AutoCora results and update ClickUp tasks.

    Scans the results folder for .result files. Each file is parsed into a
    mapping with ``task_ids``, ``status`` and optionally ``keyword`` and
    ``reason``. SUCCESS and FAILURE results update the listed ClickUp tasks
    (when a ClickUp token is configured) and send a notification. After
    processing, the file is moved to results/processed/ to prevent
    re-processing.

    Each file is handled independently: if it cannot be read, parsed or
    synced to ClickUp, the error is logged, the file stays in place for the
    next poll, and the remaining files are still processed.
    """
    from .tools.autocora import _parse_result

    autocora = self.config.autocora
    results_dir = Path(autocora.results_dir)

    if not results_dir.exists():
        log.debug("AutoCora results dir does not exist: %s", results_dir)
        return

    result_files = sorted(results_dir.glob("*.result"))
    if not result_files:
        return

    client = self._get_clickup_client() if self.config.clickup.api_token else None
    processed_dir = results_dir / "processed"
    processed_dir.mkdir(exist_ok=True)

    for result_path in result_files:
        try:
            raw = result_path.read_text(encoding="utf-8").strip()
            result_data = _parse_result(raw)

            task_ids = result_data.get("task_ids", [])
            status = result_data.get("status", "UNKNOWN")
            keyword = result_data.get("keyword", result_path.stem)

            if status == "SUCCESS":
                if client and task_ids:
                    for tid in task_ids:
                        client.update_task_status(tid, autocora.success_status)
                        client.add_comment(
                            tid,
                            f'Cora report generated for "{keyword}" — '
                            "ready for you to look at it.",
                        )

                self._notify(
                    f"AutoCora SUCCESS: **{keyword}** — "
                    f"{len(task_ids)} task(s) moved to '{autocora.success_status}'",
                    category="autocora",
                )

            elif status == "FAILURE":
                reason = result_data.get("reason", "unknown error")
                if client and task_ids:
                    for tid in task_ids:
                        client.update_task_status(tid, autocora.error_status)
                        client.add_comment(
                            tid,
                            f"Cora report failed for keyword: {keyword}\nReason: {reason}",
                        )

                self._notify(
                    f"AutoCora FAILURE: **{keyword}** — {reason}",
                    category="autocora",
                )

            log.debug("AutoCora result for '%s': %s", keyword, status)
        except Exception as e:
            # Leave the file where it is so the next poll retries it, and
            # move on so it cannot block the files behind it.
            log.error("AutoCora result %s could not be processed: %s", result_path.name, e)
            continue

        # Move result file to processed/
        try:
            result_path.rename(processed_dir / result_path.name)
        except OSError as e:
            log.warning("Could not move result file %s: %s", result_path.name, e)
            # If it already exists in processed/, delete the source to stop reprocessing
            if (processed_dir / result_path.name).exists():
                result_path.unlink(missing_ok=True)
|
|
|
|
# ── Folder Watcher ──
|
|
|
|
def _folder_watch_loop(self):
|
|
"""Poll the watch folder for new .xlsx files on a regular interval."""
|
|
interval = self.config.link_building.watch_interval_minutes * 60
|
|
|
|
# Wait before first scan to let other systems initialize
|
|
self._stop_event.wait(60)
|
|
|
|
while not self._stop_event.is_set():
|
|
try:
|
|
self._scan_watch_folder()
|
|
self._loop_timestamps["folder_watch"] = datetime.now(UTC).isoformat()
|
|
except Exception as e:
|
|
log.error("Folder watcher error: %s", e)
|
|
self._interruptible_wait(interval)
|
|
|
|
def _scan_watch_folder(self):
|
|
"""Scan the watch folder for new .xlsx files and match to ClickUp tasks."""
|
|
watch_folder = Path(self.config.link_building.watch_folder)
|
|
if not watch_folder.exists():
|
|
log.warning("Watch folder does not exist: %s", watch_folder)
|
|
return
|
|
|
|
xlsx_files = sorted(watch_folder.glob("*.xlsx"))
|
|
if not xlsx_files:
|
|
log.debug("No .xlsx files in watch folder")
|
|
return
|
|
|
|
# Check processed/ subfolder for already-handled files
|
|
processed_dir = watch_folder / "processed"
|
|
processed_names = set()
|
|
if processed_dir.exists():
|
|
processed_names = {f.name for f in processed_dir.glob("*.xlsx")}
|
|
|
|
for xlsx_path in xlsx_files:
|
|
filename = xlsx_path.name
|
|
# Skip Office temp/lock files (e.g. ~$insert_molding.xlsx)
|
|
if filename.startswith("~$"):
|
|
continue
|
|
# Skip files already in processed/
|
|
if filename in processed_names:
|
|
continue
|
|
|
|
log.info("Folder watcher: new .xlsx found: %s", filename)
|
|
self._process_watched_file(xlsx_path)
|
|
|
|
def _process_watched_file(self, xlsx_path: Path):
    """Try to match a watched .xlsx file to a ClickUp task and run the pipeline.

    Steps:
        1. Normalise the file stem (lower-case, ``-``/``_`` to spaces) and
           look for a matching ClickUp task; notify and return on no match.
        2. Move the task to the automation status.
        3. Require the task's IMSURL field; if empty, mark the task as
           errored and return.
        4. Run the ``run_cora_backlinks`` tool. On success the file is
           moved to ``processed/``; on a reported error the result is
           written to the pipeline error log and a notification is sent.

    The tool handles its own ClickUp error status when it runs. When no
    tool registry is available nothing runs, so this method marks the task
    as errored itself. Failure reporting to ClickUp after a crash is
    best-effort and never masks the original error.
    """
    filename = xlsx_path.name
    # Normalize filename stem for matching
    # e.g., "precision-cnc-machining" -> "precision cnc machining"
    stem = xlsx_path.stem.lower().replace("-", " ").replace("_", " ")
    stem = re.sub(r"\s+", " ", stem).strip()

    # Try to find matching ClickUp task
    matched_task = None
    if self.config.clickup.enabled:
        matched_task = self._match_xlsx_to_clickup(stem)

    if not matched_task:
        log.warning("No ClickUp task match for '%s' — skipping", filename)
        self._notify(
            f"Folder watcher: no ClickUp match for **{filename}**.\n"
            f"Create a Link Building task with Keyword "
            f"matching '{stem}' to enable auto-processing.",
            category="linkbuilding",
        )
        return

    task_id = matched_task.id
    fields = matched_task.custom_fields
    log.info("Matched '%s' to ClickUp task %s (%s)", filename, task_id, matched_task.name)

    # Set ClickUp status to "automation underway"
    client = self._get_clickup_client()
    client.update_task_status(task_id, self.config.clickup.automation_status)

    self._notify(
        f"Folder watcher: matched **{filename}** to ClickUp task **{matched_task.name}**.\n"
        f"Starting Cora Backlinks pipeline...",
        category="linkbuilding",
    )

    # The pipeline cannot run without a target URL.
    money_site_url = fields.get("IMSURL", "") or ""
    if not money_site_url:
        log.warning("Task %s (%s) missing IMSURL — skipping", task_id, matched_task.name)
        client.add_comment(
            task_id,
            "❌ Link building skipped — IMSURL field is empty. "
            "Set the IMSURL field in ClickUp so the pipeline knows where to build links.",
        )
        client.update_task_status(task_id, self.config.clickup.error_status)
        self._notify(
            f"Folder watcher: **{filename}** matched task **{matched_task.name}** "
            f"but **IMSURL is empty**. Set the IMSURL field in ClickUp before "
            f"the file can be processed.",
            category="linkbuilding",
        )
        return

    # Build tool args from the matched task's custom fields
    args = {
        "xlsx_path": str(xlsx_path),
        "project_name": fields.get("Keyword", "") or matched_task.name,
        "money_site_url": money_site_url,
        "custom_anchors": fields.get("CustomAnchors", "") or "",
        "cli_flags": fields.get("CLIFlags", "") or "",
        "clickup_task_id": task_id,
    }

    # Optional numeric field; an unparsable value is simply left out.
    bp_raw = fields.get("BrandedPlusRatio", "")
    if bp_raw:
        with contextlib.suppress(ValueError, TypeError):
            args["branded_plus_ratio"] = float(bp_raw)

    self._register_execution(task_id, matched_task.name, "run_cora_backlinks")
    try:
        # Execute via tool registry
        tools = getattr(self.agent, "_tools", None)
        tool_ran = bool(tools)
        if tool_ran:
            result = tools.execute("run_cora_backlinks", args)
        else:
            result = "Error: tool registry not available"

        if "Error" in result and "## Step" not in result:
            # Pipeline failed — tool handles its own ClickUp error status
            _pipeline_err_log.info(
                "LINKBUILDING | task=%s | file=%s | result=%s",
                task_id,
                filename,
                result[:500],
            )
            if not tool_ran:
                # No tool ran, so nothing else will move the task out of
                # the automation status — do it here.
                client.add_comment(
                    task_id,
                    f"❌ Link building could not start.\n\n{result}",
                )
                client.update_task_status(task_id, self.config.clickup.error_status)
            self._notify(
                f"Folder watcher: pipeline **failed** for **{filename}**.\n"
                f"Error: {result[:200]}",
                category="linkbuilding",
            )
        else:
            # Success — move file to processed/
            processed_dir = xlsx_path.parent / "processed"
            processed_dir.mkdir(exist_ok=True)
            dest = processed_dir / filename
            try:
                shutil.move(str(xlsx_path), str(dest))
                log.info("Moved %s to %s", filename, dest)
                move_note = "XLSX moved to processed."
            except OSError as e:
                log.warning("Could not move %s to processed: %s", filename, e)
                move_note = "XLSX could not be moved to processed."

            self._notify(
                f"Link building finished for **{matched_task.name}** — "
                f"Cora backlinks placed, {move_note}",
                category="linkbuilding",
            )

    except Exception as e:
        # Log first so the root cause survives even if ClickUp is unreachable.
        log.error("Folder watcher pipeline error for %s: %s", filename, e)
        try:
            client.add_comment(
                task_id,
                f"❌ Link building pipeline crashed.\n\nError: {str(e)[:2000]}",
            )
            client.update_task_status(task_id, self.config.clickup.error_status)
        except Exception as report_err:
            log.warning(
                "Could not report pipeline crash for task %s to ClickUp: %s",
                task_id,
                report_err,
            )
        self._notify(
            f"Folder watcher: pipeline **crashed** for **{filename}**.\n"
            f"Error: {str(e)[:200]}",
            category="linkbuilding",
        )
    finally:
        self._unregister_execution(task_id)
|
|
|
|
def _match_xlsx_to_clickup(self, normalized_stem: str):
    """Return the first eligible Link Building task matching the file stem.

    A task is considered only when its status is in
    ``_CORA_ELIGIBLE_STATUSES``, its ``task_type`` is "Link Building", its
    "LB Method" custom field is empty or "Cora Backlinks", and it has a
    non-empty "Keyword" custom field. The keyword is normalised and compared
    to ``normalized_stem`` with the fuzzy matcher.

    Args:
        normalized_stem: File stem already normalised by the caller.

    Returns:
        The matched ClickUpTask, or None when no space is configured, the
        ClickUp query fails, or nothing matches.
    """
    from .tools.linkbuilding import _fuzzy_keyword_match, _normalize_for_match

    client = self._get_clickup_client()
    space_id = self.config.clickup.space_id
    if not space_id:
        return None

    try:
        candidates = client.get_tasks_from_overall_lists(space_id)
    except Exception as exc:
        log.warning("ClickUp query failed in _match_xlsx_to_clickup: %s", exc)
        return None

    for candidate in candidates:
        # Cheap status/type filters first, before touching custom fields.
        eligible = (
            candidate.status in _CORA_ELIGIBLE_STATUSES
            and candidate.task_type == "Link Building"
        )
        if not eligible:
            continue

        # An explicit LB Method other than Cora Backlinks rules the task out.
        method = candidate.custom_fields.get("LB Method", "")
        if method and method != "Cora Backlinks":
            continue

        raw_keyword = candidate.custom_fields.get("Keyword", "")
        if raw_keyword and _fuzzy_keyword_match(
            normalized_stem,
            _normalize_for_match(str(raw_keyword)),
            self._llm_plural_check,
        ):
            return candidate

    return None
|
|
|
|
# ── Content Folder Watcher ──
|
|
|
|
def _content_watch_loop(self):
|
|
"""Poll the content Cora inbox for new .xlsx files on a regular interval."""
|
|
interval = self.config.link_building.watch_interval_minutes * 60
|
|
|
|
# Wait before first scan to let other systems initialize
|
|
self._stop_event.wait(60)
|
|
|
|
while not self._stop_event.is_set():
|
|
try:
|
|
self._scan_content_folder()
|
|
self._loop_timestamps["content_watch"] = datetime.now(UTC).isoformat()
|
|
except Exception as e:
|
|
log.error("Content folder watcher error: %s", e)
|
|
self._interruptible_wait(interval)
|
|
|
|
def _scan_content_folder(self):
    """Hand every unprocessed .xlsx in the content Cora inbox to the pipeline.

    Files whose name starts with ``~$`` (Office temp/lock files) and files
    whose name already exists in the ``processed`` subfolder are skipped.
    Everything else is passed to ``_process_content_file`` in sorted order.
    """
    inbox_dir = Path(self.config.content.cora_inbox)
    if not inbox_dir.exists():
        log.warning("Content Cora inbox does not exist: %s", inbox_dir)
        return

    candidates = sorted(inbox_dir.glob("*.xlsx"))
    if not candidates:
        log.debug("No .xlsx files in content Cora inbox")
        return

    # Snapshot of names already handled; taken once before the loop.
    done_dir = inbox_dir / "processed"
    already_done = (
        {entry.name for entry in done_dir.glob("*.xlsx")} if done_dir.exists() else set()
    )

    for workbook in candidates:
        name = workbook.name
        if name.startswith("~$") or name in already_done:
            continue

        log.info("Content watcher: new .xlsx found: %s", name)
        self._process_content_file(workbook)
|
|
|
|
def _process_content_file(self, xlsx_path: Path):
    """Match a content Cora .xlsx to a ClickUp task and run create_content.

    The file stem is normalised (lower-cased, ``-``/``_`` turned into spaces,
    whitespace collapsed) and matched against ClickUp content tasks when
    ClickUp is enabled. On a match the task is set to the configured
    automation status, the ``create_content`` tool is executed, and on
    success the workbook is moved into the ``processed`` subfolder next to it.

    A result starting with ``"Error:"`` is logged to the pipeline error log
    and notified. An exception raised while running the pipeline is reported
    on the ClickUp task (comment + error status) on a best-effort basis.

    Args:
        xlsx_path: Path of the workbook found in the content inbox.
    """
    filename = xlsx_path.name
    stem = xlsx_path.stem.lower().replace("-", " ").replace("_", " ")
    stem = re.sub(r"\s+", " ", stem).strip()

    # Try to find matching ClickUp task
    matched_task = None
    if self.config.clickup.enabled:
        matched_task = self._match_xlsx_to_content_task(stem)

    if not matched_task:
        log.warning("No ClickUp content task match for '%s' — skipping", filename)
        self._notify(
            f"Content watcher: no ClickUp match for **{filename}**.\n"
            f"Create a Content Creation or On Page Optimization task with Keyword "
            f"matching '{stem}' to enable auto-processing.",
            category="content",
        )
        return

    task_id = matched_task.id
    log.info("Matched '%s' to ClickUp task %s (%s)", filename, task_id, matched_task.name)

    # Set ClickUp status to "automation underway"
    client = self._get_clickup_client()
    client.update_task_status(task_id, self.config.clickup.automation_status)

    self._notify(
        f"Content watcher: matched **{filename}** to ClickUp task "
        f"**{matched_task.name}**.\nStarting content creation pipeline...",
        category="content",
    )

    # Extract fields from the matched task
    keyword = matched_task.custom_fields.get("Keyword", "") or matched_task.name
    url = matched_task.custom_fields.get("IMSURL", "") or ""
    cli_flags = matched_task.custom_fields.get("CLIFlags", "") or ""

    args = {
        "keyword": str(keyword),
        "url": str(url),
        "cli_flags": str(cli_flags),
        "clickup_task_id": task_id,
    }

    self._register_execution(task_id, matched_task.name, "create_content")
    try:
        if hasattr(self.agent, "_tools") and self.agent._tools:
            result = self.agent._tools.execute("create_content", args)
        else:
            result = "Error: tool registry not available"

        if result.startswith("Error:"):
            _pipeline_err_log.info(
                "CONTENT | task=%s | file=%s | result=%s",
                task_id, filename, result[:500],
            )
            self._notify(
                f"Content watcher: pipeline **failed** for **{filename}**.\n"
                f"Error: {result[:200]}",
                category="content",
            )
        else:
            # Success — move file to processed/
            processed_dir = xlsx_path.parent / "processed"
            processed_dir.mkdir(exist_ok=True)
            dest = processed_dir / filename
            try:
                shutil.move(str(xlsx_path), str(dest))
                log.info("Moved %s to %s", filename, dest)
            except OSError as e:
                log.warning("Could not move %s to processed: %s", filename, e)

            self._notify(
                f"Content optimization finished for **{matched_task.name}** — "
                f"content created, XLSX moved to processed.",
                category="content",
            )

    except Exception as e:
        log.error("Content watcher pipeline error for %s: %s", filename, e)
        # Crash reporting is best-effort: a ClickUp failure here must not
        # escape the handler, otherwise it would abort the caller's scan of
        # the remaining inbox files. Each call is guarded separately so a
        # failed comment does not prevent the status change.
        try:
            client.add_comment(
                task_id,
                f"❌ Content pipeline crashed.\n\nError: {str(e)[:2000]}",
            )
        except Exception as report_err:
            log.warning(
                "Content watcher: could not comment on task %s: %s", task_id, report_err
            )
        try:
            client.update_task_status(task_id, self.config.clickup.error_status)
        except Exception as report_err:
            log.warning(
                "Content watcher: could not set error status on task %s: %s",
                task_id, report_err,
            )
    finally:
        self._unregister_execution(task_id)
|
|
|
|
def _match_xlsx_to_content_task(self, normalized_stem: str):
    """Return the first eligible content task whose Keyword matches the stem.

    Only tasks whose status is in ``_CORA_ELIGIBLE_STATUSES`` and whose
    ``task_type`` is "Content Creation" or "On Page Optimization" are
    considered; tasks without a "Keyword" custom field are ignored.

    Args:
        normalized_stem: File stem already normalised by the caller.

    Returns:
        The matched ClickUpTask, or None when no space is configured, the
        ClickUp query fails, or nothing matches.
    """
    from .tools.linkbuilding import _fuzzy_keyword_match, _normalize_for_match

    client = self._get_clickup_client()
    space_id = self.config.clickup.space_id
    if not space_id:
        return None

    try:
        candidates = client.get_tasks_from_overall_lists(space_id)
    except Exception as exc:
        log.warning("ClickUp query failed in _match_xlsx_to_content_task: %s", exc)
        return None

    accepted_types = ("Content Creation", "On Page Optimization")
    for candidate in candidates:
        eligible = (
            candidate.status in _CORA_ELIGIBLE_STATUSES
            and candidate.task_type in accepted_types
        )
        if not eligible:
            continue

        raw_keyword = candidate.custom_fields.get("Keyword", "")
        if raw_keyword and _fuzzy_keyword_match(
            normalized_stem,
            _normalize_for_match(str(raw_keyword)),
            self._llm_plural_check,
        ):
            return candidate

    return None
|
|
|
|
# ── Cora Distribution Watcher ──
|
|
|
|
def _cora_distribute_loop(self):
|
|
"""Poll the human Cora inbox and distribute xlsx to pipeline inboxes."""
|
|
interval = self.config.link_building.watch_interval_minutes * 60
|
|
|
|
# Wait before first scan to let other systems initialize
|
|
self._stop_event.wait(60)
|
|
|
|
while not self._stop_event.is_set():
|
|
try:
|
|
self._scan_cora_human_inbox()
|
|
self._loop_timestamps["cora_distribute"] = datetime.now(UTC).isoformat()
|
|
except Exception as e:
|
|
log.error("Cora distribution watcher error: %s", e)
|
|
self._interruptible_wait(interval)
|
|
|
|
def _scan_cora_human_inbox(self):
    """Hand every unprocessed .xlsx in the human Cora inbox to the distributor.

    Files whose name starts with ``~$`` and files whose name already exists
    in the ``processed`` subfolder are skipped. Everything else is passed to
    ``_distribute_cora_file`` in sorted order.
    """
    inbox_dir = Path(self.config.autocora.cora_human_inbox)
    if not inbox_dir.exists():
        log.warning("Cora human inbox does not exist: %s", inbox_dir)
        return

    candidates = sorted(inbox_dir.glob("*.xlsx"))
    if not candidates:
        log.debug("No .xlsx files in Cora human inbox")
        return

    # Snapshot of names already handled; taken once before the loop.
    done_dir = inbox_dir / "processed"
    already_done = (
        {entry.name for entry in done_dir.glob("*.xlsx")} if done_dir.exists() else set()
    )

    for workbook in candidates:
        name = workbook.name
        if name.startswith("~$") or name in already_done:
            continue

        log.info("Cora distributor: new .xlsx found: %s", name)
        self._distribute_cora_file(workbook)
|
|
|
|
def _distribute_cora_file(self, xlsx_path: Path):
    """Match a Cora .xlsx to ClickUp tasks and copy to the right pipeline inboxes.

    Every task in an eligible status whose "Keyword" custom field fuzzily
    matches the normalised file stem counts as a match. If any matched task
    is a Link Building task the file is copied to the link building watch
    folder; if any is a Content Creation / On Page Optimization task it is
    copied to the content Cora inbox. Matched tasks that are in the error
    status are reset to "running cora", the original is moved to
    ``processed/``, and a comment is posted on the matched tasks.

    The file is left in the inbox (and nothing else is changed) when ClickUp
    is disabled, no space is configured, the query fails, nothing matches,
    a copy fails, or no destination inbox is configured for the matched
    task types.

    Args:
        xlsx_path: Path of the workbook found in the human Cora inbox.
    """
    from .tools.linkbuilding import _fuzzy_keyword_match, _normalize_for_match

    filename = xlsx_path.name
    stem = xlsx_path.stem.lower().replace("-", " ").replace("_", " ")
    stem = re.sub(r"\s+", " ", stem).strip()

    if not self.config.clickup.enabled:
        log.warning("Cora distributor: ClickUp disabled, cannot match '%s'", filename)
        return

    client = self._get_clickup_client()
    space_id = self.config.clickup.space_id
    if not space_id:
        return

    try:
        tasks = client.get_tasks_from_overall_lists(space_id)
    except Exception as e:
        log.warning("ClickUp query failed in _distribute_cora_file: %s", e)
        return

    # Find ALL matching tasks across all types
    has_lb = False
    has_content = False
    matched_names = []
    matched_error_tasks = []

    for task in tasks:
        if task.status not in _CORA_ELIGIBLE_STATUSES:
            continue
        keyword = task.custom_fields.get("Keyword", "")
        if not keyword:
            continue

        keyword_norm = _normalize_for_match(str(keyword))
        if not _fuzzy_keyword_match(stem, keyword_norm, self._llm_plural_check):
            continue

        matched_names.append(task.name)
        if task.status == self.config.clickup.error_status:
            matched_error_tasks.append(task)
        if task.task_type == "Link Building":
            has_lb = True
        elif task.task_type in ("Content Creation", "On Page Optimization"):
            has_content = True

    if not has_lb and not has_content:
        log.warning("No ClickUp task match for '%s' — leaving in inbox", filename)
        self._notify(
            f"Cora distributor: no ClickUp match for **{filename}**.\n"
            f"Create a task with Keyword matching '{stem}' to enable distribution.",
            category="autocora",
        )
        return

    # Copy to the appropriate pipeline inboxes
    copied_to = []
    try:
        if has_lb and self.config.link_building.watch_folder:
            dest_dir = Path(self.config.link_building.watch_folder)
            dest_dir.mkdir(parents=True, exist_ok=True)
            shutil.copy2(str(xlsx_path), str(dest_dir / filename))
            copied_to.append(f"link building ({dest_dir})")

        if has_content and self.config.content.cora_inbox:
            dest_dir = Path(self.config.content.cora_inbox)
            dest_dir.mkdir(parents=True, exist_ok=True)
            shutil.copy2(str(xlsx_path), str(dest_dir / filename))
            copied_to.append(f"content ({dest_dir})")
    except OSError as e:
        log.error("Cora distributor: copy failed for %s: %s", filename, e)
        self._notify(
            f"Cora distributor: **copy failed** for **{filename}**.\nError: {e}",
            category="autocora",
        )
        return

    # A task matched but the inbox setting for its pipeline is empty, so the
    # file went nowhere. Leave it in place instead of archiving it, resetting
    # task statuses and announcing a distribution that never happened.
    if not copied_to:
        log.warning(
            "Cora distributor: no destination inbox configured for '%s' — leaving in inbox",
            filename,
        )
        self._notify(
            f"Cora distributor: **{filename}** matched a task but no destination "
            f"inbox is configured for it — file left in inbox.",
            category="autocora",
        )
        return

    # Reset any matched tasks that were in "error" back to "running cora"
    # so the pipeline picks them up again.
    for task in matched_error_tasks:
        try:
            client.update_task_status(task.id, "running cora")
            client.add_comment(
                task.id,
                "New Cora XLSX distributed — resetting from error to running cora.",
            )
            log.info("Distributor: reset task %s (%s) from error ->running cora", task.id, task.name)
        except Exception as e:
            log.warning("Distributor: failed to reset task %s: %s", task.id, e)

    # Move original to processed/
    processed_dir = xlsx_path.parent / "processed"
    processed_dir.mkdir(exist_ok=True)
    try:
        shutil.move(str(xlsx_path), str(processed_dir / filename))
    except OSError as e:
        log.warning("Could not move %s to processed: %s", filename, e)

    log.info("Cora distributor: %s ->%s", filename, ", ".join(copied_to))
    self._notify(
        f"Cora distributor: **{filename}** copied to {', '.join(copied_to)}.\n"
        f"Matched tasks: {', '.join(matched_names)}",
        category="autocora",
    )

    # Post ClickUp comment on matched tasks
    self._comment_distributed_tasks(client, xlsx_path.stem, copied_to, tasks, stem)
|
|
|
|
def _comment_distributed_tasks(
    self, client, xlsx_stem: str, copied_to: list[str], tasks, normalized_stem: str
):
    """Post a ClickUp comment on tasks when a Cora report is distributed.

    Task ids are taken from the first job file named ``job-<x>-<slug>.json``
    (in the jobs directory or its ``processed`` subfolder) whose slug equals
    the slugified ``xlsx_stem``. When that yields nothing, every task in
    ``tasks`` whose "Keyword" custom field fuzzily matches
    ``normalized_stem`` is used instead. Comment failures are logged and do
    not stop the remaining comments.

    Args:
        client: ClickUp client used to post the comments.
        xlsx_stem: Raw stem of the distributed workbook.
        copied_to: Destination descriptions produced by the distributor;
            entries starting with "link" / "content" are mapped to inbox names.
        tasks: Task list already fetched by the caller (fallback source).
        normalized_stem: Normalised stem used for fuzzy keyword matching.
    """
    from .tools.autocora import _slugify
    from .tools.linkbuilding import _fuzzy_keyword_match, _normalize_for_match

    inbox_labels = []
    for dest in copied_to:
        if dest.startswith("link"):
            inbox_labels.append("cora-inbox")
        elif dest.startswith("content"):
            inbox_labels.append("content-cora-inbox")
        else:
            inbox_labels.append(dest)
    dest_label = " and ".join(inbox_labels)
    comment = f"Cora XLSX moved to {dest_label}."

    # Try to find task_ids from job JSON files
    task_ids: list[str] = []
    jobs_dir = Path(self.config.autocora.jobs_dir)
    slug = _slugify(xlsx_stem)

    if jobs_dir.is_dir():
        # Check both jobs/ root and processed/ subfolder
        search_dirs = [jobs_dir]
        processed = jobs_dir / "processed"
        if processed.is_dir():
            search_dirs.append(processed)

        for search_dir in search_dirs:
            for job_file in search_dir.glob("job-*.json"):
                # Strip "job-{timestamp}-" prefix to get the slug
                name_parts = job_file.stem.split("-", 2)
                if len(name_parts) < 3 or name_parts[2] != slug:
                    continue
                try:
                    data = json.loads(job_file.read_text(encoding="utf-8"))
                except (OSError, ValueError) as e:
                    # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
                    log.warning("Could not read job file %s: %s", job_file, e)
                else:
                    # Accept only a real list; a null/missing/mistyped value
                    # must not replace task_ids, or the fallback below would
                    # call .append on a non-list.
                    found = data.get("task_ids") if isinstance(data, dict) else None
                    if isinstance(found, list):
                        task_ids = [tid for tid in found if tid]
                    else:
                        log.warning("Job file %s has no usable task_ids list", job_file)
                # Only the first job file with a matching slug is consulted.
                break
            if task_ids:
                break

    # Fallback: match from the task list we already have
    if not task_ids:
        for task in tasks:
            keyword = task.custom_fields.get("Keyword", "")
            if not keyword:
                continue
            keyword_norm = _normalize_for_match(str(keyword))
            if _fuzzy_keyword_match(normalized_stem, keyword_norm, self._llm_plural_check):
                task_ids.append(task.id)

    # Post comments
    for tid in task_ids:
        try:
            client.add_comment(tid, comment)
        except Exception as e:
            log.warning("Failed to comment on task %s: %s", tid, e)
|
|
|
|
# ── Morning Briefing ──
|
|
|
|
# Timezone passed to datetime.now() by _briefing_loop when it checks the briefing schedule.
_CENTRAL = ZoneInfo("America/Chicago")
|
|
|
|
def _briefing_loop(self):
|
|
"""Check every 60s if it's time to send the morning briefing."""
|
|
# Wait before first check
|
|
self._stop_event.wait(30)
|
|
|
|
while not self._stop_event.is_set():
|
|
try:
|
|
forced = self._force_briefing.is_set()
|
|
if forced:
|
|
self._force_briefing.clear()
|
|
|
|
now = datetime.now(self._CENTRAL)
|
|
today = now.strftime("%Y-%m-%d")
|
|
is_weekday = now.weekday() < 5 # Mon=0 .. Fri=4
|
|
|
|
target_hour = 6 if is_weekday else 8
|
|
target_minute = 30 if is_weekday else 0
|
|
|
|
at_briefing_time = (
|
|
now.hour == target_hour and now.minute == target_minute
|
|
)
|
|
already_sent = self._last_briefing_date == today
|
|
|
|
if forced or (at_briefing_time and not already_sent):
|
|
self._send_morning_briefing()
|
|
self._last_briefing_date = today
|
|
self._loop_timestamps["briefing"] = datetime.now(UTC).isoformat()
|
|
|
|
except Exception as e:
|
|
log.error("Briefing loop error: %s", e)
|
|
|
|
self._interruptible_wait(60, self._force_briefing)
|
|
|
|
def _send_morning_briefing(self):
|
|
"""Build and send the morning briefing notification."""
|
|
client = self._get_clickup_client()
|
|
space_id = self.config.clickup.space_id
|
|
if not space_id:
|
|
log.warning("Briefing skipped — no space_id configured")
|
|
return
|
|
|
|
# Query tasks in the 4 relevant statuses
|
|
briefing_statuses = [
|
|
"running cora",
|
|
"outline review",
|
|
self.config.clickup.pr_review_status, # "pr needs review"
|
|
self.config.clickup.error_status, # "error"
|
|
]
|
|
|
|
try:
|
|
tasks = client.get_tasks_from_overall_lists(
|
|
space_id, statuses=briefing_statuses
|
|
)
|
|
except Exception as e:
|
|
log.error("Briefing: ClickUp query failed: %s", e)
|
|
return
|
|
|
|
# Bucket tasks by status
|
|
cora_tasks = []
|
|
outline_tasks = []
|
|
pr_tasks = []
|
|
error_tasks = []
|
|
|
|
for t in tasks:
|
|
if t.status == "running cora":
|
|
cora_tasks.append(t)
|
|
elif t.status == "outline review":
|
|
outline_tasks.append(t)
|
|
elif t.status == self.config.clickup.pr_review_status:
|
|
pr_tasks.append(t)
|
|
elif t.status == self.config.clickup.error_status:
|
|
error_tasks.append(t)
|
|
|
|
# If nothing needs attention, send a short all-clear
|
|
if not any([cora_tasks, outline_tasks, pr_tasks, error_tasks]):
|
|
self._notify("Morning briefing: All clear — nothing needs attention.", category="briefing")
|
|
return
|
|
|
|
lines = ["Morning Briefing", ""]
|
|
|
|
# Section 1: Cora reports
|
|
if cora_tasks:
|
|
cora_statuses = self._check_cora_file_status(cora_tasks)
|
|
lines.append(f"CORA REPORTS ({len(cora_tasks)})")
|
|
by_customer = self._group_by_customer(cora_tasks)
|
|
for customer, ctasks in by_customer.items():
|
|
lines.append(f" {customer}:")
|
|
for t in ctasks:
|
|
status_note = cora_statuses.get(t.id, "")
|
|
suffix = f" — {status_note}" if status_note else ""
|
|
lines.append(f" - {t.name}{suffix}")
|
|
lines.append("")
|
|
|
|
# Section 2: Outlines to approve
|
|
if outline_tasks:
|
|
lines.append(f"OUTLINES TO APPROVE ({len(outline_tasks)})")
|
|
by_customer = self._group_by_customer(outline_tasks)
|
|
for customer, ctasks in by_customer.items():
|
|
lines.append(f" {customer}:")
|
|
for t in ctasks:
|
|
lines.append(f" - {t.name}")
|
|
lines.append("")
|
|
|
|
# Section 3: PRs to review
|
|
if pr_tasks:
|
|
lines.append(f"PRs TO REVIEW ({len(pr_tasks)})")
|
|
by_customer = self._group_by_customer(pr_tasks)
|
|
for customer, ctasks in by_customer.items():
|
|
lines.append(f" {customer}:")
|
|
for t in ctasks:
|
|
lines.append(f" - {t.name}")
|
|
lines.append("")
|
|
|
|
# Section 4: Errors
|
|
if error_tasks:
|
|
lines.append(f"ERRORS ({len(error_tasks)})")
|
|
by_customer = self._group_by_customer(error_tasks)
|
|
for customer, ctasks in by_customer.items():
|
|
lines.append(f" {customer}:")
|
|
for t in ctasks:
|
|
lines.append(f" - {t.name}")
|
|
lines.append("")
|
|
|
|
message = "\n".join(lines).rstrip()
|
|
self._notify(message, category="briefing")
|
|
log.info("Morning briefing sent (%d cora, %d outlines, %d PRs, %d errors)",
|
|
len(cora_tasks), len(outline_tasks), len(pr_tasks), len(error_tasks))
|
|
|
|
def _group_by_customer(self, tasks) -> dict[str, list]:
|
|
"""Group tasks by their Customer custom field."""
|
|
groups: dict[str, list] = {}
|
|
for t in tasks:
|
|
customer = t.custom_fields.get("Client", "") or "Unknown"
|
|
groups.setdefault(str(customer), []).append(t)
|
|
return groups
|
|
|
|
def _check_cora_file_status(self, cora_tasks) -> dict[str, str]:
    """For each 'running cora' task, check where its xlsx sits on the network.

    The human Cora inbox, the link building watch folder and the content
    Cora inbox (each together with its ``processed`` subfolder) are scanned
    for .xlsx files. Each task's "Keyword" custom field (falling back to the
    task name) is fuzzily matched against the normalised file stems, and the
    set of folders holding a match decides the note.

    Args:
        cora_tasks: Tasks to look up; each needs ``id``, ``name`` and
            ``custom_fields``.

    Returns a dict of task_id ->human-readable status note.
    """
    from .tools.linkbuilding import _fuzzy_keyword_match, _normalize_for_match

    # Collect xlsx filenames from all relevant folders. Unset settings are
    # skipped: Path("") means the current working directory, which would
    # otherwise be scanned as if it were an inbox.
    folders: dict[str, Path] = {}
    for inbox_key, processed_key, configured in (
        ("cora_human", "cora_human_processed", self.config.autocora.cora_human_inbox),
        ("lb_inbox", "lb_processed", self.config.link_building.watch_folder),
        ("content_inbox", "content_processed", self.config.content.cora_inbox),
    ):
        if not configured:
            continue
        base = Path(configured)
        folders[inbox_key] = base
        folders[processed_key] = base / "processed"

    # Build a map: normalized_stem ->set of folder keys
    file_locations: dict[str, set[str]] = {}
    for folder_key, folder_path in folders.items():
        if not folder_path.exists():
            continue
        for xlsx in folder_path.glob("*.xlsx"):
            if xlsx.name.startswith("~$"):
                continue
            stem = xlsx.stem.lower().replace("-", " ").replace("_", " ")
            stem = re.sub(r"\s+", " ", stem).strip()
            file_locations.setdefault(stem, set()).add(folder_key)

    # Checked in order; the first rule whose folders intersect the matches wins.
    status_rules = (
        ({"lb_inbox", "content_inbox"}, "queued for processing"),
        ({"lb_processed", "content_processed"}, "pipeline complete"),
        ({"cora_human"}, "waiting for distribution"),
        ({"cora_human_processed"}, "distributed, waiting for pipeline"),
    )

    # Match each task's keyword against the file stems
    result: dict[str, str] = {}
    for task in cora_tasks:
        keyword = task.custom_fields.get("Keyword", "") or task.name
        keyword_norm = _normalize_for_match(str(keyword))

        # Find which folders have a matching file
        matched_folders: set[str] = set()
        for stem, locs in file_locations.items():
            # NOTE(review): the other call sites in this file pass
            # (stem, keyword); here the order is (keyword, stem). Confirm
            # that _fuzzy_keyword_match is symmetric.
            if _fuzzy_keyword_match(keyword_norm, stem, self._llm_plural_check):
                matched_folders.update(locs)

        note = "needs cora.xlsx from worker machine"
        for rule_folders, rule_note in status_rules:
            if matched_folders & rule_folders:
                note = rule_note
                break
        result[task.id] = note

    return result
|