"""Task scheduler with heartbeat, ClickUp polling, and folder watch support.""" from __future__ import annotations import contextlib import json import logging import re import shutil import threading import httpx from datetime import UTC, datetime from pathlib import Path from typing import TYPE_CHECKING from zoneinfo import ZoneInfo from croniter import croniter if TYPE_CHECKING: from .agent import Agent from .config import Config from .db import Database from .notifications import NotificationBus log = logging.getLogger(__name__) # Dedicated logger for "tool returned error but likely handled it" cases. # Writes to logs/pipeline_errors.log for manual review. _pipeline_err_log = logging.getLogger("cheddahbot.pipeline_errors") _pipeline_err_log.propagate = False _pe_dir = Path(__file__).resolve().parent.parent / "logs" _pe_dir.mkdir(exist_ok=True) _pe_handler = logging.FileHandler(_pe_dir / "pipeline_errors.log", encoding="utf-8") _pe_handler.setFormatter( logging.Formatter("%(asctime)s | %(message)s") ) _pipeline_err_log.addHandler(_pe_handler) _pipeline_err_log.setLevel(logging.INFO) HEARTBEAT_OK = "HEARTBEAT_OK" # Only tasks in these statuses are eligible for xlsx ->ClickUp matching. # "to do" is excluded to prevent accidental matches and AutoCora race conditions. # To force-reuse an xlsx for a "to do" task, set status to "running cora" first. _CORA_ELIGIBLE_STATUSES = frozenset({"running cora", "error"}) class Scheduler: # Tasks due within this window are eligible for execution DUE_DATE_WINDOW_WEEKS = 3 def __init__( self, config: Config, db: Database, agent: Agent, notification_bus: NotificationBus | None = None, ): self.config = config self.db = db self.agent = agent self.notification_bus = notification_bus self._stop_event = threading.Event() self._force_heartbeat = threading.Event() self._force_poll = threading.Event() self._thread: threading.Thread | None = None self._heartbeat_thread: threading.Thread | None = None self._clickup_thread: threading.Thread | None = None self._folder_watch_thread: threading.Thread | None = None self._autocora_thread: threading.Thread | None = None self._content_watch_thread: threading.Thread | None = None self._cora_distribute_thread: threading.Thread | None = None self._briefing_thread: threading.Thread | None = None self._force_autocora = threading.Event() self._force_briefing = threading.Event() self._last_briefing_date: str | None = None self._clickup_client = None self._field_filter_cache: dict | None = None self._loop_timestamps: dict[str, str | None] = { "heartbeat": None, "poll": None, "clickup": None, "folder_watch": None, "autocora": None, "content_watch": None, "cora_distribute": None, "briefing": None, } self._active_executions: dict[str, dict] = {} self._active_lock = threading.Lock() self._plural_cache: dict[tuple[str, str], bool] = {} def _llm_plural_check(self, a: str, b: str) -> bool: """Ask the chat brain if two keywords are the same aside from plural form. Uses OpenRouter with the configured CHEDDAH_CHAT_MODEL. Results are cached for the session to avoid repeat calls. """ key = (a, b) if a <= b else (b, a) if key in self._plural_cache: return self._plural_cache[key] api_key = self.config.openrouter_api_key model = self.config.chat_model if not api_key: log.warning("LLM plural check: no OpenRouter API key, returning False") return False try: resp = httpx.post( "https://openrouter.ai/api/v1/chat/completions", headers={"Authorization": f"Bearer {api_key}"}, json={ "model": model, "max_tokens": 5, "messages": [ { "role": "system", "content": ( "You compare SEO keywords. Reply with ONLY 'YES' or 'NO'. " "Answer YES only if the two keywords are identical except for " "singular vs plural word forms (e.g. 'shaft' vs 'shafts', " "'company' vs 'companies'). Answer NO if they differ in any " "other way (extra words, different words, different meaning)." ), }, { "role": "user", "content": f'Keyword A: "{a}"\nKeyword B: "{b}"', }, ], }, timeout=15, ) resp.raise_for_status() answer = (resp.json()["choices"][0]["message"]["content"] or "").strip() result = "YES" in answer.upper() log.debug("LLM plural check: '%s' vs '%s' ->%s (%s)", a, b, result, answer) except Exception as e: log.warning("LLM plural check failed for '%s' vs '%s': %s", a, b, e) result = False self._plural_cache[key] = result return result def start(self): """Start the scheduler, heartbeat, and ClickUp threads.""" self._thread = threading.Thread(target=self._poll_loop, daemon=True, name="scheduler") self._thread.start() self._heartbeat_thread = threading.Thread( target=self._heartbeat_loop, daemon=True, name="heartbeat" ) self._heartbeat_thread.start() # Start ClickUp polling if configured if self.config.clickup.enabled: self._clickup_thread = threading.Thread( target=self._clickup_loop, daemon=True, name="clickup" ) self._clickup_thread.start() log.info( "ClickUp polling started (interval=%dm)", self.config.clickup.poll_interval_minutes ) else: log.info("ClickUp integration disabled (no API token)") # Start folder watcher if configured watch_folder = self.config.link_building.watch_folder if watch_folder: self._folder_watch_thread = threading.Thread( target=self._folder_watch_loop, daemon=True, name="folder-watch" ) self._folder_watch_thread.start() log.info( "Folder watcher started (folder=%s, interval=%dm)", watch_folder, self.config.link_building.watch_interval_minutes, ) else: log.info("Folder watcher disabled (no watch_folder configured)") # Start AutoCora result polling if configured if self.config.autocora.enabled: self._autocora_thread = threading.Thread( target=self._autocora_loop, daemon=True, name="autocora" ) self._autocora_thread.start() log.info( "AutoCora polling started (interval=%dm)", self.config.autocora.poll_interval_minutes, ) else: log.info("AutoCora polling disabled") # Start content folder watcher if configured content_inbox = self.config.content.cora_inbox if content_inbox: self._content_watch_thread = threading.Thread( target=self._content_watch_loop, daemon=True, name="content-watch" ) self._content_watch_thread.start() log.info( "Content folder watcher started (folder=%s, interval=%dm)", content_inbox, self.config.link_building.watch_interval_minutes, ) else: log.info("Content folder watcher disabled (no cora_inbox configured)") # Start Cora distribution watcher if configured cora_human_inbox = self.config.autocora.cora_human_inbox if cora_human_inbox: self._cora_distribute_thread = threading.Thread( target=self._cora_distribute_loop, daemon=True, name="cora-distribute" ) self._cora_distribute_thread.start() log.info( "Cora distribution watcher started (folder=%s, interval=%dm)", cora_human_inbox, self.config.link_building.watch_interval_minutes, ) else: log.info("Cora distribution watcher disabled (no cora_human_inbox configured)") # Start morning briefing loop if ClickUp is configured if self.config.clickup.enabled: self._briefing_thread = threading.Thread( target=self._briefing_loop, daemon=True, name="briefing" ) self._briefing_thread.start() log.info("Morning briefing loop started") else: log.info("Morning briefing disabled (ClickUp not configured)") log.info( "Scheduler started (poll=%ds, heartbeat=%dm)", self.config.scheduler.poll_interval_seconds, self.config.scheduler.heartbeat_interval_minutes, ) def stop(self): self._stop_event.set() if self._clickup_client: self._clickup_client.close() def _notify(self, message: str, category: str = "clickup"): """Push a notification through the bus if available.""" if self.notification_bus: self.notification_bus.push(message, category) else: log.info("Notification [%s]: %s", category, message) # ── Loop control ── def _interruptible_wait(self, seconds: float, force_event: threading.Event | None = None): """Wait for *seconds*, returning early if stop or force event fires.""" remaining = seconds while remaining > 0 and not self._stop_event.is_set(): if force_event and force_event.is_set(): force_event.clear() return self._stop_event.wait(min(5, remaining)) remaining -= 5 def force_heartbeat(self): """Wake the heartbeat loop immediately.""" self._force_heartbeat.set() def force_poll(self): """Wake the scheduler poll loop immediately.""" self._force_poll.set() def force_autocora(self): """Wake the AutoCora poll loop immediately.""" self._force_autocora.set() def force_briefing(self): """Force the morning briefing to send now (ignores schedule/dedup).""" self._last_briefing_date = None self._force_briefing.set() def get_loop_timestamps(self) -> dict[str, str | None]: """Return last_run timestamps for all loops (in-memory).""" return dict(self._loop_timestamps) def _register_execution(self, task_id: str, name: str, tool_name: str) -> None: """Register a task as actively executing.""" with self._active_lock: self._active_executions[task_id] = { "name": name, "tool": tool_name, "started_at": datetime.now(UTC), "thread": threading.current_thread().name, } def _unregister_execution(self, task_id: str) -> None: """Remove a task from the active executions registry.""" with self._active_lock: self._active_executions.pop(task_id, None) def get_active_executions(self) -> dict[str, dict]: """Return a snapshot of currently executing tasks.""" with self._active_lock: return dict(self._active_executions) # ── Scheduled Tasks ── def _poll_loop(self): while not self._stop_event.is_set(): try: self._run_due_tasks() self._loop_timestamps["poll"] = datetime.now(UTC).isoformat() except Exception as e: log.error("Scheduler poll error: %s", e) self._interruptible_wait(self.config.scheduler.poll_interval_seconds, self._force_poll) def _run_due_tasks(self): tasks = self.db.get_due_tasks() for task in tasks: try: log.info("Running scheduled task: %s", task["name"]) result = self.agent.execute_task(task["prompt"]) self.db.log_task_run(task["id"], result=result[:2000]) # Calculate next run schedule = task["schedule"] if schedule.startswith("once:"): # One-time task, disable it self.db.disable_task(task["id"]) else: # Cron schedule - calculate next run now = datetime.now(UTC) cron = croniter(schedule, now) next_run = cron.get_next(datetime) self.db.update_task_next_run(task["id"], next_run.isoformat()) except Exception as e: log.error("Task '%s' failed: %s", task["name"], e) self.db.log_task_run(task["id"], error=str(e)) # ── Heartbeat ── def _heartbeat_loop(self): interval = self.config.scheduler.heartbeat_interval_minutes * 60 # Wait a bit before first heartbeat self._stop_event.wait(60) while not self._stop_event.is_set(): try: self._run_heartbeat() self._loop_timestamps["heartbeat"] = datetime.now(UTC).isoformat() except Exception as e: log.error("Heartbeat error: %s", e) self._interruptible_wait(interval, self._force_heartbeat) def _run_heartbeat(self): heartbeat_path = self.config.identity_dir / "HEARTBEAT.md" if not heartbeat_path.exists(): return checklist = heartbeat_path.read_text(encoding="utf-8") prompt = ( f"HEARTBEAT CHECK. Review this checklist and take action if needed.\n" f"If nothing needs attention, respond with exactly: {HEARTBEAT_OK}\n\n" f"{checklist}" ) result = self.agent.execute_task(prompt, system_context=checklist) if HEARTBEAT_OK in result: log.debug("Heartbeat: all clear") else: log.info("Heartbeat action taken: %s", result[:200]) # ── ClickUp Integration ── def _get_clickup_client(self): """Lazy-init the ClickUp API client.""" if self._clickup_client is None: from .clickup import ClickUpClient self._clickup_client = ClickUpClient( api_token=self.config.clickup.api_token, workspace_id=self.config.clickup.workspace_id, task_type_field_name=self.config.clickup.task_type_field_name, ) return self._clickup_client # Maximum time a task can stay in "automation underway" before recovery (seconds) STALE_TASK_THRESHOLD_SECONDS = 6 * 60 * 60 # 6 hours def _clickup_loop(self): """Poll ClickUp for tasks on a regular interval.""" interval = self.config.clickup.poll_interval_minutes * 60 # Wait before first poll to let other systems initialize self._stop_event.wait(30) while not self._stop_event.is_set(): try: self._poll_clickup() self._recover_stale_tasks() self._loop_timestamps["clickup"] = datetime.now(UTC).isoformat() except Exception as e: log.error("ClickUp poll error: %s", e) self._interruptible_wait(interval) def _discover_field_filter(self, client): """Discover and cache the Work Category field UUID + option map.""" space_id = self.config.clickup.space_id list_ids = client.get_list_ids_from_space(space_id) if not list_ids: log.warning("No lists found in space %s — cannot discover field filter", space_id) return None # Use the first list to discover field metadata first_list = next(iter(list_ids)) field_name = self.config.clickup.task_type_field_name result = client.discover_field_filter(first_list, field_name) if result: log.info( "Discovered field filter for '%s': field_id=%s, options=%s", field_name, result["field_id"], list(result["options"].keys()), ) else: log.warning( "Field '%s' not found in list %s — falling back to client-side filtering", field_name, first_list, ) return result def _poll_clickup(self): """Poll ClickUp for eligible tasks and execute them immediately.""" client = self._get_clickup_client() space_id = self.config.clickup.space_id if not space_id: log.warning("ClickUp space_id not configured, skipping poll") return skill_map = self.config.clickup.skill_map if not skill_map: log.debug("No skill_map configured, skipping ClickUp poll") return # Discover field filter on first poll if self._field_filter_cache is None: self._field_filter_cache = self._discover_field_filter(client) or {} # Build API filters now_ms = int(datetime.now(UTC).timestamp() * 1000) due_date_lt = now_ms + (self.DUE_DATE_WINDOW_WEEKS * 7 * 24 * 60 * 60 * 1000) # Explicit allowlist of Work Category values to poll allowed_types = self.config.clickup.poll_task_types or list(skill_map.keys()) custom_fields_filter = None if self._field_filter_cache and self._field_filter_cache.get("options"): field_id = self._field_filter_cache["field_id"] options = self._field_filter_cache["options"] matching_opt_ids = [options[name] for name in allowed_types if name in options] if matching_opt_ids: import json as _json custom_fields_filter = _json.dumps( [{"field_id": field_id, "operator": "ANY", "value": matching_opt_ids}] ) tasks = client.get_tasks_from_space( space_id, statuses=self.config.clickup.poll_statuses, due_date_lt=due_date_lt, custom_fields=custom_fields_filter, ) for task in tasks: # Client-side verify: Work Category must be in allowed types AND skill_map if task.task_type not in allowed_types or task.task_type not in skill_map: continue # Respect auto_execute flag — skip tasks that require manual trigger # Unless the task status matches an auto_execute_on_status entry mapping = skill_map[task.task_type] if not mapping.get("auto_execute", False): status_triggers = mapping.get("auto_execute_on_status", []) if task.status.lower() not in [s.lower() for s in status_triggers]: hint = mapping.get("trigger_hint", "manual trigger only") log.debug( "Skipping task '%s' (type=%s): auto_execute is false (%s)", task.name, task.task_type, hint, ) continue # Client-side verify: due_date must exist and be within window if not task.due_date: continue try: task_due_ms = int(task.due_date) if task_due_ms > due_date_lt: continue except (ValueError, TypeError): continue self._execute_task(task) def _execute_task(self, task): """Execute a single ClickUp task immediately. Tools own their own ClickUp sync (status, comments, attachments). The scheduler just calls the tool and handles errors. """ skill_map = self.config.clickup.skill_map mapping = skill_map.get(task.task_type, {}) tool_name = mapping.get("tool", "") if not tool_name: log.warning("No tool in skill_map for type '%s'", task.task_type) return task_id = task.id client = self._get_clickup_client() # Validate required fields before starting args = self._build_tool_args_from_task(task, mapping) required = mapping.get("required_fields", []) missing = [f for f in required if not args.get(f)] if missing: field_mapping = mapping.get("field_mapping", {}) missing_clickup = [field_mapping.get(f, f) for f in missing] msg = f"Skipped: missing required field(s): {', '.join(missing_clickup)}" log.debug("Skipping ClickUp task %s (%s) — %s", task_id, task.name, msg) self._notify( f"Skipped ClickUp task: **{task.name}**\n{msg}", category="clickup", ) return # Move to "automation underway" on ClickUp immediately client.update_task_status(task_id, self.config.clickup.automation_status) log.info("Executing ClickUp task: %s ->%s", task.name, tool_name) self._notify(f"Executing ClickUp task: **{task.name}** ->Skill: `{tool_name}`") self._register_execution(task_id, task.name, tool_name) try: # args already built during validation above args["clickup_task_id"] = task_id args["clickup_task_status"] = task.status # Map Work Category to content_type so create_content routes correctly if tool_name == "create_content": if task.task_type == "On Page Optimization": args["content_type"] = "on page optimization" elif task.task_type == "Content Creation": args["content_type"] = "new content" # Execute the skill via the tool registry if hasattr(self.agent, "_tools") and self.agent._tools: result = self.agent._tools.execute(tool_name, args) else: result = self.agent.execute_task( f"Execute the '{tool_name}' tool for ClickUp task '{task.name}'. " f"Task description: {task.custom_fields}" ) # Check if the tool skipped or reported an error without doing work if result.startswith("Skipped:") or result.startswith("Error:"): client.add_comment( task_id, f"[WARNING]CheddahBot could not execute this task.\n\n{result[:2000]}", ) client.update_task_status(task_id, self.config.clickup.error_status) self._notify(f"ClickUp task skipped: **{task.name}**\nReason: {result[:200]}") log.debug("ClickUp task skipped: %s — %s", task.name, result[:200]) return # Tool handled its own ClickUp sync — just log success self._notify(f"**{task.name}** done — ran `{tool_name}` successfully.") log.info("ClickUp task completed: %s", task.name) except Exception as e: client.add_comment( task_id, f"[FAILED]CheddahBot failed to complete this task.\n\nError: {str(e)[:2000]}" ) client.update_task_status(task_id, self.config.clickup.error_status) self._notify( f"ClickUp task failed: **{task.name}**\n" f"Skill: `{tool_name}` | Error: {str(e)[:200]}" ) log.error("ClickUp task failed: %s — %s", task.name, e) finally: self._unregister_execution(task_id) def _recover_stale_tasks(self): """Reset tasks stuck in 'automation underway' for too long. If a task has been in the automation status for more than STALE_TASK_THRESHOLD_SECONDS (default 2 hours), reset it to the first poll status (usually 'to do') so it gets retried. """ client = self._get_clickup_client() space_id = self.config.clickup.space_id if not space_id: return automation_status = self.config.clickup.automation_status try: stale_tasks = client.get_tasks_from_space(space_id, statuses=[automation_status]) except Exception as e: log.warning("Failed to query stale tasks: %s", e) return now_ms = int(datetime.now(UTC).timestamp() * 1000) threshold_ms = self.STALE_TASK_THRESHOLD_SECONDS * 1000 for task in stale_tasks: if not task.date_updated: continue try: updated_ms = int(task.date_updated) except (ValueError, TypeError): continue age_ms = now_ms - updated_ms if age_ms > threshold_ms: poll_sts = self.config.clickup.poll_statuses reset_status = poll_sts[0] if poll_sts else "to do" log.warning( "Recovering stale task %s (%s) — stuck in '%s' for %.1f hours", task.id, task.name, automation_status, age_ms / 3_600_000, ) client.update_task_status(task.id, reset_status) client.add_comment( task.id, f"[WARNING]CheddahBot auto-recovered this task. It was stuck in " f"'{automation_status}' for {age_ms / 3_600_000:.1f} hours. " f"Reset to '{reset_status}' for retry.", ) self._notify( f"Recovered stale task: **{task.name}** — " f"reset from '{automation_status}' to '{reset_status}'", category="clickup", ) def _build_tool_args_from_task(self, task, mapping: dict) -> dict: """Build tool arguments from a ClickUp task using the field mapping.""" field_mapping = mapping.get("field_mapping", {}) args = {} for tool_param, source in field_mapping.items(): if source == "task_name": args[tool_param] = task.name elif source == "task_description": args[tool_param] = task.custom_fields.get("description", "") else: # Look up custom field by name args[tool_param] = task.custom_fields.get(source, "") return args # ── AutoCora Result Polling ── def _autocora_loop(self): """Auto-submit jobs for today's tasks, then poll for results.""" interval = self.config.autocora.poll_interval_minutes * 60 # Wait before first poll self._stop_event.wait(30) while not self._stop_event.is_set(): try: self._auto_submit_cora_jobs() self._poll_autocora_results() self._loop_timestamps["autocora"] = datetime.now(UTC).isoformat() except Exception as e: log.error("AutoCora poll error: %s", e) self._interruptible_wait(interval, self._force_autocora) def _auto_submit_cora_jobs(self): """Auto-submit AutoCora jobs using multi-pass sweep (no explicit date).""" from .tools.autocora import submit_autocora_jobs if not self.config.clickup.api_token: return ctx = { "config": self.config, "db": self.db, "agent": self.agent, } result = submit_autocora_jobs(ctx=ctx) log.info("AutoCora auto-submit (sweep): %s", result) def _poll_autocora_results(self): """Check for completed AutoCora results and update ClickUp tasks. Scans the results folder for .result files. Each file contains JSON with task_ids and status. After processing, moves the file to results/processed/ to prevent re-processing. """ from .tools.autocora import _parse_result autocora = self.config.autocora results_dir = Path(autocora.results_dir) if not results_dir.exists(): log.debug("AutoCora results dir does not exist: %s", results_dir) return result_files = list(results_dir.glob("*.result")) if not result_files: return client = self._get_clickup_client() if self.config.clickup.api_token else None processed_dir = results_dir / "processed" for result_path in result_files: raw = result_path.read_text(encoding="utf-8").strip() result_data = _parse_result(raw) task_ids = result_data.get("task_ids", []) status = result_data.get("status", "UNKNOWN") keyword = result_data.get("keyword", result_path.stem) if status == "SUCCESS": if client and task_ids: for tid in task_ids: client.update_task_status(tid, autocora.success_status) client.add_comment(tid, f"Cora report generated for \"{keyword}\" — ready for you to look at it.") self._notify( f"AutoCora SUCCESS: **{keyword}** — " f"{len(task_ids)} task(s) moved to '{autocora.success_status}'", category="autocora", ) elif status == "FAILURE": reason = result_data.get("reason", "unknown error") if client and task_ids: for tid in task_ids: client.update_task_status(tid, autocora.error_status) client.add_comment( tid, f"Cora report failed for keyword: {keyword}\nReason: {reason}", ) self._notify( f"AutoCora FAILURE: **{keyword}** — {reason}", category="autocora", ) log.debug("AutoCora result for '%s': %s", keyword, status) # Move result file to processed/ processed_dir.mkdir(exist_ok=True) try: result_path.rename(processed_dir / result_path.name) except OSError as e: log.warning("Could not move result file %s: %s", result_path.name, e) # If it already exists in processed/, delete the source to stop reprocessing if (processed_dir / result_path.name).exists(): result_path.unlink(missing_ok=True) # ── Folder Watcher ── def _folder_watch_loop(self): """Poll the watch folder for new .xlsx files on a regular interval.""" interval = self.config.link_building.watch_interval_minutes * 60 # Wait before first scan to let other systems initialize self._stop_event.wait(60) while not self._stop_event.is_set(): try: self._scan_watch_folder() self._loop_timestamps["folder_watch"] = datetime.now(UTC).isoformat() except Exception as e: log.error("Folder watcher error: %s", e) self._interruptible_wait(interval) def _scan_watch_folder(self): """Scan the watch folder for new .xlsx files and match to ClickUp tasks.""" watch_folder = Path(self.config.link_building.watch_folder) if not watch_folder.exists(): log.warning("Watch folder does not exist: %s", watch_folder) return xlsx_files = sorted(watch_folder.glob("*.xlsx")) if not xlsx_files: log.debug("No .xlsx files in watch folder") return # Check processed/ subfolder for already-handled files processed_dir = watch_folder / "processed" processed_names = set() if processed_dir.exists(): processed_names = {f.name for f in processed_dir.glob("*.xlsx")} for xlsx_path in xlsx_files: filename = xlsx_path.name # Skip Office temp/lock files (e.g. ~$insert_molding.xlsx) if filename.startswith("~$"): continue # Skip files already in processed/ if filename in processed_names: continue log.info("Folder watcher: new .xlsx found: %s", filename) self._process_watched_file(xlsx_path) def _process_watched_file(self, xlsx_path: Path): """Try to match a watched .xlsx file to a ClickUp task and run the pipeline.""" filename = xlsx_path.name # Normalize filename stem for matching # e.g., "precision-cnc-machining" ->"precision cnc machining" stem = xlsx_path.stem.lower().replace("-", " ").replace("_", " ") stem = re.sub(r"\s+", " ", stem).strip() # Try to find matching ClickUp task matched_task = None if self.config.clickup.enabled: matched_task = self._match_xlsx_to_clickup(stem) if not matched_task: log.warning("No ClickUp task match for '%s' — skipping", filename) self._notify( f"Folder watcher: no ClickUp match for **{filename}**.\n" f"Create a Link Building task with Keyword " f"matching '{stem}' to enable auto-processing.", category="linkbuilding", ) return # Extract tool args from matched task task_id = matched_task.id log.info("Matched '%s' to ClickUp task %s (%s)", filename, task_id, matched_task.name) # Set ClickUp status to "automation underway" client = self._get_clickup_client() client.update_task_status(task_id, self.config.clickup.automation_status) self._notify( f"Folder watcher: matched **{filename}** to ClickUp task **{matched_task.name}**.\n" f"Starting Cora Backlinks pipeline...", category="linkbuilding", ) # Build tool args from the matched task's custom fields money_site_url = matched_task.custom_fields.get("IMSURL", "") or "" if not money_site_url: log.warning("Task %s (%s) missing IMSURL — skipping", task_id, matched_task.name) client.add_comment( task_id, "[FAILED]Link building skipped — IMSURL field is empty. " "Set the IMSURL field in ClickUp so the pipeline knows where to build links.", ) client.update_task_status(task_id, self.config.clickup.error_status) self._notify( f"Folder watcher: **{filename}** matched task **{matched_task.name}** " f"but **IMSURL is empty**. Set the IMSURL field in ClickUp before " f"the file can be processed.", category="linkbuilding", ) return args = { "xlsx_path": str(xlsx_path), "project_name": matched_task.custom_fields.get("Keyword", "") or matched_task.name, "money_site_url": money_site_url, "custom_anchors": matched_task.custom_fields.get("CustomAnchors", "") or "", "cli_flags": matched_task.custom_fields.get("CLIFlags", "") or "", "clickup_task_id": task_id, } # Parse branded_plus_ratio bp_raw = matched_task.custom_fields.get("BrandedPlusRatio", "") if bp_raw: with contextlib.suppress(ValueError, TypeError): args["branded_plus_ratio"] = float(bp_raw) self._register_execution(task_id, matched_task.name, "run_cora_backlinks") try: # Execute via tool registry if hasattr(self.agent, "_tools") and self.agent._tools: result = self.agent._tools.execute("run_cora_backlinks", args) else: result = "Error: tool registry not available" if "Error" in result and "## Step" not in result: # Pipeline failed — tool handles its own ClickUp error status _pipeline_err_log.info( "LINKBUILDING | task=%s | file=%s | result=%s", task_id, filename, result[:500], ) self._notify( f"Folder watcher: pipeline **failed** for **{filename}**.\n" f"Error: {result[:200]}", category="linkbuilding", ) else: # Success — move file to processed/ processed_dir = xlsx_path.parent / "processed" processed_dir.mkdir(exist_ok=True) dest = processed_dir / filename try: shutil.move(str(xlsx_path), str(dest)) log.info("Moved %s to %s", filename, dest) except OSError as e: log.warning("Could not move %s to processed: %s", filename, e) self._notify( f"Link building finished for **{matched_task.name}** — " f"Cora backlinks placed, XLSX moved to processed.", category="linkbuilding", ) except Exception as e: log.error("Folder watcher pipeline error for %s: %s", filename, e) client.add_comment( task_id, f"[FAILED]Link building pipeline crashed.\n\nError: {str(e)[:2000]}", ) client.update_task_status(task_id, self.config.clickup.error_status) finally: self._unregister_execution(task_id) def _match_xlsx_to_clickup(self, normalized_stem: str): """Find a ClickUp Link Building task whose Keyword matches the file stem. Returns the matched ClickUpTask or None. """ from .tools.linkbuilding import _fuzzy_keyword_match, _normalize_for_match client = self._get_clickup_client() space_id = self.config.clickup.space_id if not space_id: return None try: tasks = client.get_tasks_from_overall_lists(space_id, statuses=list(_CORA_ELIGIBLE_STATUSES)) except Exception as e: log.warning("ClickUp query failed in _match_xlsx_to_clickup: %s", e) return None for task in tasks: if task.status not in _CORA_ELIGIBLE_STATUSES: continue if task.task_type != "Link Building": continue lb_method = task.custom_fields.get("LB Method", "") if lb_method and lb_method != "Cora Backlinks": continue keyword = task.custom_fields.get("Keyword", "") if not keyword: continue keyword_norm = _normalize_for_match(str(keyword)) if _fuzzy_keyword_match(normalized_stem, keyword_norm, self._llm_plural_check): return task return None # ── Content Folder Watcher ── def _content_watch_loop(self): """Poll the content Cora inbox for new .xlsx files on a regular interval.""" interval = self.config.link_building.watch_interval_minutes * 60 # Wait before first scan to let other systems initialize self._stop_event.wait(60) while not self._stop_event.is_set(): try: self._scan_content_folder() self._loop_timestamps["content_watch"] = datetime.now(UTC).isoformat() except Exception as e: log.error("Content folder watcher error: %s", e) self._interruptible_wait(interval) def _scan_content_folder(self): """Scan the content Cora inbox for new .xlsx files and match to ClickUp tasks.""" inbox = Path(self.config.content.cora_inbox) if not inbox.exists(): log.warning("Content Cora inbox does not exist: %s", inbox) return xlsx_files = sorted(inbox.glob("*.xlsx")) if not xlsx_files: log.debug("No .xlsx files in content Cora inbox") return # Check processed/ subfolder for already-handled files processed_dir = inbox / "processed" processed_names = set() if processed_dir.exists(): processed_names = {f.name for f in processed_dir.glob("*.xlsx")} for xlsx_path in xlsx_files: filename = xlsx_path.name # Skip Office temp/lock files if filename.startswith("~$"): continue # Skip files already in processed/ if filename in processed_names: continue log.info("Content watcher: new .xlsx found: %s", filename) self._process_content_file(xlsx_path) def _process_content_file(self, xlsx_path: Path): """Match a content Cora .xlsx to a ClickUp task and run create_content.""" filename = xlsx_path.name stem = xlsx_path.stem.lower().replace("-", " ").replace("_", " ") stem = re.sub(r"\s+", " ", stem).strip() # Try to find matching ClickUp task matched_task = None if self.config.clickup.enabled: matched_task = self._match_xlsx_to_content_task(stem) if not matched_task: log.warning("No ClickUp content task match for '%s' — skipping", filename) self._notify( f"Content watcher: no ClickUp match for **{filename}**.\n" f"Create a Content Creation or On Page Optimization task with Keyword " f"matching '{stem}' to enable auto-processing.", category="content", ) return task_id = matched_task.id log.info("Matched '%s' to ClickUp task %s (%s)", filename, task_id, matched_task.name) # Set ClickUp status to "automation underway" client = self._get_clickup_client() client.update_task_status(task_id, self.config.clickup.automation_status) self._notify( f"Content watcher: matched **{filename}** to ClickUp task " f"**{matched_task.name}**.\nStarting content creation pipeline...", category="content", ) # Extract fields from the matched task keyword = matched_task.custom_fields.get("Keyword", "") or matched_task.name url = matched_task.custom_fields.get("IMSURL", "") or "" cli_flags = matched_task.custom_fields.get("CLIFlags", "") or "" args = { "keyword": str(keyword), "url": str(url), "cli_flags": str(cli_flags), "clickup_task_id": task_id, } self._register_execution(task_id, matched_task.name, "create_content") try: if hasattr(self.agent, "_tools") and self.agent._tools: result = self.agent._tools.execute("create_content", args) else: result = "Error: tool registry not available" if result.startswith("Error:"): _pipeline_err_log.info( "CONTENT | task=%s | file=%s | result=%s", task_id, filename, result[:500], ) self._notify( f"Content watcher: pipeline **failed** for **{filename}**.\n" f"Error: {result[:200]}", category="content", ) else: # Success — move file to processed/ processed_dir = xlsx_path.parent / "processed" processed_dir.mkdir(exist_ok=True) dest = processed_dir / filename try: shutil.move(str(xlsx_path), str(dest)) log.info("Moved %s to %s", filename, dest) except OSError as e: log.warning("Could not move %s to processed: %s", filename, e) self._notify( f"Content optimization finished for **{matched_task.name}** — " f"content created, XLSX moved to processed.", category="content", ) except Exception as e: log.error("Content watcher pipeline error for %s: %s", filename, e) client.add_comment( task_id, f"[FAILED]Content pipeline crashed.\n\nError: {str(e)[:2000]}", ) client.update_task_status(task_id, self.config.clickup.error_status) finally: self._unregister_execution(task_id) def _match_xlsx_to_content_task(self, normalized_stem: str): """Find a ClickUp content task whose Keyword matches the file stem. Matches tasks with Work Category in ("Content Creation", "On Page Optimization"). Returns the matched ClickUpTask or None. """ from .tools.linkbuilding import _fuzzy_keyword_match, _normalize_for_match client = self._get_clickup_client() space_id = self.config.clickup.space_id if not space_id: return None try: tasks = client.get_tasks_from_overall_lists(space_id, statuses=list(_CORA_ELIGIBLE_STATUSES)) except Exception as e: log.warning("ClickUp query failed in _match_xlsx_to_content_task: %s", e) return None content_types = ("Content Creation", "On Page Optimization") for task in tasks: if task.status not in _CORA_ELIGIBLE_STATUSES: continue if task.task_type not in content_types: continue keyword = task.custom_fields.get("Keyword", "") if not keyword: continue keyword_norm = _normalize_for_match(str(keyword)) if _fuzzy_keyword_match(normalized_stem, keyword_norm, self._llm_plural_check): return task return None # ── Cora Distribution Watcher ── def _cora_distribute_loop(self): """Poll the human Cora inbox and distribute xlsx to pipeline inboxes.""" interval = self.config.link_building.watch_interval_minutes * 60 # Wait before first scan to let other systems initialize self._stop_event.wait(60) while not self._stop_event.is_set(): try: self._scan_cora_human_inbox() self._loop_timestamps["cora_distribute"] = datetime.now(UTC).isoformat() except Exception as e: log.error("Cora distribution watcher error: %s", e) self._interruptible_wait(interval) def _scan_cora_human_inbox(self): """Scan the human Cora inbox for new .xlsx files and distribute them.""" inbox = Path(self.config.autocora.cora_human_inbox) if not inbox.exists(): log.warning("Cora human inbox does not exist: %s", inbox) return xlsx_files = sorted(inbox.glob("*.xlsx")) if not xlsx_files: log.debug("No .xlsx files in Cora human inbox") return # Check processed/ subfolder for already-handled files processed_dir = inbox / "processed" processed_names = set() if processed_dir.exists(): processed_names = {f.name for f in processed_dir.glob("*.xlsx")} for xlsx_path in xlsx_files: filename = xlsx_path.name if filename.startswith("~$"): continue if filename in processed_names: continue log.info("Cora distributor: new .xlsx found: %s", filename) self._distribute_cora_file(xlsx_path) def _distribute_cora_file(self, xlsx_path: Path): """Match a Cora .xlsx to ClickUp tasks and copy to the right pipeline inboxes.""" from .tools.linkbuilding import _fuzzy_keyword_match, _normalize_for_match filename = xlsx_path.name stem = xlsx_path.stem.lower().replace("-", " ").replace("_", " ") stem = re.sub(r"\s+", " ", stem).strip() if not self.config.clickup.enabled: log.warning("Cora distributor: ClickUp disabled, cannot match '%s'", filename) return client = self._get_clickup_client() space_id = self.config.clickup.space_id if not space_id: return try: tasks = client.get_tasks_from_overall_lists(space_id, statuses=list(_CORA_ELIGIBLE_STATUSES)) except Exception as e: log.warning("ClickUp query failed in _distribute_cora_file: %s", e) return # Find ALL matching tasks across all types has_lb = False has_content = False matched_names = [] matched_error_tasks = [] for task in tasks: if task.status not in _CORA_ELIGIBLE_STATUSES: continue keyword = task.custom_fields.get("Keyword", "") if not keyword: continue keyword_norm = _normalize_for_match(str(keyword)) if not _fuzzy_keyword_match(stem, keyword_norm, self._llm_plural_check): continue matched_names.append(task.name) if task.status == self.config.clickup.error_status: matched_error_tasks.append(task) if task.task_type == "Link Building": has_lb = True elif task.task_type in ("Content Creation", "On Page Optimization"): has_content = True if not has_lb and not has_content: log.warning("No ClickUp task match for '%s' — leaving in inbox", filename) self._notify( f"Cora distributor: no ClickUp match for **{filename}**.\n" f"Create a task with Keyword matching '{stem}' to enable distribution.", category="autocora", ) return # Copy to the appropriate pipeline inboxes copied_to = [] try: if has_lb and self.config.link_building.watch_folder: dest_dir = Path(self.config.link_building.watch_folder) dest_dir.mkdir(parents=True, exist_ok=True) shutil.copy2(str(xlsx_path), str(dest_dir / filename)) copied_to.append(f"link building ({dest_dir})") if has_content and self.config.content.cora_inbox: dest_dir = Path(self.config.content.cora_inbox) dest_dir.mkdir(parents=True, exist_ok=True) shutil.copy2(str(xlsx_path), str(dest_dir / filename)) copied_to.append(f"content ({dest_dir})") except OSError as e: log.error("Cora distributor: copy failed for %s: %s", filename, e) self._notify( f"Cora distributor: **copy failed** for **{filename}**.\nError: {e}", category="autocora", ) return # Reset any matched tasks that were in "error" back to "running cora" # so the pipeline picks them up again. for task in matched_error_tasks: try: client.update_task_status(task.id, "running cora") client.add_comment( task.id, f"New Cora XLSX distributed — resetting from error to running cora.", ) log.info("Distributor: reset task %s (%s) from error ->running cora", task.id, task.name) except Exception as e: log.warning("Distributor: failed to reset task %s: %s", task.id, e) # Move original to processed/ processed_dir = xlsx_path.parent / "processed" processed_dir.mkdir(exist_ok=True) try: shutil.move(str(xlsx_path), str(processed_dir / filename)) except OSError as e: log.warning("Could not move %s to processed: %s", filename, e) log.info("Cora distributor: %s ->%s", filename, ", ".join(copied_to)) self._notify( f"Cora distributor: **{filename}** copied to {', '.join(copied_to)}.\n" f"Matched tasks: {', '.join(matched_names)}", category="autocora", ) # Post ClickUp comment on matched tasks self._comment_distributed_tasks(client, xlsx_path.stem, copied_to, tasks, stem) def _comment_distributed_tasks( self, client, xlsx_stem: str, copied_to: list[str], tasks, normalized_stem: str ): """Post a ClickUp comment on tasks when a Cora report is distributed.""" from .tools.autocora import _slugify from .tools.linkbuilding import _fuzzy_keyword_match, _normalize_for_match parts = [] for dest in copied_to: if dest.startswith("link"): parts.append("cora-inbox") elif dest.startswith("content"): parts.append("content-cora-inbox") else: parts.append(dest) dest_label = " and ".join(parts) comment = f"Cora XLSX moved to {dest_label}." # Try to find task_ids from job JSON files task_ids: list[str] = [] jobs_dir = Path(self.config.autocora.jobs_dir) slug = _slugify(xlsx_stem) if jobs_dir.is_dir(): # Check both jobs/ root and processed/ subfolder search_dirs = [jobs_dir] processed = jobs_dir / "processed" if processed.is_dir(): search_dirs.append(processed) for search_dir in search_dirs: for job_file in search_dir.glob("job-*.json"): # Strip "job-{timestamp}-" prefix to get the slug parts = job_file.stem.split("-", 2) if len(parts) >= 3: job_slug = parts[2] if job_slug == slug: try: data = json.loads(job_file.read_text(encoding="utf-8")) task_ids = data.get("task_ids", []) except (json.JSONDecodeError, OSError) as e: log.warning("Could not read job file %s: %s", job_file, e) break if task_ids: break # Fallback: match from the task list we already have if not task_ids: for task in tasks: keyword = task.custom_fields.get("Keyword", "") if not keyword: continue keyword_norm = _normalize_for_match(str(keyword)) if _fuzzy_keyword_match(normalized_stem, keyword_norm, self._llm_plural_check): task_ids.append(task.id) # Post comments for tid in task_ids: try: client.add_comment(tid, comment) except Exception as e: log.warning("Failed to comment on task %s: %s", tid, e) # ── Morning Briefing ── _CENTRAL = ZoneInfo("America/Chicago") def _briefing_loop(self): """Check every 60s if it's time to send the morning briefing.""" # Wait before first check self._stop_event.wait(30) while not self._stop_event.is_set(): try: forced = self._force_briefing.is_set() if forced: self._force_briefing.clear() now = datetime.now(self._CENTRAL) today = now.strftime("%Y-%m-%d") is_weekday = now.weekday() < 5 # Mon=0 .. Fri=4 target_hour = 6 if is_weekday else 8 target_minute = 30 if is_weekday else 0 at_briefing_time = ( now.hour == target_hour and now.minute == target_minute ) already_sent = self._last_briefing_date == today if forced or (at_briefing_time and not already_sent): self._send_morning_briefing() self._last_briefing_date = today self._loop_timestamps["briefing"] = datetime.now(UTC).isoformat() except Exception as e: log.error("Briefing loop error: %s", e) self._interruptible_wait(60, self._force_briefing) def _send_morning_briefing(self): """Build and send the morning briefing notification.""" client = self._get_clickup_client() space_id = self.config.clickup.space_id if not space_id: log.warning("Briefing skipped — no space_id configured") return # Query tasks in the 4 relevant statuses briefing_statuses = [ "running cora", "outline review", self.config.clickup.pr_review_status, # "pr needs review" self.config.clickup.error_status, # "error" ] try: tasks = client.get_tasks_from_overall_lists( space_id, statuses=briefing_statuses ) except Exception as e: log.error("Briefing: ClickUp query failed: %s", e) return # Bucket tasks by status cora_tasks = [] outline_tasks = [] pr_tasks = [] error_tasks = [] for t in tasks: if t.status == "running cora": cora_tasks.append(t) elif t.status == "outline review": outline_tasks.append(t) elif t.status == self.config.clickup.pr_review_status: pr_tasks.append(t) elif t.status == self.config.clickup.error_status: error_tasks.append(t) # If nothing needs attention, send a short all-clear if not any([cora_tasks, outline_tasks, pr_tasks, error_tasks]): self._notify("Morning briefing: All clear — nothing needs attention.", category="briefing") return lines = ["Morning Briefing", ""] # Section 1: Cora reports if cora_tasks: cora_statuses = self._check_cora_file_status(cora_tasks) lines.append(f"CORA REPORTS ({len(cora_tasks)})") by_customer = self._group_by_customer(cora_tasks) for customer, ctasks in by_customer.items(): lines.append(f" {customer}:") for t in ctasks: status_note = cora_statuses.get(t.id, "") suffix = f" — {status_note}" if status_note else "" lines.append(f" - {t.name}{suffix}") lines.append("") # Section 2: Outlines to approve if outline_tasks: lines.append(f"OUTLINES TO APPROVE ({len(outline_tasks)})") by_customer = self._group_by_customer(outline_tasks) for customer, ctasks in by_customer.items(): lines.append(f" {customer}:") for t in ctasks: lines.append(f" - {t.name}") lines.append("") # Section 3: PRs to review if pr_tasks: lines.append(f"PRs TO REVIEW ({len(pr_tasks)})") by_customer = self._group_by_customer(pr_tasks) for customer, ctasks in by_customer.items(): lines.append(f" {customer}:") for t in ctasks: lines.append(f" - {t.name}") lines.append("") # Section 4: Errors if error_tasks: lines.append(f"ERRORS ({len(error_tasks)})") by_customer = self._group_by_customer(error_tasks) for customer, ctasks in by_customer.items(): lines.append(f" {customer}:") for t in ctasks: lines.append(f" - {t.name}") lines.append("") message = "\n".join(lines).rstrip() self._notify(message, category="briefing") log.info("Morning briefing sent (%d cora, %d outlines, %d PRs, %d errors)", len(cora_tasks), len(outline_tasks), len(pr_tasks), len(error_tasks)) def _group_by_customer(self, tasks) -> dict[str, list]: """Group tasks by their Customer custom field.""" groups: dict[str, list] = {} for t in tasks: customer = t.custom_fields.get("Client", "") or "Unknown" groups.setdefault(str(customer), []).append(t) return groups def _check_cora_file_status(self, cora_tasks) -> dict[str, str]: """For each 'running cora' task, check where its xlsx sits on the network. Returns a dict of task_id ->human-readable status note. """ from .tools.linkbuilding import _fuzzy_keyword_match, _normalize_for_match # Collect xlsx filenames from all relevant folders folders = { "cora_human": Path(self.config.autocora.cora_human_inbox), "cora_human_processed": Path(self.config.autocora.cora_human_inbox) / "processed", "lb_inbox": Path(self.config.link_building.watch_folder), "lb_processed": Path(self.config.link_building.watch_folder) / "processed", "content_inbox": Path(self.config.content.cora_inbox), "content_processed": Path(self.config.content.cora_inbox) / "processed", } # Build a map: normalized_stem ->set of folder keys file_locations: dict[str, set[str]] = {} for folder_key, folder_path in folders.items(): if not folder_path.exists(): continue for xlsx in folder_path.glob("*.xlsx"): if xlsx.name.startswith("~$"): continue stem = xlsx.stem.lower().replace("-", " ").replace("_", " ") stem = re.sub(r"\s+", " ", stem).strip() file_locations.setdefault(stem, set()).add(folder_key) # Match each task's keyword against the file stems result: dict[str, str] = {} for task in cora_tasks: keyword = task.custom_fields.get("Keyword", "") or task.name keyword_norm = _normalize_for_match(str(keyword)) # Find which folders have a matching file matched_folders: set[str] = set() for stem, locs in file_locations.items(): if _fuzzy_keyword_match(keyword_norm, stem, self._llm_plural_check): matched_folders.update(locs) if not matched_folders: result[task.id] = "needs cora.xlsx from worker machine" elif matched_folders & {"lb_inbox", "content_inbox"}: result[task.id] = "queued for processing" elif matched_folders & {"lb_processed", "content_processed"}: result[task.id] = "pipeline complete" elif "cora_human" in matched_folders: result[task.id] = "waiting for distribution" elif "cora_human_processed" in matched_folders: result[task.id] = "distributed, waiting for pipeline" else: result[task.id] = "needs cora.xlsx from worker machine" return result