"""AutoCora job submission and result polling tools. Submits Cora SEO report jobs to a shared folder queue and polls for results. Jobs are JSON files written to a network share; a worker on another machine picks them up, runs Cora, and writes result files back. """ from __future__ import annotations import json import logging import re import time from datetime import UTC, datetime from pathlib import Path from . import tool log = logging.getLogger(__name__) # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def _slugify(text: str) -> str: """Convert text to a filesystem-safe slug.""" text = text.lower().strip() text = re.sub(r"[^\w\s-]", "", text) text = re.sub(r"[\s_]+", "-", text) return re.sub(r"-+", "-", text).strip("-")[:80] def _make_job_id(keyword: str) -> str: """Create a unique job ID from keyword + timestamp.""" ts = str(int(time.time() * 1000)) slug = _slugify(keyword) return f"job-{ts}-{slug}" def _get_clickup_client(ctx: dict): """Build a ClickUp client from context config.""" from ..clickup import ClickUpClient config = ctx["config"] return ClickUpClient( api_token=config.clickup.api_token, workspace_id=config.clickup.workspace_id, task_type_field_name=config.clickup.task_type_field_name, ) def _find_qualifying_tasks(client, config, target_date: str, categories: list[str]): """Find 'to do' tasks in cora_categories due on target_date (single day). Used when target_date is explicitly provided. Returns list of ClickUpTask objects. """ space_id = config.clickup.space_id if not space_id: return [] try: dt = datetime.strptime(target_date, "%Y-%m-%d").replace(tzinfo=UTC) except ValueError: log.warning("Invalid target_date format: %s", target_date) return [] day_start_ms = int(dt.timestamp() * 1000) day_end_ms = day_start_ms + 24 * 60 * 60 * 1000 tasks = client.get_tasks_from_space( space_id, statuses=["to do"], due_date_lt=day_end_ms, ) qualifying = [] for task in tasks: if task.task_type not in categories: continue if not task.due_date: continue try: task_due_ms = int(task.due_date) except (ValueError, TypeError): continue if task_due_ms < day_start_ms or task_due_ms >= day_end_ms: continue qualifying.append(task) return qualifying def _find_qualifying_tasks_sweep(client, config, categories: list[str]): """Multi-pass sweep for qualifying tasks when no explicit date is given. Pass 1: Tasks due today Pass 2: Overdue tasks tagged with current month (e.g. "feb26") Pass 3: Tasks tagged with last month (e.g. "jan26"), still "to do" Pass 4: Tasks due in next 2 days (look-ahead) Deduplicates across passes by task ID. Returns list of ClickUpTask objects. """ space_id = config.clickup.space_id if not space_id: return [] now = datetime.now(UTC) today_start_ms = int( now.replace(hour=0, minute=0, second=0, microsecond=0).timestamp() * 1000 ) today_end_ms = today_start_ms + 24 * 60 * 60 * 1000 lookahead_end_ms = today_start_ms + 3 * 24 * 60 * 60 * 1000 # +2 days # Current and last month tags (e.g. "feb26", "jan26") current_month_tag = now.strftime("%b%y").lower() # Go back one month if now.month == 1: last_month = now.replace(year=now.year - 1, month=12) else: last_month = now.replace(month=now.month - 1) last_month_tag = last_month.strftime("%b%y").lower() # Fetch all "to do" tasks with due dates up to lookahead all_tasks = client.get_tasks_from_space( space_id, statuses=["to do"], due_date_lt=lookahead_end_ms, ) # Filter to cora categories cora_tasks = [t for t in all_tasks if t.task_type in categories] seen_ids: set[str] = set() qualifying: list = [] def _add(task): if task.id not in seen_ids: seen_ids.add(task.id) qualifying.append(task) # Pass 1: Due today for task in cora_tasks: if not task.due_date: continue try: due_ms = int(task.due_date) except (ValueError, TypeError): continue if today_start_ms <= due_ms < today_end_ms: _add(task) # Pass 2: Overdue + tagged with current month for task in cora_tasks: if not task.due_date: continue try: due_ms = int(task.due_date) except (ValueError, TypeError): continue if due_ms < today_start_ms and current_month_tag in task.tags: _add(task) # Pass 3: Tagged with last month, still "to do" for task in cora_tasks: if last_month_tag in task.tags: _add(task) # Pass 4: Look-ahead (due in next 2 days, excluding today which was pass 1) for task in cora_tasks: if not task.due_date: continue try: due_ms = int(task.due_date) except (ValueError, TypeError): continue if today_end_ms <= due_ms < lookahead_end_ms: _add(task) log.info( "AutoCora sweep: %d qualifying tasks " "(today=%d, overdue+month=%d, last_month=%d, lookahead=%d)", len(qualifying), sum(1 for t in qualifying if _is_due_today(t, today_start_ms, today_end_ms)), sum(1 for t in qualifying if _is_overdue_with_tag(t, today_start_ms, current_month_tag)), sum(1 for t in qualifying if last_month_tag in t.tags), sum(1 for t in qualifying if _is_lookahead(t, today_end_ms, lookahead_end_ms)), ) return qualifying def _is_due_today(task, start_ms, end_ms) -> bool: try: due = int(task.due_date) return start_ms <= due < end_ms except (ValueError, TypeError): return False def _is_overdue_with_tag(task, today_start_ms, tag) -> bool: try: due = int(task.due_date) return due < today_start_ms and tag in task.tags except (ValueError, TypeError): return False def _is_lookahead(task, today_end_ms, lookahead_end_ms) -> bool: try: due = int(task.due_date) return today_end_ms <= due < lookahead_end_ms except (ValueError, TypeError): return False def _group_by_keyword(tasks, all_tasks): """Group tasks by normalized keyword, pulling in sibling tasks from all_tasks. Returns dict: {keyword_lower: {"keyword": str, "url": str, "task_ids": [str]}} Alerts list for tasks missing Keyword or IMSURL. """ alerts = [] groups: dict[str, dict] = {} # Index all tasks by keyword for sibling lookup all_by_keyword: dict[str, list] = {} for t in all_tasks: kw = t.custom_fields.get("Keyword", "") or "" kw = str(kw).strip() if kw: all_by_keyword.setdefault(kw.lower(), []).append(t) for task in tasks: keyword = task.custom_fields.get("Keyword", "") or "" keyword = str(keyword).strip() if not keyword: alerts.append(f"Task '{task.name}' (id={task.id}) missing Keyword field") continue url = task.custom_fields.get("IMSURL", "") or "" url = str(url).strip() if not url: url = "https://seotoollab.com/blank.html" kw_lower = keyword.lower() if kw_lower not in groups: # Collect ALL task IDs sharing this keyword sibling_ids = set() for sibling in all_by_keyword.get(kw_lower, []): sibling_ids.add(sibling.id) sibling_ids.add(task.id) groups[kw_lower] = { "keyword": keyword, "url": url, "task_ids": sorted(sibling_ids), } else: # Add this task's ID if not already there if task.id not in groups[kw_lower]["task_ids"]: groups[kw_lower]["task_ids"].append(task.id) groups[kw_lower]["task_ids"].sort() return groups, alerts # --------------------------------------------------------------------------- # Tools # --------------------------------------------------------------------------- @tool( "submit_autocora_jobs", "Submit Cora SEO report jobs for ClickUp tasks. Uses a multi-pass sweep " "(today, overdue, last month, look-ahead) unless a specific date is given. " "Writes job JSON files to the AutoCora shared folder queue.", category="autocora", ) def submit_autocora_jobs(target_date: str = "", ctx: dict | None = None) -> str: """Submit AutoCora jobs for qualifying ClickUp tasks. Args: target_date: Date to check (YYYY-MM-DD). Empty = multi-pass sweep. ctx: Injected context with config, db, etc. """ if not ctx: return "Error: context not available" config = ctx["config"] autocora = config.autocora if not autocora.enabled: return "AutoCora is disabled in config." if not config.clickup.api_token: return "Error: ClickUp API token not configured" client = _get_clickup_client(ctx) # Find qualifying tasks — sweep or single-day if target_date: qualifying = _find_qualifying_tasks(client, config, target_date, autocora.cora_categories) label = target_date else: qualifying = _find_qualifying_tasks_sweep(client, config, autocora.cora_categories) label = "sweep" if not qualifying: return f"No qualifying tasks found ({label})." # Group by keyword — only siblings that also passed the sweep qualify groups, alerts = _group_by_keyword(qualifying, qualifying) if not groups and alerts: return "No jobs submitted.\n\n" + "\n".join(f"- {a}" for a in alerts) # Ensure jobs directory exists jobs_dir = Path(autocora.jobs_dir) jobs_dir.mkdir(parents=True, exist_ok=True) submitted = [] skipped = [] for kw_lower, group in groups.items(): # Check if a job file already exists for this keyword (dedup by file) existing_jobs = list(jobs_dir.glob(f"job-*-{_slugify(group['keyword'])}*.json")) if existing_jobs: skipped.append(group["keyword"]) continue # Write job file (contains task_ids for the result poller) job_id = _make_job_id(group["keyword"]) job_data = { "keyword": group["keyword"], "url": group["url"], "task_ids": group["task_ids"], } job_path = jobs_dir / f"{job_id}.json" job_path.write_text(json.dumps(job_data, indent=2), encoding="utf-8") # Move ClickUp tasks to "automation underway" for tid in group["task_ids"]: client.update_task_status(tid, "automation underway") submitted.append(group["keyword"]) log.info("Submitted AutoCora job: %s -> %s", group["keyword"], job_id) # Build response lines = [f"AutoCora submission ({label}):"] if submitted: lines.append(f"\nSubmitted {len(submitted)} job(s):") for kw in submitted: lines.append(f" - {kw}") if skipped: lines.append(f"\nSkipped {len(skipped)} (job file already exists):") for kw in skipped: lines.append(f" - {kw}") if alerts: lines.append(f"\nAlerts ({len(alerts)}):") for a in alerts: lines.append(f" - {a}") return "\n".join(lines) @tool( "poll_autocora_results", "Poll the AutoCora results folder for completed Cora SEO report jobs. " "Scans for .result files, reads task_ids from the JSON, updates ClickUp, " "then moves the result file to a processed/ subfolder.", category="autocora", ) def poll_autocora_results(ctx: dict | None = None) -> str: """Poll for AutoCora results and update ClickUp tasks. Scans the results folder for .result files. Each result file is JSON containing {status, task_ids, keyword, ...}. After processing, the result file is moved to results/processed/ to avoid re-processing. """ if not ctx: return "Error: context not available" config = ctx["config"] autocora = config.autocora if not autocora.enabled: return "AutoCora is disabled in config." results_dir = Path(autocora.results_dir) if not results_dir.exists(): return f"Results directory does not exist: {results_dir}" # Scan for .result files result_files = list(results_dir.glob("*.result")) if not result_files: return "No result files found in results folder." client = None if config.clickup.api_token: client = _get_clickup_client(ctx) processed_dir = results_dir / "processed" processed = [] for result_path in result_files: raw = result_path.read_text(encoding="utf-8").strip() result_data = _parse_result(raw) task_ids = result_data.get("task_ids", []) status = result_data.get("status", "UNKNOWN") keyword = result_data.get("keyword", result_path.stem) if status == "SUCCESS": if client and task_ids: for tid in task_ids: client.update_task_status(tid, autocora.success_status) client.add_comment(tid, f"Cora report generated for \"{keyword}\" — ready for you to look at it.") processed.append(f"SUCCESS: {keyword}") log.info("AutoCora SUCCESS: %s", keyword) elif status == "FAILURE": reason = result_data.get("reason", "unknown error") if client and task_ids: for tid in task_ids: client.update_task_status(tid, autocora.error_status) client.add_comment( tid, f"Cora report failed for keyword: {keyword}\nReason: {reason}" ) processed.append(f"FAILURE: {keyword} ({reason})") log.info("AutoCora FAILURE: %s — %s", keyword, reason) else: processed.append(f"UNKNOWN: {keyword} (status={status})") # Move result file to processed/ so it's not re-processed processed_dir.mkdir(exist_ok=True) try: result_path.rename(processed_dir / result_path.name) except OSError as e: log.warning("Could not move result file %s: %s", result_path.name, e) # Build response lines = ["AutoCora poll results:"] if processed: lines.append(f"\nProcessed {len(processed)} result(s):") for p in processed: lines.append(f" - {p}") return "\n".join(lines) def _parse_result(raw: str) -> dict: """Parse a result file — JSON format or legacy plain text.""" # Try JSON first try: data = json.loads(raw) if isinstance(data, dict): return data except json.JSONDecodeError: pass # Legacy plain text: "SUCCESS" or "FAILURE: reason" if raw.startswith("SUCCESS"): return {"status": "SUCCESS"} if raw.startswith("FAILURE"): reason = raw.split(":", 1)[1].strip() if ":" in raw else "unknown" return {"status": "FAILURE", "reason": reason} return {"status": "UNKNOWN", "raw": raw}