476 lines
15 KiB
Python
476 lines
15 KiB
Python
"""AutoCora job submission and result polling tools.
|
|
|
|
Submits Cora SEO report jobs to a shared folder queue and polls for results.
|
|
Jobs are JSON files written to a network share; a worker on another machine
|
|
picks them up, runs Cora, and writes result files back.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import logging
|
|
import re
|
|
import time
|
|
from datetime import UTC, datetime
|
|
from pathlib import Path
|
|
|
|
from . import tool
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _slugify(text: str) -> str:
|
|
"""Convert text to a filesystem-safe slug."""
|
|
text = text.lower().strip()
|
|
text = re.sub(r"[^\w\s-]", "", text)
|
|
text = re.sub(r"[\s_]+", "-", text)
|
|
return re.sub(r"-+", "-", text).strip("-")[:80]
|
|
|
|
|
|
def _make_job_id(keyword: str) -> str:
    """Create a unique job ID from keyword + millisecond timestamp."""
    # Millisecond resolution keeps IDs unique even for rapid submissions.
    millis = int(time.time() * 1000)
    return f"job-{millis}-{_slugify(keyword)}"
|
|
|
|
|
|
def _get_clickup_client(ctx: dict):
    """Build a ClickUp client from context config."""
    from ..clickup import ClickUpClient

    # All client settings live under the "clickup" section of the config.
    clickup_cfg = ctx["config"].clickup
    return ClickUpClient(
        api_token=clickup_cfg.api_token,
        workspace_id=clickup_cfg.workspace_id,
        task_type_field_name=clickup_cfg.task_type_field_name,
    )
|
|
|
|
|
|
def _find_qualifying_tasks(client, config, target_date: str, categories: list[str]):
|
|
"""Find 'to do' tasks in cora_categories due on target_date (single day).
|
|
|
|
Used when target_date is explicitly provided.
|
|
Returns list of ClickUpTask objects.
|
|
"""
|
|
space_id = config.clickup.space_id
|
|
if not space_id:
|
|
return []
|
|
|
|
try:
|
|
dt = datetime.strptime(target_date, "%Y-%m-%d").replace(tzinfo=UTC)
|
|
except ValueError:
|
|
log.warning("Invalid target_date format: %s", target_date)
|
|
return []
|
|
|
|
day_start_ms = int(dt.timestamp() * 1000)
|
|
day_end_ms = day_start_ms + 24 * 60 * 60 * 1000
|
|
|
|
tasks = client.get_tasks_from_space(
|
|
space_id,
|
|
statuses=["to do"],
|
|
due_date_lt=day_end_ms,
|
|
)
|
|
|
|
qualifying = []
|
|
for task in tasks:
|
|
if task.task_type not in categories:
|
|
continue
|
|
if not task.due_date:
|
|
continue
|
|
try:
|
|
task_due_ms = int(task.due_date)
|
|
except (ValueError, TypeError):
|
|
continue
|
|
if task_due_ms < day_start_ms or task_due_ms >= day_end_ms:
|
|
continue
|
|
qualifying.append(task)
|
|
|
|
return qualifying
|
|
|
|
|
|
def _find_qualifying_tasks_sweep(client, config, categories: list[str]):
    """Multi-pass sweep for qualifying tasks when no explicit date is given.

    Pass 1: Tasks due today
    Pass 2: Overdue tasks tagged with current month (e.g. "feb26")
    Pass 3: Tasks tagged with last month (e.g. "jan26"), still "to do"
    Pass 4: Tasks due in next 2 days (look-ahead)

    Deduplicates across passes by task ID.
    Returns list of ClickUpTask objects.
    """
    space_id = config.clickup.space_id
    if not space_id:
        return []

    now = datetime.now(UTC)
    midnight = now.replace(hour=0, minute=0, second=0, microsecond=0)
    today_start_ms = int(midnight.timestamp() * 1000)
    today_end_ms = today_start_ms + 24 * 60 * 60 * 1000
    lookahead_end_ms = today_start_ms + 3 * 24 * 60 * 60 * 1000  # +2 days

    # Month tags like "feb26" (current) and "jan26" (previous).
    current_month_tag = now.strftime("%b%y").lower()
    if now.month == 1:
        previous_month = now.replace(year=now.year - 1, month=12)
    else:
        previous_month = now.replace(month=now.month - 1)
    last_month_tag = previous_month.strftime("%b%y").lower()

    # One fetch covers every pass: "to do" tasks due before the look-ahead end,
    # narrowed to the Cora categories.
    cora_tasks = [
        t
        for t in client.get_tasks_from_space(
            space_id,
            statuses=["to do"],
            due_date_lt=lookahead_end_ms,
        )
        if t.task_type in categories
    ]

    # The four passes in priority order. The explicit `t.due_date and` guard
    # mirrors the original per-pass "skip tasks without a due date" check.
    passes = (
        lambda t: t.due_date and _is_due_today(t, today_start_ms, today_end_ms),
        lambda t: t.due_date and _is_overdue_with_tag(t, today_start_ms, current_month_tag),
        lambda t: last_month_tag in t.tags,
        lambda t: t.due_date and _is_lookahead(t, today_end_ms, lookahead_end_ms),
    )

    seen_ids: set[str] = set()
    qualifying: list = []
    for pred in passes:
        for task in cora_tasks:
            # Dedup across passes: earlier (higher-priority) passes win.
            if pred(task) and task.id not in seen_ids:
                seen_ids.add(task.id)
                qualifying.append(task)

    log.info(
        "AutoCora sweep: %d qualifying tasks "
        "(today=%d, overdue+month=%d, last_month=%d, lookahead=%d)",
        len(qualifying),
        sum(1 for t in qualifying if _is_due_today(t, today_start_ms, today_end_ms)),
        sum(1 for t in qualifying if _is_overdue_with_tag(t, today_start_ms, current_month_tag)),
        sum(1 for t in qualifying if last_month_tag in t.tags),
        sum(1 for t in qualifying if _is_lookahead(t, today_end_ms, lookahead_end_ms)),
    )

    return qualifying
|
|
|
|
|
|
def _is_due_today(task, start_ms, end_ms) -> bool:
|
|
try:
|
|
due = int(task.due_date)
|
|
return start_ms <= due < end_ms
|
|
except (ValueError, TypeError):
|
|
return False
|
|
|
|
|
|
def _is_overdue_with_tag(task, today_start_ms, tag) -> bool:
|
|
try:
|
|
due = int(task.due_date)
|
|
return due < today_start_ms and tag in task.tags
|
|
except (ValueError, TypeError):
|
|
return False
|
|
|
|
|
|
def _is_lookahead(task, today_end_ms, lookahead_end_ms) -> bool:
|
|
try:
|
|
due = int(task.due_date)
|
|
return today_end_ms <= due < lookahead_end_ms
|
|
except (ValueError, TypeError):
|
|
return False
|
|
|
|
|
|
|
|
def _group_by_keyword(tasks, all_tasks):
|
|
"""Group tasks by normalized keyword, pulling in sibling tasks from all_tasks.
|
|
|
|
Returns dict: {keyword_lower: {"keyword": str, "url": str, "task_ids": [str]}}
|
|
Alerts list for tasks missing Keyword or IMSURL.
|
|
"""
|
|
alerts = []
|
|
groups: dict[str, dict] = {}
|
|
|
|
# Index all tasks by keyword for sibling lookup
|
|
all_by_keyword: dict[str, list] = {}
|
|
for t in all_tasks:
|
|
kw = t.custom_fields.get("Keyword", "") or ""
|
|
kw = str(kw).strip()
|
|
if kw:
|
|
all_by_keyword.setdefault(kw.lower(), []).append(t)
|
|
|
|
for task in tasks:
|
|
keyword = task.custom_fields.get("Keyword", "") or ""
|
|
keyword = str(keyword).strip()
|
|
if not keyword:
|
|
alerts.append(f"Task '{task.name}' (id={task.id}) missing Keyword field")
|
|
continue
|
|
|
|
url = task.custom_fields.get("IMSURL", "") or ""
|
|
url = str(url).strip()
|
|
if not url:
|
|
url = "https://seotoollab.com/blank.html"
|
|
|
|
kw_lower = keyword.lower()
|
|
if kw_lower not in groups:
|
|
# Collect ALL task IDs sharing this keyword
|
|
sibling_ids = set()
|
|
for sibling in all_by_keyword.get(kw_lower, []):
|
|
sibling_ids.add(sibling.id)
|
|
sibling_ids.add(task.id)
|
|
groups[kw_lower] = {
|
|
"keyword": keyword,
|
|
"url": url,
|
|
"task_ids": sorted(sibling_ids),
|
|
}
|
|
else:
|
|
# Add this task's ID if not already there
|
|
if task.id not in groups[kw_lower]["task_ids"]:
|
|
groups[kw_lower]["task_ids"].append(task.id)
|
|
groups[kw_lower]["task_ids"].sort()
|
|
|
|
return groups, alerts
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tools
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@tool(
    "submit_autocora_jobs",
    "Submit Cora SEO report jobs for ClickUp tasks. Uses a multi-pass sweep "
    "(today, overdue, last month, look-ahead) unless a specific date is given. "
    "Writes job JSON files to the AutoCora shared folder queue.",
    category="autocora",
)
def submit_autocora_jobs(target_date: str = "", ctx: dict | None = None) -> str:
    """Submit AutoCora jobs for qualifying ClickUp tasks.

    Finds qualifying "to do" tasks, groups them by keyword, writes one job
    JSON file per keyword into the shared jobs folder, and moves the
    affected ClickUp tasks to "automation underway".

    Args:
        target_date: Date to check (YYYY-MM-DD). Empty = multi-pass sweep.
        ctx: Injected context with config, db, etc.

    Returns:
        Human-readable summary of submitted/skipped jobs and any alerts.
    """
    if not ctx:
        return "Error: context not available"

    config = ctx["config"]
    autocora = config.autocora

    if not autocora.enabled:
        return "AutoCora is disabled in config."

    if not config.clickup.api_token:
        return "Error: ClickUp API token not configured"

    client = _get_clickup_client(ctx)

    # Find qualifying tasks — sweep or single-day
    if target_date:
        qualifying = _find_qualifying_tasks(client, config, target_date, autocora.cora_categories)
        label = target_date
    else:
        qualifying = _find_qualifying_tasks_sweep(client, config, autocora.cora_categories)
        label = "sweep"

    if not qualifying:
        return f"No qualifying tasks found ({label})."

    # Group by keyword — only siblings that also passed the sweep qualify
    groups, alerts = _group_by_keyword(qualifying, qualifying)

    if not groups and alerts:
        return "No jobs submitted.\n\n" + "\n".join(f"- {a}" for a in alerts)

    # Ensure jobs directory exists (network share; may not be mounted yet)
    jobs_dir = Path(autocora.jobs_dir)
    jobs_dir.mkdir(parents=True, exist_ok=True)

    submitted = []
    skipped = []

    # Group keys are never needed here — iterate values only.
    for group in groups.values():
        # Skip if a job file already exists for this keyword (dedup by file).
        # any() stops at the first match instead of materializing the glob.
        if any(jobs_dir.glob(f"job-*-{_slugify(group['keyword'])}*.json")):
            skipped.append(group["keyword"])
            continue

        # Write job file (contains task_ids for the result poller)
        job_id = _make_job_id(group["keyword"])
        job_data = {
            "keyword": group["keyword"],
            "url": group["url"],
            "task_ids": group["task_ids"],
        }
        job_path = jobs_dir / f"{job_id}.json"
        job_path.write_text(json.dumps(job_data, indent=2), encoding="utf-8")

        # Move ClickUp tasks to "automation underway"
        for tid in group["task_ids"]:
            client.update_task_status(tid, "automation underway")

        submitted.append(group["keyword"])
        log.info("Submitted AutoCora job: %s -> %s", group["keyword"], job_id)

    # Build response
    lines = [f"AutoCora submission ({label}):"]
    if submitted:
        lines.append(f"\nSubmitted {len(submitted)} job(s):")
        for kw in submitted:
            lines.append(f" - {kw}")
    if skipped:
        lines.append(f"\nSkipped {len(skipped)} (job file already exists):")
        for kw in skipped:
            lines.append(f" - {kw}")
    if alerts:
        lines.append(f"\nAlerts ({len(alerts)}):")
        for a in alerts:
            lines.append(f" - {a}")

    return "\n".join(lines)
|
|
|
|
|
|
@tool(
    "poll_autocora_results",
    "Poll the AutoCora results folder for completed Cora SEO report jobs. "
    "Scans for .result files, reads task_ids from the JSON, updates ClickUp, "
    "then moves the result file to a processed/ subfolder.",
    category="autocora",
)
def poll_autocora_results(ctx: dict | None = None) -> str:
    """Poll for AutoCora results and update ClickUp tasks.

    Scans the results folder for .result files. Each result file is JSON
    containing {status, task_ids, keyword, ...}. After processing, the
    result file is moved to results/processed/ to avoid re-processing.

    Args:
        ctx: Injected context with config, db, etc.

    Returns:
        Human-readable summary of processed results.
    """
    if not ctx:
        return "Error: context not available"

    config = ctx["config"]
    autocora = config.autocora

    if not autocora.enabled:
        return "AutoCora is disabled in config."

    results_dir = Path(autocora.results_dir)
    if not results_dir.exists():
        return f"Results directory does not exist: {results_dir}"

    # Scan for .result files
    result_files = list(results_dir.glob("*.result"))
    if not result_files:
        return "No result files found in results folder."

    # ClickUp updates are best-effort: without a token we still consume files.
    client = None
    if config.clickup.api_token:
        client = _get_clickup_client(ctx)

    # Create once up front — loop-invariant, same directory for every file.
    processed_dir = results_dir / "processed"
    processed_dir.mkdir(exist_ok=True)
    processed = []

    for result_path in result_files:
        # The results folder is a network share; the worker may still be
        # writing a file when we scan, so treat a read error as "try later"
        # instead of aborting the whole poll.
        try:
            raw = result_path.read_text(encoding="utf-8").strip()
        except OSError as e:
            log.warning("Could not read result file %s: %s", result_path.name, e)
            continue
        result_data = _parse_result(raw)

        task_ids = result_data.get("task_ids", [])
        status = result_data.get("status", "UNKNOWN")
        # Fall back to the filename when the result JSON omits the keyword.
        keyword = result_data.get("keyword", result_path.stem)

        if status == "SUCCESS":
            if client and task_ids:
                for tid in task_ids:
                    client.update_task_status(tid, autocora.success_status)
                    client.add_comment(tid, f"Cora report generated for \"{keyword}\" — ready for you to look at it.")

            processed.append(f"SUCCESS: {keyword}")
            log.info("AutoCora SUCCESS: %s", keyword)

        elif status == "FAILURE":
            reason = result_data.get("reason", "unknown error")
            if client and task_ids:
                for tid in task_ids:
                    client.update_task_status(tid, autocora.error_status)
                    client.add_comment(
                        tid, f"Cora report failed for keyword: {keyword}\nReason: {reason}"
                    )

            processed.append(f"FAILURE: {keyword} ({reason})")
            log.info("AutoCora FAILURE: %s — %s", keyword, reason)

        else:
            processed.append(f"UNKNOWN: {keyword} (status={status})")

        # Move result file to processed/ so it's not re-processed
        try:
            result_path.rename(processed_dir / result_path.name)
        except OSError as e:
            log.warning("Could not move result file %s: %s", result_path.name, e)

    # Build response
    lines = ["AutoCora poll results:"]
    if processed:
        lines.append(f"\nProcessed {len(processed)} result(s):")
        for p in processed:
            lines.append(f" - {p}")

    return "\n".join(lines)
|
|
|
|
|
|
def _parse_result(raw: str) -> dict:
|
|
"""Parse a result file — JSON format or legacy plain text."""
|
|
# Try JSON first
|
|
try:
|
|
data = json.loads(raw)
|
|
if isinstance(data, dict):
|
|
return data
|
|
except json.JSONDecodeError:
|
|
pass
|
|
|
|
# Legacy plain text: "SUCCESS" or "FAILURE: reason"
|
|
if raw.startswith("SUCCESS"):
|
|
return {"status": "SUCCESS"}
|
|
if raw.startswith("FAILURE"):
|
|
reason = raw.split(":", 1)[1].strip() if ":" in raw else "unknown"
|
|
return {"status": "FAILURE", "reason": reason}
|
|
|
|
return {"status": "UNKNOWN", "raw": raw}
|