diff --git a/CLAUDE.md b/CLAUDE.md index 92fe768..d5c03e7 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -174,9 +174,11 @@ skill_map: "Press Release": tool: "write_press_releases" auto_execute: true + required_fields: [topic, company_name, target_url] field_mapping: - topic: "task_name" # uses ClickUp task name + topic: "PR Topic" # ClickUp custom field for PR topic/keyword company_name: "Customer" # looks up "Customer" custom field + target_url: "IMSURL" # target money-site URL (required) ``` Task lifecycle: `to do` → discovered → approved/awaiting_approval → executing → completed/failed (+ attachments uploaded) diff --git a/cheddahbot/scheduler.py b/cheddahbot/scheduler.py index cdfb12f..77ae35a 100644 --- a/cheddahbot/scheduler.py +++ b/cheddahbot/scheduler.py @@ -388,6 +388,21 @@ class Scheduler: task_id = task.id client = self._get_clickup_client() + # Validate required fields before starting + args = self._build_tool_args_from_task(task, mapping) + required = mapping.get("required_fields", []) + missing = [f for f in required if not args.get(f)] + if missing: + field_mapping = mapping.get("field_mapping", {}) + missing_clickup = [field_mapping.get(f, f) for f in missing] + msg = f"Skipped: missing required field(s): {', '.join(missing_clickup)}" + log.info("Skipping ClickUp task %s (%s) — %s", task_id, task.name, msg) + self._notify( + f"Skipped ClickUp task: **{task.name}**\n{msg}", + category="clickup", + ) + return + # Move to "automation underway" on ClickUp immediately client.update_task_status(task_id, self.config.clickup.automation_status) @@ -395,8 +410,7 @@ class Scheduler: self._notify(f"Executing ClickUp task: **{task.name}** → Skill: `{tool_name}`") try: - # Build tool arguments from field mapping - args = self._build_tool_args_from_task(task, mapping) + # args already built during validation above args["clickup_task_id"] = task_id # Execute the skill via the tool registry diff --git a/config.yaml b/config.yaml index d6760ff..97ced84 100644 --- 
a/config.yaml +++ b/config.yaml @@ -53,8 +53,9 @@ clickup: "Press Release": tool: "write_press_releases" auto_execute: true + required_fields: [topic, company_name, target_url] field_mapping: - topic: "task_name" + topic: "PR Topic" company_name: "Customer" target_url: "IMSURL" branded_url: "SocialURL" diff --git a/docs/task-pipeline-flows.md b/docs/task-pipeline-flows.md new file mode 100644 index 0000000..57e3c9f --- /dev/null +++ b/docs/task-pipeline-flows.md @@ -0,0 +1,415 @@ +# CheddahBot Task Pipeline Flows — Complete Reference + +## ClickUp Statuses Used + +These are the ClickUp task statuses that CheddahBot reads and writes: + +| Status | Set By | Meaning | +|--------|--------|---------| +| `to do` | Human (or default) | Task is waiting to be picked up | +| `automation underway` | CheddahBot | Bot is actively working on this task | +| `running cora` | CheddahBot (AutoCora) | Cora report is being generated by external worker | +| `outline review` | CheddahBot (Content) | Phase 1 outline is ready for human review | +| `outline approved` | Human | Human reviewed the outline, ready for Phase 2 | +| `internal review` | CheddahBot | Bot finished, deliverables ready for human review | +| `complete` | CheddahBot (Link Building) | Pipeline fully done | +| `error` | CheddahBot | Something failed, needs attention | +| `in progress` | (configured but not used in automation) | — | + +**What CheddahBot polls for:** `["to do", "outline approved"]` (config.yaml line 45) + +--- + +## ClickUp Custom Fields Used + +| Field Name | Type | Used By | What It Holds | +|------------|------|---------|---------------| +| `Work Category` | Dropdown | All pipelines | Determines which pipeline runs: "Press Release", "Link Building", "On Page Optimization", "Content Creation" | +| `PR Topic` | Text | Press Release | Press release topic/keyword (e.g. 
"Peek Plastic") — required | +| `Customer` | Text | Press Release | Client/company name — required | +| `Keyword` | Text | Link Building, Content, OPT | Target SEO keyword | +| `IMSURL` | Text | All pipelines | Target page URL (money site) — required for Press Release | +| `SocialURL` | Text | Press Release | Branded/social URL for the PR | +| `LB Method` | Dropdown | Link Building | "Cora Backlinks" or other methods | +| `CustomAnchors` | Text | Link Building | Custom anchor text overrides | +| `BrandedPlusRatio` | Number | Link Building | Ratio for branded anchors (default 0.7) | +| `CLIFlags` | Text | Link Building, Content, OPT | Extra flags passed to tools (e.g., "service") | +| `CoraFile` | Text | Link Building | Path to Cora xlsx file | + +**Tags:** Tasks are tagged with month in `mmmyy` format (e.g., `feb26`, `mar26`). + +--- + +## Background Threads + +CheddahBot runs 6 daemon threads. All start at boot and run until shutdown. + +| Thread | Interval | What It Does | +|--------|----------|-------------| +| **poll** | 60 seconds | Runs cron-scheduled tasks from the database | +| **heartbeat** | 30 minutes | Reads HEARTBEAT.md checklist, takes action if needed | +| **clickup** | 20 minutes | Polls ClickUp for tasks to auto-execute (only Press Releases currently) | +| **folder_watch** | 60 minutes | Scans `Z:/cora-inbox` for .xlsx files → triggers Link Building | +| **autocora** | 5 minutes | Submits Cora jobs for today's tasks + polls for results | +| **content_watch** | 60 minutes | Scans `Z:/content-cora-inbox` for .xlsx files → triggers Content/OPT Phase 1 | + +--- + +## Pipeline 1: PRESS RELEASE + +**Work Category:** "Press Release" +**auto_execute:** TRUE — the only pipeline that runs automatically from ClickUp polling +**Tool:** `write_press_releases` + +### Flow + +``` +CLICKUP POLL (every 20 min) + │ + ├─ Finds task with Work Category = "Press Release", status = "to do", due within 3 weeks + │ + ▼ +CHECK LOCAL DB + │ Key: clickup:task:{id}:state + │ 
If state = "executing" or "completed" or "failed" → SKIP (already handled) + │ + ▼ +SET STATUS → "automation underway" + │ ClickUp API: PUT /task/{id} status + │ Local DB: state = "executing" + │ + ▼ +STEP 1: Generate 7 Headlines (chat brain - GPT-4o-mini) + │ Uses configured chat model + │ Saves to: data/generated/press_releases/{company}/{slug}_headlines.txt + │ + ▼ +STEP 2: AI Judge Picks Best 2 (chat brain) + │ Filters out rule-violating headlines (colons, superlatives, etc.) + │ Falls back to first 2 if judge returns < 2 + │ + ▼ +STEP 3: Write 2 Full Press Releases (execution brain - Claude Code CLI) + │ For each winning headline: + │ - Claude writes full 575-800 word PR + │ - Validates anchor phrase + │ - Saves .txt and .docx + │ - Uploads .docx to ClickUp as attachment + │ + ▼ +STEP 4: Generate JSON-LD Schemas (execution brain - Sonnet) + │ For each PR: + │ - Generates NewsArticle schema + │ - Saves .json file + │ + ▼ +SET STATUS → "internal review" + │ ClickUp API: comment with results + PUT status + │ Local DB: state = "completed" + │ + ▼ +DONE — Human reviews in ClickUp +``` + +### ClickUp Fields Read +- `PR Topic` → press release topic/keyword (required) +- `Customer` → company name in PR (required) +- `IMSURL` → target URL for anchor link (required) +- `SocialURL` → branded URL (optional) + +### What Can Go Wrong +- **BUG: Crash mid-step → stuck forever.** DB says "executing", never retries. Manual reset needed. +- **BUG: DB says "completed" but ClickUp API failed → out of sync.** DB written before API call. +- **BUG: Attachment upload fails silently.** Task marked complete, files missing from ClickUp. 
+- Headline generation returns empty → tool exits with error, task marked "failed" +- Schema JSON invalid → warning logged but task still completes + +--- + +## Pipeline 2: LINK BUILDING (Cora Backlinks) + +**Work Category:** "Link Building" +**auto_execute:** FALSE — triggered by folder watcher, not ClickUp polling +**Tool:** `run_cora_backlinks` + +### Full Lifecycle (4 stages) + +``` +STAGE A: AUTOCORA SUBMITS CORA JOB +══════════════════════════════════ + +AUTOCORA LOOP (every 5 min) + │ + ├─ Calls submit_autocora_jobs(target_date = today) + │ Finds tasks: Work Category in ["Link Building", "On Page Optimization", "Content Creation"] + │ status = "to do" + │ due date = TODAY (exact 24h window) ← ★ BUG: misses overdue tasks + │ + ├─ Groups tasks by Keyword (case-insensitive) + │ If same keyword across multiple tasks → one job covers all + │ + ├─ For each keyword group: + │ Check local DB: autocora:job:{keyword_lower} + │ If already submitted → SKIP + │ + ▼ +WRITE JOB FILE + │ Path: //PennQnap1/SHARE1/AutoCora/jobs/{job-id}.json + │ Content: {"keyword": "...", "url": "IMSURL", "task_ids": ["id1", "id2"]} + │ Local DB: autocora:job:{keyword} = {status: "submitted", job_id: "..."} + │ + ▼ +SET ALL TASK STATUSES → "automation underway" + + +STAGE B: EXTERNAL WORKER RUNS CORA (not CheddahBot code) +═════════════════════════════════════════════════════════ + +Worker on another machine: + │ Watches //PennQnap1/SHARE1/AutoCora/jobs/ + │ Picks up .json, runs Cora SEO tool + │ Writes .xlsx report to Z:/cora-inbox/ ← auto-deposited + │ Writes //PennQnap1/SHARE1/AutoCora/results/{job-id}.result = "SUCCESS" or "FAILURE: reason" + + +STAGE C: AUTOCORA POLLS FOR RESULTS +════════════════════════════════════ + +AUTOCORA LOOP (every 5 min) + │ + ├─ Scans local DB for autocora:job:* with status = "submitted" + │ For each: checks if results/{job-id}.result exists + │ + ├─ If SUCCESS: + │ Local DB: status = "completed" + │ ClickUp: all task_ids → status = "running cora" + │ 
ClickUp: comment "Cora report completed for keyword: ..." + │ + ├─ If FAILURE: + │ Local DB: status = "failed" + │ ClickUp: all task_ids → status = "error" + │ ClickUp: comment with failure reason + │ + └─ If no result file yet: skip, check again in 5 min + + +STAGE D: FOLDER WATCHER TRIGGERS LINK BUILDING +═══════════════════════════════════════════════ + +FOLDER WATCHER (every 60 min) + │ + ├─ Scans Z:/cora-inbox/ for .xlsx files + │ Skips: ~$ temp files, already-completed files (via local DB) + │ + ├─ For each new .xlsx: + │ Normalize filename: "anti-vibration-rubber-mounts.xlsx" → "anti vibration rubber mounts" + │ + ▼ +MATCH TO CLICKUP TASK + │ Queries all tasks in space with Work Category = "Link Building" + │ Fuzzy matches Keyword field against normalized filename: + │ - Exact match + │ - Substring match (either direction) + │ - >80% word overlap + │ + ├─ NO MATCH → local DB: status = "unmatched", notification sent, retry next scan + │ + ├─ MATCH FOUND but IMSURL empty → local DB: status = "blocked", ClickUp → "error" + │ + ▼ +SET STATUS → "automation underway" + │ + ▼ +STEP 1: Ingest CORA Report (Big-Link-Man subprocess) + │ Runs: E:/dev/Big-Link-Man/.venv/Scripts/python.exe main.py ingest-cora -f {xlsx} -n {keyword} ... + │ BLM parses xlsx, creates project, writes job file + │ Timeout: 30 minutes + │ ClickUp: comment "CORA report ingested. Project ID: ..." 
+ │ + ▼ +STEP 2: Generate Content Batch (Big-Link-Man subprocess) + │ Runs: python main.py generate-batch -j {job_file} --continue-on-error + │ BLM generates content for each prospect + │ Moves job file to jobs/done/ + │ + ▼ +SET STATUS → "complete" + │ ClickUp: comment with results + │ Move .xlsx to Z:/cora-inbox/processed/ + │ Local DB: linkbuilding:watched:{filename} = {status: "completed"} + │ + ▼ +DONE +``` + +### ClickUp Fields Read +- `Keyword` → matches against .xlsx filename + used as project name +- `IMSURL` → money site URL (required) +- `LB Method` → must be "Cora Backlinks" or empty +- `CustomAnchors`, `BrandedPlusRatio`, `CLIFlags` → passed to BLM + +### What Can Go Wrong +- **BUG: AutoCora only checks today's tasks.** Due date missed = never gets a Cora report. +- **BUG: Crash mid-step → stuck "executing".** Same as PR pipeline. +- No ClickUp task with matching Keyword → file sits unmatched, notification sent +- IMSURL empty → blocked, ClickUp set to "error" +- BLM subprocess timeout (30 min) or crash → task fails +- Network share offline → can't write job file or read results + +### Retry Behavior +- "processing", "blocked", "unmatched" .xlsx files → retried on next scan (KV entry deleted) +- "completed", "failed" → never retried + +--- + +## Pipeline 3: CONTENT CREATION + +**Work Category:** "Content Creation" +**auto_execute:** FALSE — triggered by content folder watcher +**Tool:** `create_content` (two-phase) + +### Flow + +``` +STAGE A: AUTOCORA SUBMITS CORA JOB (same as Link Building Stage A) +══════════════════════════════════════════════════════════════════ + Same AutoCora loop, same BUG with today-only filtering. 
+ Worker generates .xlsx → deposits in Z:/content-cora-inbox/ + + +STAGE B: CONTENT WATCHER TRIGGERS PHASE 1 +══════════════════════════════════════════ + +CONTENT WATCHER (every 60 min) + │ + ├─ Scans Z:/content-cora-inbox/ for .xlsx files + │ Same skip/retry logic as link building watcher + │ + ├─ Normalize filename, fuzzy match to ClickUp task + │ Matches: Work Category in ["Content Creation", "On Page Optimization"] + │ + ├─ NO MATCH → "unmatched", notification + │ + ▼ +PHASE 1: Research + Outline (execution brain - Claude Code CLI) + │ + │ ★ BUG: Does NOT set "automation underway" status (link building watcher does) + │ + │ Build prompt based on content type: + │ - If IMSURL present → "optimize existing page" (scrape it, analyze, outline improvements) + │ - If IMSURL empty → "new content" (competitor research, outline from scratch) + │ - If Cora .xlsx found → "use this Cora report for keyword targets and entities" + │ - If CLIFlags contains "service" → includes service page template + │ + │ Claude Code runs: web searches, scrapes competitors, reads Cora report + │ Generates outline with entity recommendations + │ + ▼ +SAVE OUTLINE + │ Path: Z:/content-outlines/{keyword-slug}/outline.md + │ Local DB: clickup:task:{id}:state = {state: "outline_review", outline_path: "..."} + │ + ▼ +SET STATUS → "outline review" + │ ClickUp: comment "Outline ready for review" + │ + │ ★ BUG: .xlsx NOT moved to processed/ (link building watcher moves files) + │ + ▼ +WAITING FOR HUMAN + │ Human opens outline at Z:/content-outlines/{slug}/outline.md + │ Human edits/approves + │ Human moves ClickUp task to "outline approved" + + +STAGE C: CLICKUP POLL TRIGGERS PHASE 2 +═══════════════════════════════════════ + +CLICKUP POLL (every 20 min) + │ + ├─ Finds task with status = "outline approved" (in poll_statuses list) + │ + ├─ Check local DB: clickup:task:{id}:state + │ Sees state = "outline_review" → this means Phase 2 is ready + │ ★ BUG: If DB was wiped, no entry → runs Phase 1 AGAIN, 
overwrites outline + │ + ▼ +PHASE 2: Write Full Content (execution brain - Claude Code CLI) + │ + │ Reads outline from path stored in local DB (outline_path) + │ ★ BUG: If outline file was deleted → Phase 2 fails every time, no recovery + │ + │ Claude Code writes full content using the approved outline + │ Includes entity optimization, keyword density targets from Cora + │ + ▼ +SAVE FINAL CONTENT + │ Path: Z:/content-outlines/{keyword-slug}/final-content.md + │ Local DB: state = "completed" + │ + ▼ +SET STATUS → "internal review" + │ ClickUp: comment with content path + │ + ▼ +DONE — Human reviews final content +``` + +### ClickUp Fields Read +- `Keyword` → target keyword, used for Cora matching and content generation +- `IMSURL` → if present = optimization, if empty = new content +- `CLIFlags` → hints like "service" for service page template + +### What Can Go Wrong +- **BUG: AutoCora only checks today → Cora report never generated for overdue tasks** +- **BUG: DB wipe → Phase 2 reruns Phase 1, destroys approved outline** +- **BUG: Outline file deleted → Phase 2 permanently fails** +- **BUG: No "automation underway" set during Phase 1 from watcher** +- **BUG: .xlsx not moved to processed/** +- Network share offline → can't save outline or read it back + +--- + +## Pipeline 4: ON PAGE OPTIMIZATION + +**Work Category:** "On Page Optimization" +**auto_execute:** FALSE +**Tool:** `create_content` (same as Content Creation) + +### Flow + +Identical to Content Creation except: +- Phase 1 prompt says "optimize existing page at {IMSURL}" instead of "create new content" +- Phase 1 scrapes the existing page first, then builds optimization outline +- IMSURL is always present (it's the page being optimized) + +Same bugs apply. + +--- + +## The Local DB (KV Store) — What It Tracks + +| Key Pattern | What It Stores | Read By | Actually Needed? 
| +|---|---|---|---| +| `clickup:task:{id}:state` | Full task execution state (status, timestamps, outline_path, errors) | ClickUp poll dedup check, Phase 2 detection | **PARTIALLY** — outline_path is needed for Phase 2, but dedup could use ClickUp status instead | +| `autocora:job:{keyword}` | Job submission tracking (job_id, status, task_ids) | AutoCora result poller | **YES** — maps keyword to job_id for result file lookup | +| `linkbuilding:watched:{filename}` | File processing state (processing/completed/failed/unmatched/blocked) | Folder watcher scan | **YES** — prevents re-processing files | +| `content:watched:{filename}` | Same as above for content files | Content watcher scan | **YES** — prevents re-processing | +| `pipeline:status` | Current step text for UI ("Step 2/4: Judging...") | Gradio UI polling | **NO** — just a display string, could be in-memory | +| `linkbuilding:status` | Same for link building UI | Gradio UI polling | **NO** — same | +| `system:loop:*:last_run` (x6) | Timestamp of last loop run | Dashboard API | **NO** — informational only, never used in logic | + +--- + +## Summary of All Bugs + +| # | Bug | Severity | Pipelines Affected | +|---|-----|----------|-------------------| +| 1 | AutoCora only submits for today's due date | HIGH | Link Building, Content, OPT | +| 2 | DB wipe → Phase 2 reruns Phase 1 | HIGH | Content, OPT | +| 3 | Stuck "executing" after crash, no recovery | HIGH | All 4 | +| 4 | Content watcher missing "automation underway" | MEDIUM | Content, OPT | +| 5 | Content watcher doesn't move .xlsx to processed/ | MEDIUM | Content, OPT | +| 6 | KV written before ClickUp API → out of sync | MEDIUM | All 4 | +| 7 | Silent attachment upload failures | MEDIUM | Press Release | +| 8 | Phase 2 fails permanently if outline file gone | LOW | Content, OPT |