"""Two-phase content creation pipeline tool. Phase 1: Research competitors + generate outline → save → stop for human review. Phase 2: Human approves/edits outline → tool picks it up → writes full content. The content-researcher skill in the execution brain is triggered by keywords like "service page", "content optimization", "SEO content", etc. """ from __future__ import annotations import logging import re from pathlib import Path from . import tool log = logging.getLogger(__name__) _ROOT_DIR = Path(__file__).resolve().parent.parent.parent _DATA_DIR = _ROOT_DIR / "data" _LOCAL_CONTENT_DIR = _DATA_DIR / "generated" / "content" _SCRIPTS_DIR = _ROOT_DIR / ".claude" / "skills" / "content-researcher" / "scripts" EXEC_TOOLS = "Bash,Read,Edit,Write,Glob,Grep,WebSearch,WebFetch" # --------------------------------------------------------------------------- # ClickUp helpers # --------------------------------------------------------------------------- def _get_clickup_client(ctx: dict | None): """Create a ClickUpClient from tool context, or None if unavailable.""" if not ctx or not ctx.get("config") or not ctx["config"].clickup.enabled: return None try: from ..clickup import ClickUpClient config = ctx["config"] return ClickUpClient( api_token=config.clickup.api_token, workspace_id=config.clickup.workspace_id, task_type_field_name=config.clickup.task_type_field_name, ) except Exception as e: log.warning("Could not create ClickUp client: %s", e) return None def _sync_clickup_start(ctx: dict | None, task_id: str) -> None: """Move ClickUp task to 'automation underway'.""" if not task_id or not ctx: return client = _get_clickup_client(ctx) if not client: return try: config = ctx["config"] client.update_task_status(task_id, config.clickup.automation_status) except Exception as e: log.warning("Failed to set ClickUp start status for %s: %s", task_id, e) finally: client.close() def _sync_clickup_outline_ready(ctx: dict | None, task_id: str, outline_path: str) -> None: """Post outline comment, set OutlinePath field, and move to 'outline review'.""" if not task_id or not ctx: return client = _get_clickup_client(ctx) if not client: return try: # Store OutlinePath in ClickUp custom field for Phase 2 retrieval client.set_custom_field_by_name(task_id, "OutlinePath", outline_path) client.add_comment( task_id, f"[OUTLINE]CheddahBot generated a content outline.\n\n" f"Outline saved to: `{outline_path}`\n\n" f"Please review and edit the outline, then move this task to " f"**outline approved** to trigger the full content write.", ) client.update_task_status(task_id, "outline review") except Exception as e: log.warning("Failed to sync outline-ready for %s: %s", task_id, e) finally: client.close() def _sync_clickup_complete(ctx: dict | None, task_id: str, content_path: str) -> None: """Post completion comment and move ClickUp task to 'internal review'.""" if not task_id or not ctx: return client = _get_clickup_client(ctx) if not client: return try: config = ctx["config"] client.add_comment( task_id, f"[DONE]CheddahBot completed the content.\n\n" f"Final content saved to: `{content_path}`\n\n" f"Ready for internal review.", ) client.update_task_status(task_id, config.clickup.review_status) except Exception as e: log.warning("Failed to sync completion for %s: %s", task_id, e) finally: client.close() def _sync_clickup_fail(ctx: dict | None, task_id: str, error: str) -> None: """Post error comment and move ClickUp task to 'error'.""" if not task_id or not ctx: return client = _get_clickup_client(ctx) if not client: return try: config = ctx["config"] client.add_comment( task_id, f"[FAILED]CheddahBot failed during content creation.\n\nError: {error[:2000]}", ) client.update_task_status(task_id, config.clickup.error_status) except Exception as e: log.warning("Failed to sync failure for %s: %s", task_id, e) finally: client.close() # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def _slugify(text: str) -> str: """Turn text into a filesystem-safe slug.""" text = text.lower().strip() text = re.sub(r"[^\w\s-]", "", text) text = re.sub(r"[\s_]+", "-", text) return text[:80].strip("-") def _find_cora_report(keyword: str, cora_inbox: str) -> str: """Fuzzy-match a Cora .xlsx report by keyword. Match priority: exact filename match > substring > word overlap. Skips Office temp files (~$...). Returns the path string, or "" if not found. """ if not cora_inbox or not keyword: return "" inbox = Path(cora_inbox) if not inbox.exists(): return "" xlsx_files = [f for f in inbox.glob("*.xlsx") if not f.name.startswith("~$")] if not xlsx_files: return "" keyword_lower = keyword.lower().strip() keyword_words = set(keyword_lower.split()) # Pass 1: exact stem match for f in xlsx_files: if f.stem.lower().strip() == keyword_lower: return str(f) # Pass 2: keyword is substring of filename (or vice versa) for f in xlsx_files: stem = f.stem.lower().strip() if keyword_lower in stem or stem in keyword_lower: return str(f) # Pass 3: word overlap (at least half the keyword words) best_match = "" best_overlap = 0 for f in xlsx_files: stem_words = set(f.stem.lower().replace("-", " ").replace("_", " ").split()) overlap = len(keyword_words & stem_words) if overlap > best_overlap and overlap >= max(1, len(keyword_words) // 2): best_overlap = overlap best_match = str(f) return best_match def _save_content(content: str, keyword: str, filename: str, config) -> str: """Save content to the outline directory (network path with local fallback). Returns the actual path used. """ slug = _slugify(keyword) if not slug: slug = "unknown" # Try primary (network) path if config.content.outline_dir: primary = Path(config.content.outline_dir) / slug try: primary.mkdir(parents=True, exist_ok=True) out_path = primary / filename out_path.write_text(content, encoding="utf-8") return str(out_path) except OSError as e: log.warning("Network path unavailable (%s), falling back to local: %s", primary, e) # Fallback to local local = _LOCAL_CONTENT_DIR / slug local.mkdir(parents=True, exist_ok=True) out_path = local / filename out_path.write_text(content, encoding="utf-8") return str(out_path) # --------------------------------------------------------------------------- # Prompt builders # --------------------------------------------------------------------------- def _build_phase1_prompt( url: str, keyword: str, content_type: str, cora_path: str, capabilities_default: str, is_service_page: bool = False, outline_save_path: str = "", ) -> str: """Build the Phase 1 prompt that triggers the content-researcher skill. Branches on whether a URL is present: - URL present → optimization path (scrape existing page, match style) - No URL → new content path (research competitors, write net-new) """ if url: # ── Optimization path ── parts = [ f"Optimize the existing page at {url} targeting keyword '{keyword}'. " f"This is an on-page optimization project.", "\n**Step 1 — Scrape the existing page.**\n" "Use the BS4 scraper (scripts/competitor_scraper.py) to fetch the " "current page content — do NOT use web_fetch for this. Analyze its " "style, tone, heading structure, and content organization.", "\n**Step 2 — Build an optimization outline.**\n" "Plan two deliverables:\n" "1. **Optimized page rewrite** — match the original style/tone/structure " "while weaving in entity and keyword targets from the Cora report.\n" "2. **Hidden entity test block** — a `
` block " "containing entity terms that didn't fit naturally into the content.", ] else: # ── New content path ── parts = [ f"Research and outline new {content_type} targeting keyword '{keyword}'. " f"This is a new content creation project.", "\n**Step 1 — Competitor research.**\n" "Scrape the top-ranking pages for this keyword using " "scripts/competitor_scraper.py. Analyze their structure, depth, " "and content coverage.", "\n**Step 2 — Build an outline.**\n" "Plan the content structure with entities woven naturally into " "the headings and body. No hidden entity div needed for new content.", ] if cora_path: parts.append( f"\nA Cora SEO report is available at: {cora_path}\n" f"Read this report to extract keyword targets, entity requirements, " f"and competitive analysis data." ) if is_service_page: cap_note = ( f'\nThis is a **service page**. Use the following as the company ' f'capabilities answer: "{capabilities_default}"\n' f"Do NOT ask the user about capabilities — you are running autonomously. " f"Avoid making specific claims about services, certifications, or " f"licenses not already present on the existing page." ) parts.append(cap_note) elif capabilities_default: parts.append( f'\nWhen asked about company capabilities, respond with: "{capabilities_default}"' ) if outline_save_path: parts.append( f"\nSave the finished outline to `{outline_save_path}`. " "Create any missing directories first. " "The outline must be a complete markdown document with sections, " "headings, entity targets, and keyword placement notes. " "Do NOT save it anywhere else." ) else: parts.append( "\nDeliver the outline as a complete markdown document with sections, " "headings, entity targets, and keyword placement notes." ) return "\n".join(parts) def _build_phase2_prompt( url: str, keyword: str, outline_text: str, cora_path: str, is_service_page: bool = False, capabilities_default: str = "", content_path: str = "", ) -> str: """Build the Phase 2 prompt for writing full content from an approved outline. Branches on whether a URL is present: - URL present → write optimized page rewrite + hidden entity div - No URL → write full new page content """ if url: # ── Optimization path ── parts = [ f"Write the final optimized content for {url} targeting '{keyword}'. " f"This is the writing phase of an on-page optimization project.", f"\n## Approved Outline\n\n{outline_text}", "\n**Deliverables:**\n" "1. **Optimized page rewrite** — match the original page's style, tone, " "and structure. Weave in all entity and keyword targets from the outline.\n" "2. **Hidden entity test block** — generate a " "`
` block containing entity terms that " "didn't fit naturally into the body content. Use the entity test block " "generator (Phase 3 of the content-researcher skill).", ] else: # ── New content path ── parts = [ f"Write full new content targeting '{keyword}'. " f"This is the writing phase of a new content creation project.", f"\n## Approved Outline\n\n{outline_text}", "\nWrite publication-ready content following the outline structure. " "Weave entities naturally into the content — no hidden entity div " "needed for new content.", ] if cora_path: parts.append( f"\nThe Cora SEO report is at: {cora_path}\n" f"Use it for keyword density targets and entity optimization." ) if is_service_page: parts.append( f'\nThis is a **service page**. Company capabilities: "{capabilities_default}"\n' f"Do NOT make specific claims about services, certifications, or " f"licenses not found on the existing page." ) parts.append( "\nWrite publication-ready content following the outline structure. " "Include all entity targets and keyword placements as noted in the outline." ) if content_path: parts.append( f"\n**IMPORTANT — Save the final content as HTML to this exact path:**\n" f"`{content_path}`\n" f"Do NOT save to the local project directory or working/ folder." ) return "\n".join(parts) def _build_optimization_prompt( url: str, keyword: str, cora_path: str, work_dir: str, scripts_dir: str, is_service_page: bool = False, capabilities_default: str = "", ) -> str: """Build the execution brain prompt for the Phase 3 optimization pipeline. Produces 8 sequential steps that scrape the existing page, run deficit analysis, generate a test block, and create an optimization instructions document. All script commands use absolute paths so the CLI can execute them without any skill context. """ parts = [ f"You are running an automated on-page optimization pipeline for " f"'{keyword}' on {url}.\n\n" f"Working directory: {work_dir}\n" f"Cora report: {cora_path}\n" f"Scripts directory: {scripts_dir}\n\n" f"Execute the following steps IN ORDER. Each step depends on the " f"previous step's output files. Do NOT skip steps.\n", # Step 1 — Scrape existing page f"\n## Step 1 — Scrape Existing Page\n\n" f"Run the competitor scraper to fetch the current page content:\n\n" f"```bash\n" f'uv run --with requests,beautifulsoup4 python "{scripts_dir}/competitor_scraper.py" ' f'"{url}" --output-dir "{work_dir}" --format text\n' f"```\n\n" f"This produces `existing_content.md` (or a text file named after the URL) " f"in the working directory. If the output file is not named `existing_content.md`, " f"rename it to `existing_content.md`.", # Step 2 — Deficit analysis f"\n## Step 2 — Test Block Prep (Deficit Analysis)\n\n" f"Run the deficit analysis against the Cora report:\n\n" f"```bash\n" f'cd "{scripts_dir}" && uv run --with openpyxl python test_block_prep.py ' f'"{work_dir}/existing_content.md" "{cora_path}" --format json ' f'> "{work_dir}/prep_data.json"\n' f"```\n\n" f"This produces `prep_data.json` with word count deficits, missing entities, " f"density targets, and template generation instructions.", # Step 3 — Filter entities (LLM step) f"\n## Step 3 — Filter Missing Entities for Topical Relevance\n\n" f'Read `{work_dir}/prep_data.json` and extract the `missing_entities` list. ' f"Filter this list to keep ONLY entities that are topically relevant to " f"'{keyword}' and the page content. Remove generic/off-topic entities.\n\n" f"Write one entity per line to `{work_dir}/filtered_entities.txt`.\n\n" f"Be aggressive about filtering — only keep entities that a subject-matter " f"expert would expect to see on a page about '{keyword}'.", # Step 4 — Write templates (LLM step) f"\n## Step 4 — Write Heading + Body Templates\n\n" f"Using the deficit data from `{work_dir}/prep_data.json` and the filtered " f"entities from `{work_dir}/filtered_entities.txt`, write:\n\n" f"1. H2 and H3 headings that incorporate target entities\n" f"2. Body sentence templates with `{{N}}` placeholder slots where entity " f"terms will be inserted programmatically\n\n" f"Format: Each template is a heading line followed by body sentences. " f"Each body sentence should have 1-3 `{{N}}` slots (numbered sequentially " f"starting from 1 within each sentence).\n\n" f"Write the output to `{work_dir}/templates.txt`.\n\n" f"Example format:\n" f"```\n" f"## Heading About {{1}} and {{2}}\n" f"Sentence with {{1}} integrated naturally. Another point about {{2}} " f"that provides value.\n" f"```", # Step 5 — Generate test block (script) f"\n## Step 5 — Generate Test Block\n\n" f"Run the test block generator to fill template slots and produce the " f"HTML test block:\n\n" f"```bash\n" f'cd "{scripts_dir}" && uv run --with openpyxl python test_block_generator.py ' f'"{work_dir}/templates.txt" "{work_dir}/prep_data.json" "{cora_path}" ' f'--entities-file "{work_dir}/filtered_entities.txt" ' f'--output-dir "{work_dir}"\n' f"```\n\n" f"This produces `test_block.md`, `test_block.html`, and `test_block_stats.json` " f"in the working directory.", # Step 6 — Rewrite for readability (LLM step) f"\n## Step 6 — Rewrite Body Sentences for Readability\n\n" f"Read `{work_dir}/test_block.md`. Rewrite each body sentence to improve " f"readability and natural flow while preserving:\n" f"- ALL entity strings exactly as they appear (do not paraphrase entity terms)\n" f"- The overall heading structure\n" f"- The `` markers\n\n" f"Write the improved version back to `{work_dir}/test_block.md`.\n" f"Then regenerate the HTML version at `{work_dir}/test_block.html` with the " f"content wrapped in `
` tags.", # Step 7 — Validate (script) f"\n## Step 7 — Validate Test Block\n\n" f"Run the before/after validation:\n\n" f"```bash\n" f'cd "{scripts_dir}" && uv run --with openpyxl python test_block_validate.py ' f'"{work_dir}/existing_content.md" "{work_dir}/test_block.md" "{cora_path}" ' f'--format json --output "{work_dir}/validation_report.json"\n' f"```\n\n" f"This produces `validation_report.json` with before/after metrics comparison.", # Step 8 — Generate optimization instructions (LLM step) f"\n## Step 8 — Generate Optimization Instructions\n\n" f"Read the following files:\n" f"- `{work_dir}/existing_content.md` (current page)\n" f"- `{work_dir}/prep_data.json` (deficit analysis)\n" f"- `{work_dir}/validation_report.json` (before/after metrics)\n" f"- `{work_dir}/test_block.md` (generated test block)\n\n" f"Generate `{work_dir}/optimization_instructions.md` — a surgical playbook " f"for the human editor with these sections:\n\n" f"1. **Executive Summary** — one-paragraph overview of optimization opportunity\n" f"2. **Heading Changes** — specific H1/H2/H3 modifications with before/after\n" f"3. **Sections to Expand** — which sections need more content and what to add\n" f"4. **Entity Integration Points** — exact locations to weave in missing entities\n" f"5. **Meta Tag Updates** — title tag and meta description recommendations\n" f"6. **Content Gaps** — topics covered by competitors but missing from this page\n" f"7. **Priority Ranking** — rank all changes by expected SEO impact (high/medium/low)\n\n" f"Be specific and actionable. Reference exact headings and paragraphs from " f"the existing content. Do NOT rewrite the full page — this is a surgical guide.", ] if is_service_page: parts.append( f'\nNOTE: This is a **service page**. Company capabilities: ' f'"{capabilities_default}"\n' f"Do NOT make specific claims about services, certifications, or " f"licenses not found on the existing page." ) return "\n".join(parts) # --------------------------------------------------------------------------- # Optimization pipeline (Phase 3 — test block + surgical instructions) # --------------------------------------------------------------------------- def _run_optimization( *, agent, config, ctx: dict | None, task_id: str, url: str, keyword: str, cora_path: str, is_service_page: bool = False, capabilities_default: str = "", ) -> str: """Run the Phase 3 optimization pipeline. Requires a Cora report. Creates an isolated working directory, calls the execution brain with the 8-step optimization prompt, then finalizes by collecting deliverables and syncing ClickUp. """ if not cora_path: msg = ( f"Error: No Cora report found for keyword '{keyword}'. " f"A Cora report is required for the optimization pipeline. " f"Please upload a Cora .xlsx report to the content Cora inbox." ) log.error(msg) if task_id: _sync_clickup_fail(ctx, task_id, msg) return msg slug = _slugify(keyword) or "unknown" work_dir = _LOCAL_CONTENT_DIR / slug / f"optimization-{task_id or 'manual'}" work_dir.mkdir(parents=True, exist_ok=True) scripts_dir = str(_SCRIPTS_DIR) # ClickUp: move to automation underway if task_id: _sync_clickup_start(ctx, task_id) prompt = _build_optimization_prompt( url=url, keyword=keyword, cora_path=cora_path, work_dir=str(work_dir), scripts_dir=scripts_dir, is_service_page=is_service_page, capabilities_default=capabilities_default, ) log.info( "Optimization pipeline — running for '%s' (%s), work_dir=%s", keyword, url, work_dir, ) try: exec_result = agent.execute_task( prompt, tools=EXEC_TOOLS, skip_permissions=True, ) except Exception as e: error_msg = f"Optimization pipeline execution failed: {e}" log.error(error_msg) if task_id: _sync_clickup_fail(ctx, task_id, str(e)) return f"Error: {error_msg}" if exec_result.startswith("Error:"): if task_id: _sync_clickup_fail(ctx, task_id, exec_result) return exec_result return _finalize_optimization( ctx=ctx, config=config, task_id=task_id, keyword=keyword, url=url, work_dir=work_dir, exec_result=exec_result, ) def _finalize_optimization( *, ctx: dict | None, config, task_id: str, keyword: str, url: str, work_dir: Path, exec_result: str, ) -> str: """Collect deliverables from the working directory and sync ClickUp. Required files: test_block.html, optimization_instructions.md. Optional: validation_report.json. """ required = ["test_block.html", "optimization_instructions.md"] missing = [f for f in required if not (work_dir / f).exists()] if missing: error_msg = ( f"Optimization pipeline finished but required deliverables are " f"missing: {', '.join(missing)}. Working directory: {work_dir}" ) log.error(error_msg) if task_id: _sync_clickup_fail(ctx, task_id, error_msg) return f"Error: {error_msg}" # Collect all deliverable paths deliverable_names = [ "test_block.html", "optimization_instructions.md", "validation_report.json", ] found_files: dict[str, Path] = {} for name in deliverable_names: fpath = work_dir / name if fpath.exists(): found_files[name] = fpath # Copy deliverables to network path (if configured) slug = _slugify(keyword) or "unknown" if config and config.content.outline_dir: net_dir = Path(config.content.outline_dir) / slug try: net_dir.mkdir(parents=True, exist_ok=True) for name, fpath in found_files.items(): dest = net_dir / name dest.write_bytes(fpath.read_bytes()) log.info("Copied %s -> %s", fpath, dest) except OSError as e: log.warning("Could not copy deliverables to network path %s: %s", net_dir, e) # Sync ClickUp if task_id: _sync_clickup_optimization_complete( ctx=ctx, config=config, task_id=task_id, keyword=keyword, url=url, found_files=found_files, work_dir=work_dir, ) file_list = "\n".join(f"- `{p}`" for p in found_files.values()) return ( f"## Optimization Complete\n\n" f"**Keyword:** {keyword}\n" f"**URL:** {url}\n" f"**Deliverables:**\n{file_list}\n\n" f"---\n\n{exec_result}\n\n" f"## ClickUp Sync\nOptimization complete. Status: internal review." ) def _sync_clickup_optimization_complete( *, ctx: dict | None, config, task_id: str, keyword: str, url: str, found_files: dict[str, Path], work_dir: Path, ) -> None: """Upload optimization deliverables to ClickUp and set status.""" if not task_id or not ctx: return client = _get_clickup_client(ctx) if not client: return try: # Upload attachments for name, fpath in found_files.items(): try: client.upload_attachment(task_id, fpath) log.info("Uploaded %s to ClickUp task %s", name, task_id) except Exception as e: log.warning("Failed to upload %s: %s", name, e) # Build comment with validation summary comment_parts = [ f"[DONE]Optimization pipeline complete for '{keyword}'.\n", f"**URL:** {url}\n", "**Deliverables attached:**", ] for name in found_files: comment_parts.append(f"- {name}") # Include validation summary if available val_path = work_dir / "validation_report.json" if val_path.exists(): try: import json val_data = json.loads(val_path.read_text(encoding="utf-8")) summary = val_data.get("summary", "") if summary: comment_parts.append(f"\n**Validation Summary:**\n{summary}") except Exception: pass comment_parts.append( "\n**Next Steps:**\n" "1. Review `optimization_instructions.md` for surgical changes\n" "2. Deploy `test_block.html` hidden div to the page\n" "3. Monitor rankings for 2-4 weeks\n" "4. Apply surgical changes from the instructions doc" ) client.add_comment(task_id, "\n".join(comment_parts)) client.update_task_status(task_id, config.clickup.review_status) except Exception as e: log.warning("Failed to sync optimization complete for %s: %s", task_id, e) finally: client.close() # --------------------------------------------------------------------------- # Main tool # --------------------------------------------------------------------------- @tool( "create_content", "SEO content creation. Set content_type='new content' for new pages " "(Phase 1 outline → Phase 2 full write), or content_type='on page optimization' " "to optimize an existing page with Cora data. Auto-detects phase from ClickUp " "task status ('outline approved' → Phase 2). Ask the user which type if unclear.", category="content", ) def create_content( keyword: str, url: str = "", content_type: str = "", cli_flags: str = "", ctx: dict | None = None, ) -> str: """Create SEO content in two phases with human review between them. Args: keyword: Primary target keyword (e.g. "plumbing services"). url: Target page URL (optional for new content, required for optimization). content_type: 'new content' or 'on page optimization'. Controls routing. If empty, inferred from URL presence as fallback. cli_flags: Optional flags (e.g. "service" for service page hint). """ if not keyword: return "Error: 'keyword' is required." if not ctx or "agent" not in ctx: return "Error: Tool context with agent is required." # Auto-detect content_type from URL presence when not explicitly set if not content_type: content_type = "on page optimization" if url else "new content" # Service page hint from cli_flags is_service_page = bool(cli_flags and "service" in cli_flags.lower()) agent = ctx["agent"] config = ctx.get("config") db = ctx.get("db") task_id = ctx.get("clickup_task_id", "") # Determine phase from ClickUp task status # Prefer status passed by scheduler (pre-status-change) over re-fetching phase = 1 original_status = ctx.get("clickup_task_status", "") if original_status and original_status.lower() == "outline approved": phase = 2 elif task_id and ctx: client = _get_clickup_client(ctx) if client: try: task = client.get_task(task_id) if task.status.lower() == "outline approved": phase = 2 except Exception as e: log.warning("Could not check ClickUp status for phase detection: %s", e) finally: client.close() # Find Cora report cora_inbox = config.content.cora_inbox if config else "" cora_path = _find_cora_report(keyword, cora_inbox) if cora_path: log.info("Found Cora report for '%s': %s", keyword, cora_path) capabilities_default = config.content.company_capabilities_default if config else "" # Optimization path: content_type determines route (URL fallback for chat callers) if content_type.lower() == "on page optimization": if not url: return "Error: On Page Optimization requires a URL (IMSURL field)." return _run_optimization( agent=agent, config=config, ctx=ctx, task_id=task_id, url=url, keyword=keyword, cora_path=cora_path, is_service_page=is_service_page, capabilities_default=capabilities_default, ) # New content path: Phase 1 (outline) → human review → Phase 2 (write) if phase == 1: return _run_phase1( agent=agent, config=config, db=db, ctx=ctx, task_id=task_id, url=url, keyword=keyword, content_type=content_type, cora_path=cora_path, capabilities_default=capabilities_default, is_service_page=is_service_page, ) else: return _run_phase2( agent=agent, config=config, db=db, ctx=ctx, task_id=task_id, url=url, keyword=keyword, cora_path=cora_path, existing_state={}, is_service_page=is_service_page, capabilities_default=capabilities_default, ) # --------------------------------------------------------------------------- # Phase 1: Research + Outline # --------------------------------------------------------------------------- def _run_phase1( *, agent, config, db, ctx, task_id: str, url: str, keyword: str, content_type: str, cora_path: str, capabilities_default: str, is_service_page: bool = False, ) -> str: # Compute the outline save path upfront so the execution brain writes # directly to the network share (or local fallback). slug = _slugify(keyword) or "unknown" outline_path = "" if config.content.outline_dir: primary = Path(config.content.outline_dir) / slug try: primary.mkdir(parents=True, exist_ok=True) outline_path = str(primary / "outline.md") except OSError as e: log.warning("Network path unavailable (%s), falling back to local: %s", primary, e) if not outline_path: local = _LOCAL_CONTENT_DIR / slug local.mkdir(parents=True, exist_ok=True) outline_path = str(local / "outline.md") # ClickUp: move to automation underway if task_id: _sync_clickup_start(ctx, task_id) prompt = _build_phase1_prompt( url, keyword, content_type, cora_path, capabilities_default, is_service_page, outline_save_path=outline_path, ) log.info("Phase 1 — researching + outlining for '%s' (%s)", keyword, url or "new content") try: result = agent.execute_task( prompt, tools=EXEC_TOOLS, skip_permissions=True, ) except Exception as e: error_msg = f"Phase 1 execution failed: {e}" log.error(error_msg) if task_id: _sync_clickup_fail(ctx, task_id, str(e)) return f"Error: {error_msg}" if result.startswith("Error:"): if task_id: _sync_clickup_fail(ctx, task_id, result) return result # Verify the outline was saved by the execution brain if not Path(outline_path).is_file(): log.warning( "Execution brain did not save outline to %s; saving result text as fallback.", outline_path, ) Path(outline_path).parent.mkdir(parents=True, exist_ok=True) Path(outline_path).write_text(result, encoding="utf-8") log.info("Outline saved to: %s", outline_path) # ClickUp: move to outline review + store OutlinePath if task_id: _sync_clickup_outline_ready(ctx, task_id, outline_path) url_line = f"**URL:** {url}\n" if url else "**Type:** New content\n" return ( f"## Phase 1 Complete — Outline Ready for Review\n\n" f"**Keyword:** {keyword}\n" f"{url_line}" f"**Outline saved to:** `{outline_path}`\n\n" f"Please review and edit the outline. When ready, move the ClickUp task " f"to **outline approved** to trigger Phase 2 (full content writing).\n\n" f"---\n\n{result}\n\n" f"## ClickUp Sync\nPhase 1 complete. Status: outline review." ) # --------------------------------------------------------------------------- # Phase 2: Write Full Content # --------------------------------------------------------------------------- def _resolve_outline_path(ctx: dict | None, task_id: str, keyword: str, config) -> str: """Resolve the outline path from ClickUp field or convention. Priority: ClickUp OutlinePath field → convention path → empty string. """ # Try ClickUp custom field first if task_id and ctx: client = _get_clickup_client(ctx) if client: try: outline_path = client.get_custom_field_by_name(task_id, "OutlinePath") if outline_path and str(outline_path).strip(): return str(outline_path).strip() except Exception as e: log.warning("Failed to read OutlinePath from ClickUp for %s: %s", task_id, e) finally: client.close() # Fallback to convention path slug = _slugify(keyword) if slug and config and config.content.outline_dir: convention_path = Path(config.content.outline_dir) / slug / "outline.md" if convention_path.exists(): return str(convention_path) # Try local fallback too if slug: local_path = _LOCAL_CONTENT_DIR / slug / "outline.md" if local_path.exists(): return str(local_path) return "" def _run_phase2( *, agent, config, db, ctx, task_id: str, url: str, keyword: str, cora_path: str, existing_state: dict, is_service_page: bool = False, capabilities_default: str = "", ) -> str: # Resolve outline path: ClickUp field → convention outline_path = _resolve_outline_path(ctx, task_id, keyword, config) outline_text = "" if outline_path: try: outline_text = Path(outline_path).read_text(encoding="utf-8") except OSError as e: log.warning("Could not read outline at %s: %s", outline_path, e) if not outline_text: # Reset task to "to do" so it can be retried after re-running Phase 1 if task_id and ctx: client = _get_clickup_client(ctx) if client: try: poll_sts = config.clickup.poll_statuses reset_status = poll_sts[0] if poll_sts else "to do" client.update_task_status(task_id, reset_status) client.add_comment( task_id, f"[WARNING]Outline file not found for keyword '{keyword}'. " f"Searched: {outline_path or '(no path saved)'}. " f"Please re-run Phase 1 (create_content) to generate a new outline.", ) except Exception as e: log.warning("Failed to reset task %s after missing outline: %s", task_id, e) finally: client.close() return ( "Error: Outline file not found. " f"Searched at: {outline_path or '(no path saved)'}. " f"Please re-run Phase 1 (create_content) to generate a new outline." ) # Use saved cora_path from state if we don't have one now if not cora_path: cora_path = existing_state.get("cora_path", "") # Compute save path for the final content HTML (network share with local fallback) slug = _slugify(keyword) or "unknown" content_path = "" if config.content.outline_dir: primary = Path(config.content.outline_dir) / slug try: primary.mkdir(parents=True, exist_ok=True) content_path = str(primary / "final-content.html") except OSError as e: log.warning("Network path unavailable (%s), falling back to local: %s", primary, e) if not content_path: local = _LOCAL_CONTENT_DIR / slug local.mkdir(parents=True, exist_ok=True) content_path = str(local / "final-content.html") # ClickUp: move to automation underway if task_id: _sync_clickup_start(ctx, task_id) prompt = _build_phase2_prompt( url, keyword, outline_text, cora_path, is_service_page, capabilities_default, content_path=content_path, ) log.info("Phase 2 — writing full content for '%s' (%s)", keyword, url) try: result = agent.execute_task( prompt, tools=EXEC_TOOLS, skip_permissions=True, ) except Exception as e: error_msg = f"Phase 2 execution failed: {e}" log.error(error_msg) if task_id: _sync_clickup_fail(ctx, task_id, str(e)) return f"Error: {error_msg}" if result.startswith("Error:"): if task_id: _sync_clickup_fail(ctx, task_id, result) return result # Save final content content_path = _save_content(result, keyword, "final-content.md", config) log.info("Final content saved to: %s", content_path) # ClickUp: move to internal review if task_id: _sync_clickup_complete(ctx, task_id, content_path) url_line = f"**URL:** {url}\n" if url else "**Type:** New content\n" return ( f"## Phase 2 Complete — Content Written\n\n" f"**Keyword:** {keyword}\n" f"{url_line}" f"**Content saved to:** `{content_path}`\n\n" f"---\n\n{result}\n\n" f"## ClickUp Sync\nPhase 2 complete. Status: internal review." ) # --------------------------------------------------------------------------- # Continue content (chat-initiated Phase 2) # --------------------------------------------------------------------------- @tool( "continue_content", "Resume content creation for a keyword that has an approved outline. " "Runs Phase 2 (full content writing) for a previously outlined keyword.", category="content", ) def continue_content( keyword: str, ctx: dict | None = None, ) -> str: """Resume content writing for a keyword with an approved outline. Args: keyword: The keyword to continue writing content for. """ if not keyword: return "Error: 'keyword' is required." if not ctx or "agent" not in ctx: return "Error: Tool context with agent is required." config = ctx.get("config") db = ctx.get("db") # Query ClickUp for tasks in "outline approved" or "outline review" status # matching the keyword client = _get_clickup_client(ctx) if client: try: space_id = config.clickup.space_id if config else "" if space_id: tasks = client.get_tasks_from_space( space_id, statuses=["outline approved", "outline review"], ) keyword_lower = keyword.lower().strip() for task in tasks: task_keyword = task.custom_fields.get("Keyword", "") if str(task_keyword).lower().strip() == keyword_lower: task_id = task.id url = task.custom_fields.get("IMSURL", "") or "" cora_inbox = config.content.cora_inbox if config else "" cora_path = _find_cora_report(keyword, cora_inbox) return _run_phase2( agent=ctx["agent"], config=config, db=db, ctx=ctx, task_id=task_id, url=str(url), keyword=keyword, cora_path=cora_path or "", existing_state={}, ) except Exception as e: log.warning("ClickUp query failed in continue_content: %s", e) finally: client.close() # Fallback: try to run Phase 2 without a ClickUp task (outline must exist locally) outline_path = _resolve_outline_path(ctx, "", keyword, config) if outline_path: return _run_phase2( agent=ctx["agent"], config=config, db=db, ctx=ctx, task_id="", url="", keyword=keyword, cora_path="", existing_state={}, ) return ( f"No outline awaiting review found for keyword '{keyword}'. " f"Use create_content to start Phase 1 first." )