"""Two-phase content creation pipeline tool.

Phase 1: Research competitors + generate outline → save → stop for human review.
Phase 2: Human approves/edits outline → tool picks it up → writes full content.

The content-researcher skill in the execution brain is triggered by keywords like
"service page", "content optimization", "SEO content", etc.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import re
|
|
from pathlib import Path
|
|
|
|
from . import tool
|
|
|
|
log = logging.getLogger(__name__)

# Repo root: this module sits three directory levels below the project root.
_ROOT_DIR = Path(__file__).resolve().parent.parent.parent
_DATA_DIR = _ROOT_DIR / "data"
# Local fallback destination for generated outlines/content, used whenever the
# configured network share (config.content.outline_dir) is missing or fails.
_LOCAL_CONTENT_DIR = _DATA_DIR / "generated" / "content"
# Helper scripts shipped with the content-researcher skill (scraper, test
# block prep/generator/validator) — referenced by absolute path in prompts.
_SCRIPTS_DIR = _ROOT_DIR / ".claude" / "skills" / "content-researcher" / "scripts"

# Tool allow-list handed to the execution brain for every content phase.
EXEC_TOOLS = "Bash,Read,Edit,Write,Glob,Grep,WebSearch,WebFetch"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# ClickUp helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _get_clickup_client(ctx: dict | None):
|
|
"""Create a ClickUpClient from tool context, or None if unavailable."""
|
|
if not ctx or not ctx.get("config") or not ctx["config"].clickup.enabled:
|
|
return None
|
|
try:
|
|
from ..clickup import ClickUpClient
|
|
|
|
config = ctx["config"]
|
|
return ClickUpClient(
|
|
api_token=config.clickup.api_token,
|
|
workspace_id=config.clickup.workspace_id,
|
|
task_type_field_name=config.clickup.task_type_field_name,
|
|
)
|
|
except Exception as e:
|
|
log.warning("Could not create ClickUp client: %s", e)
|
|
return None
|
|
|
|
|
|
def _sync_clickup_start(ctx: dict | None, task_id: str) -> None:
|
|
"""Move ClickUp task to 'automation underway'."""
|
|
if not task_id or not ctx:
|
|
return
|
|
client = _get_clickup_client(ctx)
|
|
if not client:
|
|
return
|
|
try:
|
|
config = ctx["config"]
|
|
client.update_task_status(task_id, config.clickup.automation_status)
|
|
except Exception as e:
|
|
log.warning("Failed to set ClickUp start status for %s: %s", task_id, e)
|
|
finally:
|
|
client.close()
|
|
|
|
|
|
def _sync_clickup_outline_ready(ctx: dict | None, task_id: str, outline_path: str) -> None:
|
|
"""Post outline comment, set OutlinePath field, and move to 'outline review'."""
|
|
if not task_id or not ctx:
|
|
return
|
|
client = _get_clickup_client(ctx)
|
|
if not client:
|
|
return
|
|
try:
|
|
# Store OutlinePath in ClickUp custom field for Phase 2 retrieval
|
|
client.set_custom_field_by_name(task_id, "OutlinePath", outline_path)
|
|
|
|
client.add_comment(
|
|
task_id,
|
|
f"[OUTLINE]CheddahBot generated a content outline.\n\n"
|
|
f"Outline saved to: `{outline_path}`\n\n"
|
|
f"Please review and edit the outline, then move this task to "
|
|
f"**outline approved** to trigger the full content write.",
|
|
)
|
|
client.update_task_status(task_id, "outline review")
|
|
except Exception as e:
|
|
log.warning("Failed to sync outline-ready for %s: %s", task_id, e)
|
|
finally:
|
|
client.close()
|
|
|
|
|
|
def _sync_clickup_complete(ctx: dict | None, task_id: str, content_path: str) -> None:
|
|
"""Post completion comment and move ClickUp task to 'internal review'."""
|
|
if not task_id or not ctx:
|
|
return
|
|
client = _get_clickup_client(ctx)
|
|
if not client:
|
|
return
|
|
try:
|
|
config = ctx["config"]
|
|
client.add_comment(
|
|
task_id,
|
|
f"[DONE]CheddahBot completed the content.\n\n"
|
|
f"Final content saved to: `{content_path}`\n\n"
|
|
f"Ready for internal review.",
|
|
)
|
|
client.update_task_status(task_id, config.clickup.review_status)
|
|
except Exception as e:
|
|
log.warning("Failed to sync completion for %s: %s", task_id, e)
|
|
finally:
|
|
client.close()
|
|
|
|
|
|
def _sync_clickup_fail(ctx: dict | None, task_id: str, error: str) -> None:
|
|
"""Post error comment and move ClickUp task to 'error'."""
|
|
if not task_id or not ctx:
|
|
return
|
|
client = _get_clickup_client(ctx)
|
|
if not client:
|
|
return
|
|
try:
|
|
config = ctx["config"]
|
|
client.add_comment(
|
|
task_id,
|
|
f"[FAILED]CheddahBot failed during content creation.\n\nError: {error[:2000]}",
|
|
)
|
|
client.update_task_status(task_id, config.clickup.error_status)
|
|
except Exception as e:
|
|
log.warning("Failed to sync failure for %s: %s", task_id, e)
|
|
finally:
|
|
client.close()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _slugify(text: str) -> str:
    """Turn text into a filesystem-safe slug."""
    # Lowercase, drop punctuation (word chars / spaces / hyphens survive),
    # collapse whitespace and underscores to single hyphens, cap at 80 chars.
    cleaned = re.sub(r"[^\w\s-]", "", text.lower().strip())
    hyphenated = re.sub(r"[\s_]+", "-", cleaned)
    return hyphenated[:80].strip("-")
|
|
|
|
|
|
def _find_cora_report(keyword: str, cora_inbox: str) -> str:
    """Fuzzy-match a Cora .xlsx report by keyword.

    Match priority: exact filename match > substring > word overlap.
    Skips Office temp files (~$...).
    Returns the path string, or "" if not found.
    """
    if not (keyword and cora_inbox):
        return ""
    inbox = Path(cora_inbox)
    if not inbox.exists():
        return ""

    candidates = [f for f in inbox.glob("*.xlsx") if not f.name.startswith("~$")]
    if not candidates:
        return ""

    target = keyword.lower().strip()
    target_words = set(target.split())

    # Pass 1: exact stem match
    for candidate in candidates:
        if candidate.stem.lower().strip() == target:
            return str(candidate)

    # Pass 2: keyword is substring of filename (or vice versa)
    for candidate in candidates:
        stem = candidate.stem.lower().strip()
        if target in stem or stem in target:
            return str(candidate)

    # Pass 3: word overlap (at least half the keyword words)
    threshold = max(1, len(target_words) // 2)
    best_path = ""
    best_score = 0
    for candidate in candidates:
        stem_words = set(candidate.stem.lower().replace("-", " ").replace("_", " ").split())
        score = len(target_words & stem_words)
        if score > best_score and score >= threshold:
            best_path = str(candidate)
            best_score = score

    return best_path
|
|
|
|
|
|
def _save_content(content: str, keyword: str, filename: str, config) -> str:
    """Save content to the outline directory (network path with local fallback).

    Returns the actual path used.
    """
    slug = _slugify(keyword) or "unknown"

    # Try primary (network) path
    if config.content.outline_dir:
        network_dir = Path(config.content.outline_dir) / slug
        try:
            network_dir.mkdir(parents=True, exist_ok=True)
            target = network_dir / filename
            target.write_text(content, encoding="utf-8")
            return str(target)
        except OSError as e:
            log.warning("Network path unavailable (%s), falling back to local: %s", network_dir, e)

    # Fallback to local
    local_dir = _LOCAL_CONTENT_DIR / slug
    local_dir.mkdir(parents=True, exist_ok=True)
    target = local_dir / filename
    target.write_text(content, encoding="utf-8")
    return str(target)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Prompt builders
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _build_phase1_prompt(
    url: str,
    keyword: str,
    content_type: str,
    cora_path: str,
    capabilities_default: str,
    is_service_page: bool = False,
    outline_save_path: str = "",
) -> str:
    """Build the Phase 1 prompt that triggers the content-researcher skill.

    Branches on whether a URL is present:
    - URL present → optimization path (scrape existing page, match style)
    - No URL → new content path (research competitors, write net-new)

    Args:
        url: Existing page URL; empty string selects the new-content branch.
        keyword: Primary target keyword, interpolated verbatim into the prompt.
        content_type: Human-readable content type used in the new-content intro.
        cora_path: Path to a Cora .xlsx report; empty skips the Cora section.
        capabilities_default: Canned answer for company-capabilities questions.
        is_service_page: Adds the autonomous service-page instruction block.
        outline_save_path: Exact file path the brain must save the outline to;
            empty falls back to asking for the outline as a deliverable.

    Returns:
        Newline-joined prompt string.
    """
    # A URL means we optimize an existing page; otherwise it is net-new content.
    if url:
        # ── Optimization path ──
        parts = [
            f"Optimize the existing page at {url} targeting keyword '{keyword}'. "
            f"This is an on-page optimization project.",
            "\n**Step 1 — Scrape the existing page.**\n"
            "Use the BS4 scraper (scripts/competitor_scraper.py) to fetch the "
            "current page content — do NOT use web_fetch for this. Analyze its "
            "style, tone, heading structure, and content organization.",
            "\n**Step 2 — Build an optimization outline.**\n"
            "Plan two deliverables:\n"
            "1. **Optimized page rewrite** — match the original style/tone/structure "
            "while weaving in entity and keyword targets from the Cora report.\n"
            "2. **Hidden entity test block** — a `<div style=\"display:none\">` block "
            "containing entity terms that didn't fit naturally into the content.",
        ]
    else:
        # ── New content path ──
        parts = [
            f"Research and outline new {content_type} targeting keyword '{keyword}'. "
            f"This is a new content creation project.",
            "\n**Step 1 — Competitor research.**\n"
            "Scrape the top-ranking pages for this keyword using "
            "scripts/competitor_scraper.py. Analyze their structure, depth, "
            "and content coverage.",
            "\n**Step 2 — Build an outline.**\n"
            "Plan the content structure with entities woven naturally into "
            "the headings and body. No hidden entity div needed for new content.",
        ]

    if cora_path:
        parts.append(
            f"\nA Cora SEO report is available at: {cora_path}\n"
            f"Read this report to extract keyword targets, entity requirements, "
            f"and competitive analysis data."
        )

    # Service pages run unattended, so the capabilities question is pre-answered
    # rather than left for the (absent) human operator.
    if is_service_page:
        cap_note = (
            f'\nThis is a **service page**. Use the following as the company '
            f'capabilities answer: "{capabilities_default}"\n'
            f"Do NOT ask the user about capabilities — you are running autonomously. "
            f"Avoid making specific claims about services, certifications, or "
            f"licenses not already present on the existing page."
        )
        parts.append(cap_note)
    elif capabilities_default:
        parts.append(
            f'\nWhen asked about company capabilities, respond with: "{capabilities_default}"'
        )

    if outline_save_path:
        parts.append(
            f"\nSave the finished outline to `{outline_save_path}`. "
            "Create any missing directories first. "
            "The outline must be a complete markdown document with sections, "
            "headings, entity targets, and keyword placement notes. "
            "Do NOT save it anywhere else."
        )
    else:
        parts.append(
            "\nDeliver the outline as a complete markdown document with sections, "
            "headings, entity targets, and keyword placement notes."
        )
    return "\n".join(parts)
|
|
|
|
|
|
def _build_phase2_prompt(
    url: str,
    keyword: str,
    outline_text: str,
    cora_path: str,
    is_service_page: bool = False,
    capabilities_default: str = "",
    content_path: str = "",
) -> str:
    """Build the Phase 2 prompt for writing full content from an approved outline.

    Branches on whether a URL is present:
    - URL present → write optimized page rewrite + hidden entity div
    - No URL → write full new page content

    Args:
        url: Existing page URL; empty string selects the new-content branch.
        keyword: Primary target keyword, interpolated verbatim.
        outline_text: Full text of the human-approved outline, embedded inline.
        cora_path: Path to the Cora .xlsx report; empty skips that section.
        is_service_page: Adds the no-unverified-claims service-page block.
        capabilities_default: Canned company-capabilities answer for service pages.
        content_path: Exact output path for the final HTML; empty omits the
            save instruction.

    Returns:
        Newline-joined prompt string.
    """
    if url:
        # ── Optimization path ──
        parts = [
            f"Write the final optimized content for {url} targeting '{keyword}'. "
            f"This is the writing phase of an on-page optimization project.",
            f"\n## Approved Outline\n\n{outline_text}",
            "\n**Deliverables:**\n"
            "1. **Optimized page rewrite** — match the original page's style, tone, "
            "and structure. Weave in all entity and keyword targets from the outline.\n"
            "2. **Hidden entity test block** — generate a "
            "`<div style=\"display:none\">` block containing entity terms that "
            "didn't fit naturally into the body content. Use the entity test block "
            "generator (Phase 3 of the content-researcher skill).",
        ]
    else:
        # ── New content path ──
        parts = [
            f"Write full new content targeting '{keyword}'. "
            f"This is the writing phase of a new content creation project.",
            f"\n## Approved Outline\n\n{outline_text}",
            "\nWrite publication-ready content following the outline structure. "
            "Weave entities naturally into the content — no hidden entity div "
            "needed for new content.",
        ]

    if cora_path:
        parts.append(
            f"\nThe Cora SEO report is at: {cora_path}\n"
            f"Use it for keyword density targets and entity optimization."
        )

    if is_service_page:
        parts.append(
            f'\nThis is a **service page**. Company capabilities: "{capabilities_default}"\n'
            f"Do NOT make specific claims about services, certifications, or "
            f"licenses not found on the existing page."
        )

    # NOTE(review): on the no-URL path this repeats the "Write publication-ready
    # content following the outline structure" instruction already emitted above
    # — presumably harmless to the downstream LLM, but confirm it is intended.
    parts.append(
        "\nWrite publication-ready content following the outline structure. "
        "Include all entity targets and keyword placements as noted in the outline."
    )

    if content_path:
        parts.append(
            f"\n**IMPORTANT — Save the final content as HTML to this exact path:**\n"
            f"`{content_path}`\n"
            f"Do NOT save to the local project directory or working/ folder."
        )

    return "\n".join(parts)
|
|
|
|
|
|
def _build_optimization_prompt(
    url: str,
    keyword: str,
    cora_path: str,
    work_dir: str,
    scripts_dir: str,
    is_service_page: bool = False,
    capabilities_default: str = "",
) -> str:
    """Build the execution brain prompt for the Phase 3 optimization pipeline.

    Produces 8 sequential steps that scrape the existing page, run deficit
    analysis, generate a test block, and create an optimization instructions
    document. All script commands use absolute paths so the CLI can execute
    them without any skill context.

    Args:
        url: Page being optimized; interpolated into scraper commands.
        keyword: Primary target keyword.
        cora_path: Path to the Cora .xlsx report used by the analysis scripts.
        work_dir: Directory where every intermediate/output file is written.
        scripts_dir: Absolute path to the content-researcher skill scripts.
        is_service_page: Appends the no-unverified-claims service-page note.
        capabilities_default: Canned company-capabilities answer for service pages.

    Returns:
        Newline-joined prompt string.
    """
    # Steps alternate between shell commands (run via uv) and LLM reasoning
    # steps; each consumes files produced by the previous one.
    parts = [
        f"You are running an automated on-page optimization pipeline for "
        f"'{keyword}' on {url}.\n\n"
        f"Working directory: {work_dir}\n"
        f"Cora report: {cora_path}\n"
        f"Scripts directory: {scripts_dir}\n\n"
        f"Execute the following steps IN ORDER. Each step depends on the "
        f"previous step's output files. Do NOT skip steps.\n",

        # Step 1 — Scrape existing page
        f"\n## Step 1 — Scrape Existing Page\n\n"
        f"Run the competitor scraper to fetch the current page content:\n\n"
        f"```bash\n"
        f'uv run --with requests,beautifulsoup4 python "{scripts_dir}/competitor_scraper.py" '
        f'"{url}" --output-dir "{work_dir}" --format text\n'
        f"```\n\n"
        f"This produces `existing_content.md` (or a text file named after the URL) "
        f"in the working directory. If the output file is not named `existing_content.md`, "
        f"rename it to `existing_content.md`.",

        # Step 2 — Deficit analysis
        f"\n## Step 2 — Test Block Prep (Deficit Analysis)\n\n"
        f"Run the deficit analysis against the Cora report:\n\n"
        f"```bash\n"
        f'cd "{scripts_dir}" && uv run --with openpyxl python test_block_prep.py '
        f'"{work_dir}/existing_content.md" "{cora_path}" --format json '
        f'> "{work_dir}/prep_data.json"\n'
        f"```\n\n"
        f"This produces `prep_data.json` with word count deficits, missing entities, "
        f"density targets, and template generation instructions.",

        # Step 3 — Filter entities (LLM step)
        f"\n## Step 3 — Filter Missing Entities for Topical Relevance\n\n"
        f'Read `{work_dir}/prep_data.json` and extract the `missing_entities` list. '
        f"Filter this list to keep ONLY entities that are topically relevant to "
        f"'{keyword}' and the page content. Remove generic/off-topic entities.\n\n"
        f"Write one entity per line to `{work_dir}/filtered_entities.txt`.\n\n"
        f"Be aggressive about filtering — only keep entities that a subject-matter "
        f"expert would expect to see on a page about '{keyword}'.",

        # Step 4 — Write templates (LLM step)
        # NOTE: doubled braces ({{N}}) in these f-strings render as literal
        # {N} placeholder text in the emitted prompt.
        f"\n## Step 4 — Write Heading + Body Templates\n\n"
        f"Using the deficit data from `{work_dir}/prep_data.json` and the filtered "
        f"entities from `{work_dir}/filtered_entities.txt`, write:\n\n"
        f"1. H2 and H3 headings that incorporate target entities\n"
        f"2. Body sentence templates with `{{N}}` placeholder slots where entity "
        f"terms will be inserted programmatically\n\n"
        f"Format: Each template is a heading line followed by body sentences. "
        f"Each body sentence should have 1-3 `{{N}}` slots (numbered sequentially "
        f"starting from 1 within each sentence).\n\n"
        f"Write the output to `{work_dir}/templates.txt`.\n\n"
        f"Example format:\n"
        f"```\n"
        f"## Heading About {{1}} and {{2}}\n"
        f"Sentence with {{1}} integrated naturally. Another point about {{2}} "
        f"that provides value.\n"
        f"```",

        # Step 5 — Generate test block (script)
        f"\n## Step 5 — Generate Test Block\n\n"
        f"Run the test block generator to fill template slots and produce the "
        f"HTML test block:\n\n"
        f"```bash\n"
        f'cd "{scripts_dir}" && uv run --with openpyxl python test_block_generator.py '
        f'"{work_dir}/templates.txt" "{work_dir}/prep_data.json" "{cora_path}" '
        f'--entities-file "{work_dir}/filtered_entities.txt" '
        f'--output-dir "{work_dir}"\n'
        f"```\n\n"
        f"This produces `test_block.md`, `test_block.html`, and `test_block_stats.json` "
        f"in the working directory.",

        # Step 6 — Rewrite for readability (LLM step)
        f"\n## Step 6 — Rewrite Body Sentences for Readability\n\n"
        f"Read `{work_dir}/test_block.md`. Rewrite each body sentence to improve "
        f"readability and natural flow while preserving:\n"
        f"- ALL entity strings exactly as they appear (do not paraphrase entity terms)\n"
        f"- The overall heading structure\n"
        f"- The `<!-- HIDDEN TEST BLOCK -->` markers\n\n"
        f"Write the improved version back to `{work_dir}/test_block.md`.\n"
        f"Then regenerate the HTML version at `{work_dir}/test_block.html` with the "
        f"content wrapped in `<div style=\"display:none\">` tags.",

        # Step 7 — Validate (script)
        f"\n## Step 7 — Validate Test Block\n\n"
        f"Run the before/after validation:\n\n"
        f"```bash\n"
        f'cd "{scripts_dir}" && uv run --with openpyxl python test_block_validate.py '
        f'"{work_dir}/existing_content.md" "{work_dir}/test_block.md" "{cora_path}" '
        f'--format json --output "{work_dir}/validation_report.json"\n'
        f"```\n\n"
        f"This produces `validation_report.json` with before/after metrics comparison.",

        # Step 8 — Generate optimization instructions (LLM step)
        f"\n## Step 8 — Generate Optimization Instructions\n\n"
        f"Read the following files:\n"
        f"- `{work_dir}/existing_content.md` (current page)\n"
        f"- `{work_dir}/prep_data.json` (deficit analysis)\n"
        f"- `{work_dir}/validation_report.json` (before/after metrics)\n"
        f"- `{work_dir}/test_block.md` (generated test block)\n\n"
        f"Generate `{work_dir}/optimization_instructions.md` — a surgical playbook "
        f"for the human editor with these sections:\n\n"
        f"1. **Executive Summary** — one-paragraph overview of optimization opportunity\n"
        f"2. **Heading Changes** — specific H1/H2/H3 modifications with before/after\n"
        f"3. **Sections to Expand** — which sections need more content and what to add\n"
        f"4. **Entity Integration Points** — exact locations to weave in missing entities\n"
        f"5. **Meta Tag Updates** — title tag and meta description recommendations\n"
        f"6. **Content Gaps** — topics covered by competitors but missing from this page\n"
        f"7. **Priority Ranking** — rank all changes by expected SEO impact (high/medium/low)\n\n"
        f"Be specific and actionable. Reference exact headings and paragraphs from "
        f"the existing content. Do NOT rewrite the full page — this is a surgical guide.",
    ]

    if is_service_page:
        parts.append(
            f'\nNOTE: This is a **service page**. Company capabilities: '
            f'"{capabilities_default}"\n'
            f"Do NOT make specific claims about services, certifications, or "
            f"licenses not found on the existing page."
        )

    return "\n".join(parts)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Optimization pipeline (Phase 3 — test block + surgical instructions)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _run_optimization(
    *,
    agent,
    config,
    ctx: dict | None,
    task_id: str,
    url: str,
    keyword: str,
    cora_path: str,
    is_service_page: bool = False,
    capabilities_default: str = "",
) -> str:
    """Run the Phase 3 optimization pipeline.

    Requires a Cora report. Creates an isolated working directory, calls the
    execution brain with the 8-step optimization prompt, then finalizes by
    collecting deliverables and syncing ClickUp.
    """
    if not cora_path:
        msg = (
            f"Error: No Cora report found for keyword '{keyword}'. "
            f"A Cora report is required for the optimization pipeline. "
            f"Please upload a Cora .xlsx report to the content Cora inbox."
        )
        log.error(msg)
        if task_id:
            _sync_clickup_fail(ctx, task_id, msg)
        return msg

    # Isolated working directory per task (or "manual" for ad-hoc runs).
    slug = _slugify(keyword) or "unknown"
    work_dir = _LOCAL_CONTENT_DIR / slug / f"optimization-{task_id or 'manual'}"
    work_dir.mkdir(parents=True, exist_ok=True)

    # ClickUp: move to automation underway
    if task_id:
        _sync_clickup_start(ctx, task_id)

    prompt = _build_optimization_prompt(
        url=url,
        keyword=keyword,
        cora_path=cora_path,
        work_dir=str(work_dir),
        scripts_dir=str(_SCRIPTS_DIR),
        is_service_page=is_service_page,
        capabilities_default=capabilities_default,
    )

    log.info(
        "Optimization pipeline — running for '%s' (%s), work_dir=%s",
        keyword, url, work_dir,
    )
    try:
        brain_output = agent.execute_task(prompt, tools=EXEC_TOOLS, skip_permissions=True)
    except Exception as e:
        error_msg = f"Optimization pipeline execution failed: {e}"
        log.error(error_msg)
        if task_id:
            _sync_clickup_fail(ctx, task_id, str(e))
        return f"Error: {error_msg}"

    # The execution brain reports its own failures via an "Error:" prefix.
    if brain_output.startswith("Error:"):
        if task_id:
            _sync_clickup_fail(ctx, task_id, brain_output)
        return brain_output

    return _finalize_optimization(
        ctx=ctx,
        config=config,
        task_id=task_id,
        keyword=keyword,
        url=url,
        work_dir=work_dir,
        exec_result=brain_output,
    )
|
|
|
|
|
|
def _finalize_optimization(
    *,
    ctx: dict | None,
    config,
    task_id: str,
    keyword: str,
    url: str,
    work_dir: Path,
    exec_result: str,
) -> str:
    """Collect deliverables from the working directory and sync ClickUp.

    Required files: test_block.html, optimization_instructions.md.
    Optional: validation_report.json.
    """
    required = ("test_block.html", "optimization_instructions.md")
    missing = [name for name in required if not (work_dir / name).exists()]
    if missing:
        error_msg = (
            f"Optimization pipeline finished but required deliverables are "
            f"missing: {', '.join(missing)}. Working directory: {work_dir}"
        )
        log.error(error_msg)
        if task_id:
            _sync_clickup_fail(ctx, task_id, error_msg)
        return f"Error: {error_msg}"

    # Collect all deliverable paths (validation report is optional).
    found_files: dict[str, Path] = {
        name: work_dir / name
        for name in ("test_block.html", "optimization_instructions.md", "validation_report.json")
        if (work_dir / name).exists()
    }

    # Copy deliverables to network path (if configured)
    slug = _slugify(keyword) or "unknown"
    if config and config.content.outline_dir:
        net_dir = Path(config.content.outline_dir) / slug
        try:
            net_dir.mkdir(parents=True, exist_ok=True)
            for name, fpath in found_files.items():
                dest = net_dir / name
                dest.write_bytes(fpath.read_bytes())
                log.info("Copied %s -> %s", fpath, dest)
        except OSError as e:
            log.warning("Could not copy deliverables to network path %s: %s", net_dir, e)

    # Sync ClickUp
    if task_id:
        _sync_clickup_optimization_complete(
            ctx=ctx,
            config=config,
            task_id=task_id,
            keyword=keyword,
            url=url,
            found_files=found_files,
            work_dir=work_dir,
        )

    file_list = "\n".join(f"- `{p}`" for p in found_files.values())
    return (
        f"## Optimization Complete\n\n"
        f"**Keyword:** {keyword}\n"
        f"**URL:** {url}\n"
        f"**Deliverables:**\n{file_list}\n\n"
        f"---\n\n{exec_result}\n\n"
        f"## ClickUp Sync\nOptimization complete. Status: internal review."
    )
|
|
|
|
|
|
def _sync_clickup_optimization_complete(
|
|
*,
|
|
ctx: dict | None,
|
|
config,
|
|
task_id: str,
|
|
keyword: str,
|
|
url: str,
|
|
found_files: dict[str, Path],
|
|
work_dir: Path,
|
|
) -> None:
|
|
"""Upload optimization deliverables to ClickUp and set status."""
|
|
if not task_id or not ctx:
|
|
return
|
|
client = _get_clickup_client(ctx)
|
|
if not client:
|
|
return
|
|
try:
|
|
# Upload attachments
|
|
for name, fpath in found_files.items():
|
|
try:
|
|
client.upload_attachment(task_id, fpath)
|
|
log.info("Uploaded %s to ClickUp task %s", name, task_id)
|
|
except Exception as e:
|
|
log.warning("Failed to upload %s: %s", name, e)
|
|
|
|
# Build comment with validation summary
|
|
comment_parts = [
|
|
f"[DONE]Optimization pipeline complete for '{keyword}'.\n",
|
|
f"**URL:** {url}\n",
|
|
"**Deliverables attached:**",
|
|
]
|
|
for name in found_files:
|
|
comment_parts.append(f"- {name}")
|
|
|
|
# Include validation summary if available
|
|
val_path = work_dir / "validation_report.json"
|
|
if val_path.exists():
|
|
try:
|
|
import json
|
|
val_data = json.loads(val_path.read_text(encoding="utf-8"))
|
|
summary = val_data.get("summary", "")
|
|
if summary:
|
|
comment_parts.append(f"\n**Validation Summary:**\n{summary}")
|
|
except Exception:
|
|
pass
|
|
|
|
comment_parts.append(
|
|
"\n**Next Steps:**\n"
|
|
"1. Review `optimization_instructions.md` for surgical changes\n"
|
|
"2. Deploy `test_block.html` hidden div to the page\n"
|
|
"3. Monitor rankings for 2-4 weeks\n"
|
|
"4. Apply surgical changes from the instructions doc"
|
|
)
|
|
|
|
client.add_comment(task_id, "\n".join(comment_parts))
|
|
client.update_task_status(task_id, config.clickup.review_status)
|
|
except Exception as e:
|
|
log.warning("Failed to sync optimization complete for %s: %s", task_id, e)
|
|
finally:
|
|
client.close()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Main tool
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@tool(
    "create_content",
    "SEO content creation. Set content_type='new content' for new pages "
    "(Phase 1 outline → Phase 2 full write), or content_type='on page optimization' "
    "to optimize an existing page with Cora data. Auto-detects phase from ClickUp "
    "task status ('outline approved' → Phase 2). Ask the user which type if unclear.",
    category="content",
)
def create_content(
    keyword: str,
    url: str = "",
    content_type: str = "",
    cli_flags: str = "",
    ctx: dict | None = None,
) -> str:
    """Create SEO content in two phases with human review between them.

    Args:
        keyword: Primary target keyword (e.g. "plumbing services").
        url: Target page URL (optional for new content, required for optimization).
        content_type: 'new content' or 'on page optimization'. Controls routing.
            If empty, inferred from URL presence as fallback.
        cli_flags: Optional flags (e.g. "service" for service page hint).
    """
    if not keyword:
        return "Error: 'keyword' is required."
    if not ctx or "agent" not in ctx:
        return "Error: Tool context with agent is required."

    # Auto-detect content_type from URL presence when not explicitly set
    if not content_type:
        content_type = "on page optimization" if url else "new content"

    # Service page hint from cli_flags
    is_service_page = bool(cli_flags) and "service" in cli_flags.lower()

    agent = ctx["agent"]
    config = ctx.get("config")
    db = ctx.get("db")
    task_id = ctx.get("clickup_task_id", "")

    # Determine phase from ClickUp task status.
    # Prefer status passed by scheduler (pre-status-change) over re-fetching.
    phase = 1
    scheduler_status = ctx.get("clickup_task_status", "")
    if scheduler_status and scheduler_status.lower() == "outline approved":
        phase = 2
    elif task_id:
        client = _get_clickup_client(ctx)
        if client:
            try:
                if client.get_task(task_id).status.lower() == "outline approved":
                    phase = 2
            except Exception as e:
                log.warning("Could not check ClickUp status for phase detection: %s", e)
            finally:
                client.close()

    # Find Cora report
    cora_inbox = config.content.cora_inbox if config else ""
    cora_path = _find_cora_report(keyword, cora_inbox)
    if cora_path:
        log.info("Found Cora report for '%s': %s", keyword, cora_path)

    capabilities_default = config.content.company_capabilities_default if config else ""

    # Optimization path: content_type determines route (URL fallback for chat callers)
    if content_type.lower() == "on page optimization":
        if not url:
            return "Error: On Page Optimization requires a URL (IMSURL field)."
        return _run_optimization(
            agent=agent,
            config=config,
            ctx=ctx,
            task_id=task_id,
            url=url,
            keyword=keyword,
            cora_path=cora_path,
            is_service_page=is_service_page,
            capabilities_default=capabilities_default,
        )

    # New content path: Phase 1 (outline) → human review → Phase 2 (write)
    if phase == 1:
        return _run_phase1(
            agent=agent,
            config=config,
            db=db,
            ctx=ctx,
            task_id=task_id,
            url=url,
            keyword=keyword,
            content_type=content_type,
            cora_path=cora_path,
            capabilities_default=capabilities_default,
            is_service_page=is_service_page,
        )
    return _run_phase2(
        agent=agent,
        config=config,
        db=db,
        ctx=ctx,
        task_id=task_id,
        url=url,
        keyword=keyword,
        cora_path=cora_path,
        existing_state={},
        is_service_page=is_service_page,
        capabilities_default=capabilities_default,
    )
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Phase 1: Research + Outline
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _run_phase1(
    *,
    agent,
    config,
    db,
    ctx,
    task_id: str,
    url: str,
    keyword: str,
    content_type: str,
    cora_path: str,
    capabilities_default: str,
    is_service_page: bool = False,
) -> str:
    """Run Phase 1: competitor research + outline generation.

    Computes the outline save path up front (network share with local
    fallback), runs the execution brain with the Phase 1 prompt, verifies the
    outline file exists afterwards (saving the raw result text as a fallback
    if the brain did not write it), and syncs ClickUp to 'outline review'.

    Returns:
        A human-readable summary string, or an "Error: ..." string on failure.
    """
    # Compute the outline save path upfront so the execution brain writes
    # directly to the network share (or local fallback).
    slug = _slugify(keyword) or "unknown"
    outline_path = ""
    # Guard against config being None: callers pass ctx.get("config"), which
    # may be absent — the rest of this module uses the same `if config` guard.
    if config and config.content.outline_dir:
        primary = Path(config.content.outline_dir) / slug
        try:
            primary.mkdir(parents=True, exist_ok=True)
            outline_path = str(primary / "outline.md")
        except OSError as e:
            log.warning("Network path unavailable (%s), falling back to local: %s", primary, e)
    if not outline_path:
        local = _LOCAL_CONTENT_DIR / slug
        local.mkdir(parents=True, exist_ok=True)
        outline_path = str(local / "outline.md")

    # ClickUp: move to automation underway
    if task_id:
        _sync_clickup_start(ctx, task_id)

    prompt = _build_phase1_prompt(
        url, keyword, content_type, cora_path, capabilities_default, is_service_page,
        outline_save_path=outline_path,
    )

    log.info("Phase 1 — researching + outlining for '%s' (%s)", keyword, url or "new content")
    try:
        result = agent.execute_task(
            prompt,
            tools=EXEC_TOOLS,
            skip_permissions=True,
        )
    except Exception as e:
        error_msg = f"Phase 1 execution failed: {e}"
        log.error(error_msg)
        if task_id:
            _sync_clickup_fail(ctx, task_id, str(e))
        return f"Error: {error_msg}"

    # The execution brain reports its own failures via an "Error:" prefix.
    if result.startswith("Error:"):
        if task_id:
            _sync_clickup_fail(ctx, task_id, result)
        return result

    # Verify the outline was saved by the execution brain
    if not Path(outline_path).is_file():
        log.warning(
            "Execution brain did not save outline to %s; saving result text as fallback.",
            outline_path,
        )
        Path(outline_path).parent.mkdir(parents=True, exist_ok=True)
        Path(outline_path).write_text(result, encoding="utf-8")

    log.info("Outline saved to: %s", outline_path)

    # ClickUp: move to outline review + store OutlinePath
    if task_id:
        _sync_clickup_outline_ready(ctx, task_id, outline_path)

    url_line = f"**URL:** {url}\n" if url else "**Type:** New content\n"
    return (
        f"## Phase 1 Complete — Outline Ready for Review\n\n"
        f"**Keyword:** {keyword}\n"
        f"{url_line}"
        f"**Outline saved to:** `{outline_path}`\n\n"
        f"Please review and edit the outline. When ready, move the ClickUp task "
        f"to **outline approved** to trigger Phase 2 (full content writing).\n\n"
        f"---\n\n{result}\n\n"
        f"## ClickUp Sync\nPhase 1 complete. Status: outline review."
    )
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Phase 2: Write Full Content
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _resolve_outline_path(ctx: dict | None, task_id: str, keyword: str, config) -> str:
    """Locate the outline file for *keyword*.

    Resolution order: the ClickUp ``OutlinePath`` custom field on the task,
    then the configured convention directory, then the local fallback
    directory. Returns an empty string when no outline can be found.
    """
    # 1) ClickUp custom field — only when both a task id and context exist.
    if task_id and ctx:
        cu = _get_clickup_client(ctx)
        if cu:
            try:
                field_value = cu.get_custom_field_by_name(task_id, "OutlinePath")
                candidate = str(field_value).strip() if field_value else ""
                if candidate:
                    return candidate
            except Exception as e:
                log.warning("Failed to read OutlinePath from ClickUp for %s: %s", task_id, e)
            finally:
                cu.close()

    slug = _slugify(keyword)
    if slug:
        # 2) Convention path under the configured outline directory.
        if config and config.content.outline_dir:
            conventional = Path(config.content.outline_dir) / slug / "outline.md"
            if conventional.exists():
                return str(conventional)

        # 3) Local fallback directory.
        fallback = _LOCAL_CONTENT_DIR / slug / "outline.md"
        if fallback.exists():
            return str(fallback)

    return ""
|
|
|
|
|
|
def _run_phase2(
    *,
    agent,
    config,
    db,
    ctx,
    task_id: str,
    url: str,
    keyword: str,
    cora_path: str,
    existing_state: dict,
    is_service_page: bool = False,
    capabilities_default: str = "",
) -> str:
    """Phase 2: write full content from a previously approved outline.

    Resolves and reads the approved outline, builds the Phase 2 prompt, runs
    the execution brain, saves the result, and syncs ClickUp status along the
    way. Returns a human-readable report, or an ``"Error: ..."`` string.

    Args:
        agent: Execution agent exposing ``execute_task``.
        config: Application config (ClickUp + content settings).
        db: Database handle (passed through; unused here).
        ctx: Tool context dict (used for ClickUp access).
        task_id: ClickUp task id, or "" when running without a task.
        url: Page URL being optimized, or "" for new content.
        keyword: Target keyword for the content.
        cora_path: Path to a CORA report, if one was found.
        existing_state: Saved Phase 1 state; may supply ``cora_path``.
        is_service_page: Whether the content is a service page.
        capabilities_default: Default capabilities text for the prompt.
    """
    # Resolve outline path: ClickUp field → convention → local fallback.
    outline_path = _resolve_outline_path(ctx, task_id, keyword, config)

    outline_text = ""
    if outline_path:
        try:
            outline_text = Path(outline_path).read_text(encoding="utf-8")
        except OSError as e:
            log.warning("Could not read outline at %s: %s", outline_path, e)

    if not outline_text:
        # Reset the task to "to do" so it can be retried after re-running Phase 1.
        if task_id and ctx:
            client = _get_clickup_client(ctx)
            if client:
                try:
                    poll_sts = config.clickup.poll_statuses
                    reset_status = poll_sts[0] if poll_sts else "to do"
                    client.update_task_status(task_id, reset_status)
                    client.add_comment(
                        task_id,
                        f"[WARNING]Outline file not found for keyword '{keyword}'. "
                        f"Searched: {outline_path or '(no path saved)'}. "
                        f"Please re-run Phase 1 (create_content) to generate a new outline.",
                    )
                except Exception as e:
                    log.warning("Failed to reset task %s after missing outline: %s", task_id, e)
                finally:
                    client.close()
        return (
            "Error: Outline file not found. "
            f"Searched at: {outline_path or '(no path saved)'}. "
            f"Please re-run Phase 1 (create_content) to generate a new outline."
        )

    # Use saved cora_path from Phase 1 state if we don't have one now.
    if not cora_path:
        cora_path = existing_state.get("cora_path", "")

    # Compute save path for the final content HTML (network share with local fallback).
    slug = _slugify(keyword) or "unknown"
    content_path = ""
    if config.content.outline_dir:
        primary = Path(config.content.outline_dir) / slug
        try:
            primary.mkdir(parents=True, exist_ok=True)
            content_path = str(primary / "final-content.html")
        except OSError as e:
            log.warning("Network path unavailable (%s), falling back to local: %s", primary, e)
    if not content_path:
        local = _LOCAL_CONTENT_DIR / slug
        local.mkdir(parents=True, exist_ok=True)
        content_path = str(local / "final-content.html")

    # ClickUp: move to automation underway.
    if task_id:
        _sync_clickup_start(ctx, task_id)

    prompt = _build_phase2_prompt(
        url, keyword, outline_text, cora_path, is_service_page, capabilities_default,
        content_path=content_path,
    )

    log.info("Phase 2 — writing full content for '%s' (%s)", keyword, url)
    try:
        result = agent.execute_task(
            prompt,
            tools=EXEC_TOOLS,
            skip_permissions=True,
        )
    except Exception as e:
        error_msg = f"Phase 2 execution failed: {e}"
        log.error(error_msg)
        if task_id:
            _sync_clickup_fail(ctx, task_id, str(e))
        return f"Error: {error_msg}"

    if result.startswith("Error:"):
        if task_id:
            _sync_clickup_fail(ctx, task_id, result)
        return result

    # Always save the raw result transcript as Markdown, but — mirroring the
    # Phase 1 verification pattern — check whether the execution brain actually
    # wrote the HTML artifact it was asked to produce. Previously the computed
    # HTML path was unconditionally clobbered here, so ClickUp and the report
    # pointed at a different file than the one given to the brain.
    md_path = _save_content(result, keyword, "final-content.md", config)
    if not Path(content_path).is_file():
        log.warning(
            "Execution brain did not save content to %s; falling back to transcript at %s.",
            content_path,
            md_path,
        )
        content_path = md_path
    log.info("Final content saved to: %s", content_path)

    # ClickUp: move to internal review.
    if task_id:
        _sync_clickup_complete(ctx, task_id, content_path)

    url_line = f"**URL:** {url}\n" if url else "**Type:** New content\n"
    return (
        f"## Phase 2 Complete — Content Written\n\n"
        f"**Keyword:** {keyword}\n"
        f"{url_line}"
        f"**Content saved to:** `{content_path}`\n\n"
        f"---\n\n{result}\n\n"
        f"## ClickUp Sync\nPhase 2 complete. Status: internal review."
    )
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Continue content (chat-initiated Phase 2)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@tool(
    "continue_content",
    "Resume content creation for a keyword that has an approved outline. "
    "Runs Phase 2 (full content writing) for a previously outlined keyword.",
    category="content",
)
def continue_content(
    keyword: str,
    ctx: dict | None = None,
) -> str:
    """Run Phase 2 (full content writing) for a keyword whose outline exists.

    Args:
        keyword: The keyword to continue writing content for.
    """
    if not keyword:
        return "Error: 'keyword' is required."
    if not ctx or "agent" not in ctx:
        return "Error: Tool context with agent is required."

    config = ctx.get("config")
    db = ctx.get("db")

    # Preferred path: find a ClickUp task for this keyword sitting in an
    # outline status ("outline approved" or "outline review").
    client = _get_clickup_client(ctx)
    if client:
        try:
            space_id = config.clickup.space_id if config else ""
            if space_id:
                candidates = client.get_tasks_from_space(
                    space_id,
                    statuses=["outline approved", "outline review"],
                )
                wanted = keyword.lower().strip()
                for candidate in candidates:
                    field = candidate.custom_fields.get("Keyword", "")
                    if str(field).lower().strip() != wanted:
                        continue
                    page_url = candidate.custom_fields.get("IMSURL", "") or ""
                    inbox = config.content.cora_inbox if config else ""
                    report = _find_cora_report(keyword, inbox)

                    return _run_phase2(
                        agent=ctx["agent"],
                        config=config,
                        db=db,
                        ctx=ctx,
                        task_id=candidate.id,
                        url=str(page_url),
                        keyword=keyword,
                        cora_path=report or "",
                        existing_state={},
                    )
        except Exception as e:
            log.warning("ClickUp query failed in continue_content: %s", e)
        finally:
            client.close()

    # Fallback: no ClickUp task — run Phase 2 if an outline exists on disk.
    if _resolve_outline_path(ctx, "", keyword, config):
        return _run_phase2(
            agent=ctx["agent"],
            config=config,
            db=db,
            ctx=ctx,
            task_id="",
            url="",
            keyword=keyword,
            cora_path="",
            existing_state={},
        )

    return (
        f"No outline awaiting review found for keyword '{keyword}'. "
        f"Use create_content to start Phase 1 first."
    )
|