521 lines
18 KiB
Python
521 lines
18 KiB
Python
"""Link-building content pipeline tool.

Autonomous workflow:
1. Look up company info from companies.md
2. Generate a guest article (500-700 words) via execution brain
3. Generate a resource/directory blurb via execution brain
4. Generate a social media post via chat brain
5. Save all content to files and return a summary of the deliverables
   (synced to ClickUp when the tool context carries a task ID)
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import logging
|
|
import re
|
|
import time
|
|
from datetime import UTC, datetime
|
|
from pathlib import Path
|
|
|
|
from . import tool
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Paths
|
|
# ---------------------------------------------------------------------------
|
|
|
|
_ROOT_DIR = Path(__file__).resolve().parent.parent.parent
|
|
_SKILLS_DIR = _ROOT_DIR / "skills"
|
|
_DATA_DIR = _ROOT_DIR / "data"
|
|
_OUTPUT_DIR = _DATA_DIR / "generated" / "link_building"
|
|
_COMPANIES_FILE = _SKILLS_DIR / "companies.md"
|
|
|
|
SONNET_CLI_MODEL = "sonnet"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Status / helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _set_status(ctx: dict | None, message: str) -> None:
|
|
"""Write pipeline progress to the DB so the UI can poll it."""
|
|
if ctx and "db" in ctx:
|
|
ctx["db"].kv_set("pipeline:status", message)
|
|
|
|
|
|
def _slugify(text: str) -> str:
|
|
"""Turn a phrase into a filesystem-safe slug."""
|
|
text = text.lower().strip()
|
|
text = re.sub(r"[^\w\s-]", "", text)
|
|
text = re.sub(r"[\s_]+", "-", text)
|
|
return text[:60].strip("-")
|
|
|
|
|
|
def _word_count(text: str) -> int:
|
|
return len(text.split())
|
|
|
|
|
|
def _fuzzy_company_match(name: str, candidate: str) -> bool:
|
|
"""Check if name fuzzy-matches a candidate string."""
|
|
if not name or not candidate:
|
|
return False
|
|
a, b = name.lower().strip(), candidate.lower().strip()
|
|
return a == b or a in b or b in a
|
|
|
|
|
|
def _extract_keyword_from_task_name(task_name: str) -> str:
|
|
"""Extract keyword from ClickUp task name like 'LINKS - precision cnc turning'."""
|
|
if " - " in task_name:
|
|
return task_name.split(" - ", 1)[1].strip()
|
|
return task_name.strip()
|
|
|
|
|
|
def _load_skill(filename: str) -> str:
    """Return the text of a markdown skill file, minus any YAML frontmatter.

    Args:
        filename: File name relative to the skills/ directory.

    Returns:
        The file content. When the file starts with ``---`` and a closing
        ``---`` is found, everything up to and including the closing marker
        is removed and the remainder is stripped.

    Raises:
        FileNotFoundError: If the skill file does not exist.
    """
    skill_path = _SKILLS_DIR / filename
    if not skill_path.exists():
        raise FileNotFoundError(f"Skill file not found: {skill_path}")
    content = skill_path.read_text(encoding="utf-8")

    # No opening marker: nothing to strip.
    if not content.startswith("---"):
        return content

    # Search past the opening marker for the closing one.
    closing = content.find("---", 3)
    if closing == -1:
        return content
    return content[closing + 3:].strip()
|
|
|
|
|
|
def _lookup_company(company_name: str) -> dict:
    """Look up company info from companies.md.

    The file is parsed as ``## Company Name`` headings followed by bullet
    fields (``- **Executive:** ...`` and so on). Exactly one section is used:
    a heading equal to *company_name* (case-insensitive) wins, otherwise the
    first heading that fuzzy-matches. Reading a single section keeps the
    fields of similarly named companies from being merged into one result.

    Args:
        company_name: Name to search for.

    Returns:
        A dict that always has ``name`` (the matched heading, or
        *company_name* when nothing matched) and may have ``executive``,
        ``pa_org_id``, ``website`` and ``gbp`` when the matched section
        lists them.
    """
    result: dict = {"name": company_name}
    if not company_name or not company_name.strip():
        return result
    if not _COMPANIES_FILE.exists():
        return result

    field_prefixes = (
        ("- **Executive:**", "executive"),
        ("- **PA Org ID:**", "pa_org_id"),
        ("- **Website:**", "website"),
        ("- **GBP:**", "gbp"),
    )

    # Collect (heading, fields) pairs in file order.
    sections: list[tuple[str, dict]] = []
    for line in _COMPANIES_FILE.read_text(encoding="utf-8").splitlines():
        if line.startswith("## "):
            sections.append((line[3:].strip(), {}))
            continue
        if not sections:
            continue  # bullet lines before the first heading belong to nobody
        for prefix, key in field_prefixes:
            if line.startswith(prefix):
                sections[-1][1][key] = line.split(":**", 1)[1].strip()
                break

    wanted = company_name.strip().lower()
    chosen = next((s for s in sections if s[0].lower() == wanted), None)
    if chosen is None:
        chosen = next(
            (s for s in sections if _fuzzy_company_match(company_name, s[0])),
            None,
        )

    if chosen is not None:
        heading, fields = chosen
        result["name"] = heading
        result.update(fields)
    return result
|
|
|
|
|
|
def _chat_call(agent, messages: list[dict]) -> str:
|
|
"""Make a non-streaming chat-brain call and return the full text."""
|
|
parts: list[str] = []
|
|
for chunk in agent.llm.chat(messages, tools=None, stream=False):
|
|
if chunk["type"] == "text":
|
|
parts.append(chunk["content"])
|
|
return "".join(parts)
|
|
|
|
|
|
def _get_clickup_client(ctx: dict | None):
|
|
"""Create a ClickUpClient from tool context, or None if unavailable."""
|
|
if not ctx or not ctx.get("config") or not ctx["config"].clickup.enabled:
|
|
return None
|
|
try:
|
|
from ..clickup import ClickUpClient
|
|
|
|
config = ctx["config"]
|
|
return ClickUpClient(
|
|
api_token=config.clickup.api_token,
|
|
workspace_id=config.clickup.workspace_id,
|
|
task_type_field_name=config.clickup.task_type_field_name,
|
|
)
|
|
except Exception as e:
|
|
log.warning("Could not create ClickUp client: %s", e)
|
|
return None
|
|
|
|
|
|
def _sync_clickup(ctx: dict | None, task_id: str, deliverable_paths: list[str],
|
|
summary: str) -> str:
|
|
"""Upload deliverables and update ClickUp task status. Returns sync report."""
|
|
if not task_id or not ctx:
|
|
return ""
|
|
|
|
client = _get_clickup_client(ctx)
|
|
if not client:
|
|
return ""
|
|
|
|
config = ctx["config"]
|
|
db = ctx.get("db")
|
|
lines = ["\n## ClickUp Sync"]
|
|
|
|
try:
|
|
# Upload attachments
|
|
uploaded = 0
|
|
for path in deliverable_paths:
|
|
if client.upload_attachment(task_id, path):
|
|
uploaded += 1
|
|
if uploaded:
|
|
lines.append(f"- Uploaded {uploaded} file(s)")
|
|
|
|
# Update status to review
|
|
client.update_task_status(task_id, config.clickup.review_status)
|
|
lines.append(f"- Status → '{config.clickup.review_status}'")
|
|
|
|
# Add comment
|
|
comment = (
|
|
f"✅ CheddahBot completed link building.\n\n"
|
|
f"{summary}\n\n"
|
|
f"📎 {uploaded} file(s) attached."
|
|
)
|
|
client.add_comment(task_id, comment)
|
|
lines.append("- Comment added")
|
|
|
|
# Update kv_store state
|
|
if db:
|
|
kv_key = f"clickup:task:{task_id}:state"
|
|
raw = db.kv_get(kv_key)
|
|
if raw:
|
|
try:
|
|
state = json.loads(raw)
|
|
state["state"] = "completed"
|
|
state["completed_at"] = datetime.now(UTC).isoformat()
|
|
state["deliverable_paths"] = [str(p) for p in deliverable_paths]
|
|
db.kv_set(kv_key, json.dumps(state))
|
|
except json.JSONDecodeError:
|
|
pass
|
|
|
|
except Exception as e:
|
|
lines.append(f"- Sync error: {e}")
|
|
log.error("ClickUp sync failed for task %s: %s", task_id, e)
|
|
finally:
|
|
client.close()
|
|
|
|
return "\n".join(lines)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Prompt builders
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _build_guest_article_prompt(
|
|
keyword: str, company_name: str, target_url: str, company_info: dict,
|
|
skill_prompt: str,
|
|
) -> str:
|
|
"""Build the prompt for the execution brain to write a guest article."""
|
|
executive = company_info.get("executive", "")
|
|
|
|
prompt = skill_prompt + "\n\n"
|
|
prompt += "## Assignment: Guest Article\n\n"
|
|
prompt += f"**Target Keyword:** {keyword}\n"
|
|
prompt += f"**Company:** {company_name}\n"
|
|
if executive:
|
|
prompt += f"**Executive/Contact:** {executive}\n"
|
|
if target_url:
|
|
prompt += f"**Target URL (for backlink):** {target_url}\n"
|
|
prompt += (
|
|
"\n**Instructions:**\n"
|
|
"Write a 500-700 word guest article suitable for industry blogs and "
|
|
"trade publications. The article should:\n"
|
|
"- Be informative and educational, NOT promotional\n"
|
|
"- Naturally incorporate the target keyword 2-3 times\n"
|
|
"- Include ONE natural backlink to the target URL using the keyword "
|
|
"or a close variation as anchor text\n"
|
|
"- Include a second branded mention of the company name (no link needed)\n"
|
|
"- Read like expert industry commentary, not an advertisement\n"
|
|
"- Have a compelling title (under 70 characters)\n"
|
|
"- Use subheadings to break up the content\n"
|
|
"- End with a brief author bio mentioning the company\n\n"
|
|
"Return ONLY the article text. No meta-commentary."
|
|
)
|
|
return prompt
|
|
|
|
|
|
def _build_directory_prompt(
|
|
keyword: str, company_name: str, target_url: str, branded_url: str,
|
|
company_info: dict,
|
|
) -> str:
|
|
"""Build the prompt for the execution brain to write a directory/citation entry."""
|
|
executive = company_info.get("executive", "")
|
|
website = company_info.get("website", "") or target_url
|
|
|
|
prompt = (
|
|
"## Assignment: Business Directory / Citation Entry\n\n"
|
|
f"**Company:** {company_name}\n"
|
|
f"**Target Keyword:** {keyword}\n"
|
|
)
|
|
if executive:
|
|
prompt += f"**Executive:** {executive}\n"
|
|
if website:
|
|
prompt += f"**Website:** {website}\n"
|
|
if branded_url:
|
|
prompt += f"**Social/GBP URL:** {branded_url}\n"
|
|
|
|
prompt += (
|
|
"\n**Instructions:**\n"
|
|
"Write a business directory entry / citation profile. Include:\n"
|
|
"1. **Company Description** (150-200 words) — Describe what the company "
|
|
"does, naturally incorporating the target keyword. Professional tone.\n"
|
|
"2. **Services List** (5-8 bullet points) — Key services/capabilities, "
|
|
"with the target keyword appearing in at least one bullet.\n"
|
|
"3. **About Section** (2-3 sentences) — Brief company background.\n\n"
|
|
"This will be used for industry directories, Google Business Profile, "
|
|
"and business listing sites. Keep it factual and professional.\n\n"
|
|
"Return ONLY the directory entry text. No meta-commentary."
|
|
)
|
|
return prompt
|
|
|
|
|
|
def _build_social_post_prompt(
|
|
keyword: str, company_name: str, target_url: str, article_title: str,
|
|
) -> str:
|
|
"""Build the prompt for the chat brain to write a social media post."""
|
|
prompt = (
|
|
f"Write a professional LinkedIn post for {company_name} about "
|
|
f"'{keyword}'. The post should:\n"
|
|
f"- Be 100-150 words\n"
|
|
f"- Reference the article: \"{article_title}\"\n"
|
|
f"- Include the link: {target_url}\n" if target_url else ""
|
|
f"- Use 2-3 relevant hashtags\n"
|
|
f"- Professional, not salesy\n"
|
|
f"- Encourage engagement (comment/share)\n\n"
|
|
"Return ONLY the post text."
|
|
)
|
|
return prompt
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Main tool
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@tool(
    "build_links",
    "Generate SEO link building content for a target keyword and company. "
    "Produces a guest article, directory listing, and social post, each with "
    "proper anchor text and backlinks. Files saved to data/generated/link_building/.",
    category="linkbuilding",
)
def build_links(
    keyword: str,
    company_name: str,
    target_url: str = "",
    branded_url: str = "",
    ctx: dict | None = None,
) -> str:
    """Main link-building content pipeline.

    Generates three deliverables in sequence: a guest article and a directory
    listing via ``agent.execute_task``, and a social post via the chat brain.
    Each is written under ``<output>/<company-slug>/<keyword-slug>/`` when it
    meets its minimum word count. A failed or too-short deliverable becomes a
    warning in the summary and the remaining steps still run. When the context
    carries ``clickup_task_id`` the results are synced to ClickUp.

    Args:
        keyword: Target SEO keyword (e.g., "precision cnc turning"). A value
            starting with "LINKS" is treated as a task name and the keyword
            is extracted from it.
        company_name: Client company name (e.g., "Chapter2").
        target_url: Primary URL to build backlinks to (from IMSURL field).
        branded_url: Secondary branded URL (from SocialURL field).
        ctx: Injected tool context with config, db, agent.

    Returns:
        Summary of generated content with file paths, or an error string when
        no agent is available in *ctx*.
    """
    # Monotonic clock: elapsed time is unaffected by wall-clock adjustments.
    t0 = time.monotonic()
    agent = ctx.get("agent") if ctx else None
    task_id = ctx.get("clickup_task_id", "") if ctx else ""

    if not agent:
        return "Error: link building tool requires agent context."

    # Derive keyword from task name if it looks like "LINKS - keyword"
    if keyword.startswith("LINKS"):
        keyword = _extract_keyword_from_task_name(keyword)

    log.info("Link building pipeline: keyword='%s', company='%s'", keyword, company_name)
    _set_status(ctx, f"Link building: {company_name} — {keyword}")

    # --- Company lookup ---
    company_info = _lookup_company(company_name)
    log.info("Company info: %s", company_info)

    # --- Load skill prompt ---
    try:
        skill_prompt = _load_skill("linkbuilding.md")
    except FileNotFoundError:
        skill_prompt = ""
        log.warning("linkbuilding.md skill not found, using inline prompts only")

    # --- Create output directory ---
    # An empty slug (e.g. punctuation-only input) would drop a path segment
    # and make unrelated runs share one directory, so use a placeholder.
    company_slug = _slugify(company_name) or "unknown-company"
    keyword_slug = _slugify(keyword) or "unknown-keyword"
    output_dir = _OUTPUT_DIR / company_slug / keyword_slug
    output_dir.mkdir(parents=True, exist_ok=True)

    results: list[str] = []
    deliverable_paths: list[str] = []
    warnings: list[str] = []
    # The social post references the article title; fall back to the keyword
    # when no article is produced (too short or generation failed).
    article_title = keyword

    # =====================================================================
    # Step 1: Guest Article (execution brain)
    # =====================================================================
    _set_status(ctx, f"Link building: Writing guest article — {keyword}")
    log.info("Step 1: Generating guest article for '%s'", keyword)

    article_prompt = _build_guest_article_prompt(
        keyword, company_name, target_url, company_info, skill_prompt,
    )
    try:
        article_raw = agent.execute_task(article_prompt)
        article_text = _clean_content(article_raw)
        wc = _word_count(article_text)

        if wc < 100:
            warnings.append(f"Guest article too short ({wc} words)")
            log.warning("Guest article too short: %d words", wc)
        else:
            article_path = output_dir / "guest-article.md"
            article_path.write_text(article_text, encoding="utf-8")
            deliverable_paths.append(str(article_path))

            # Extract title from first line
            article_title = article_text.splitlines()[0].strip("# ").strip()
            results.append(
                f"**Guest Article:** `{article_path}`\n"
                f"  Title: {article_title}\n"
                f"  Words: {wc}"
            )
            log.info("Guest article saved: %s (%d words)", article_path, wc)
    except Exception as e:
        warnings.append(f"Guest article generation failed: {e}")
        log.error("Guest article failed: %s", e, exc_info=True)
        article_title = keyword  # fallback for social post

    # =====================================================================
    # Step 2: Directory / Citation Entry (execution brain)
    # =====================================================================
    _set_status(ctx, f"Link building: Writing directory entry — {keyword}")
    log.info("Step 2: Generating directory entry for '%s'", keyword)

    directory_prompt = _build_directory_prompt(
        keyword, company_name, target_url, branded_url, company_info,
    )
    try:
        directory_raw = agent.execute_task(directory_prompt)
        directory_text = _clean_content(directory_raw)
        wc = _word_count(directory_text)

        if wc < 30:
            warnings.append(f"Directory entry too short ({wc} words)")
        else:
            dir_path = output_dir / "directory-listing.md"
            dir_path.write_text(directory_text, encoding="utf-8")
            deliverable_paths.append(str(dir_path))
            results.append(
                f"**Directory Listing:** `{dir_path}`\n"
                f"  Words: {wc}"
            )
            log.info("Directory listing saved: %s (%d words)", dir_path, wc)
    except Exception as e:
        warnings.append(f"Directory entry generation failed: {e}")
        log.error("Directory entry failed: %s", e, exc_info=True)

    # =====================================================================
    # Step 3: Social Media Post (chat brain — fast)
    # =====================================================================
    _set_status(ctx, f"Link building: Writing social post — {keyword}")
    log.info("Step 3: Generating social post for '%s'", keyword)

    social_prompt = _build_social_post_prompt(
        keyword, company_name, target_url, article_title,
    )
    try:
        social_text = _chat_call(agent, [{"role": "user", "content": social_prompt}])
        social_text = social_text.strip()
        wc = _word_count(social_text)

        if wc < 20:
            warnings.append(f"Social post too short ({wc} words)")
        else:
            social_path = output_dir / "social-post.md"
            social_path.write_text(social_text, encoding="utf-8")
            deliverable_paths.append(str(social_path))
            results.append(
                f"**Social Post:** `{social_path}`\n"
                f"  Words: {wc}"
            )
            log.info("Social post saved: %s (%d words)", social_path, wc)
    except Exception as e:
        warnings.append(f"Social post generation failed: {e}")
        log.error("Social post failed: %s", e, exc_info=True)

    # =====================================================================
    # Summary
    # =====================================================================
    elapsed = time.monotonic() - t0
    _set_status(ctx, "")

    summary_lines = [
        f"# Link Building Complete: {company_name} — {keyword}\n",
        f"**Keyword:** {keyword}",
        f"**Company:** {company_info.get('name', company_name)}",
        f"**Target URL:** {target_url or '(none)'}",
        f"**Output Dir:** `{output_dir}`",
        f"**Time:** {elapsed:.1f}s",
        f"**Deliverables:** {len(deliverable_paths)}",
        "",
    ]

    if results:
        summary_lines.append("## Generated Content")
        summary_lines.extend(results)

    if warnings:
        summary_lines.append("\n## Warnings")
        summary_lines.extend(f"- ⚠️ {w}" for w in warnings)

    summary = "\n".join(summary_lines)

    # --- ClickUp sync ---
    if task_id:
        sync_report = _sync_clickup(ctx, task_id, deliverable_paths, summary)
        summary += sync_report

    return summary
|
|
|
|
|
|
def _clean_content(raw: str) -> str:
|
|
"""Clean execution brain output to just the content text.
|
|
|
|
Strips common prefixes/suffixes the LLM might add.
|
|
"""
|
|
text = raw.strip()
|
|
|
|
# Remove common LLM wrapper text
|
|
for prefix in [
|
|
"Here is the",
|
|
"Here's the",
|
|
"Below is the",
|
|
"I've written",
|
|
"Sure, here",
|
|
"Certainly!",
|
|
]:
|
|
if text.lower().startswith(prefix.lower()):
|
|
# Skip to the first blank line after the prefix
|
|
idx = text.find("\n\n")
|
|
if idx != -1 and idx < 200:
|
|
text = text[idx:].strip()
|
|
break
|
|
|
|
# Remove trailing "---" or "Let me know" type endings
|
|
text = re.sub(r"\n---\s*$", "", text).strip()
|
|
text = re.sub(r"\n(Let me know|I hope|Feel free|Would you).*$", "", text, flags=re.DOTALL).strip()
|
|
|
|
return text
|