# cheddahbot/tools/linkbuilding.py

"""Link-building content pipeline tool.
Autonomous workflow:
1. Look up company info from companies.md
2. Generate a guest article (500-700 words) via execution brain
3. Generate a resource/directory blurb via execution brain
4. Generate a social media post via chat brain
5. Save all content to files, return cost summary
"""
from __future__ import annotations
import json
import logging
import re
import time
from datetime import UTC, datetime
from pathlib import Path
from . import tool
log = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Paths
# ---------------------------------------------------------------------------
_ROOT_DIR = Path(__file__).resolve().parent.parent.parent
_SKILLS_DIR = _ROOT_DIR / "skills"
_DATA_DIR = _ROOT_DIR / "data"
_OUTPUT_DIR = _DATA_DIR / "generated" / "link_building"
_COMPANIES_FILE = _SKILLS_DIR / "companies.md"
SONNET_CLI_MODEL = "sonnet"
# ---------------------------------------------------------------------------
# Status / helpers
# ---------------------------------------------------------------------------
def _set_status(ctx: dict | None, message: str) -> None:
"""Write pipeline progress to the DB so the UI can poll it."""
if ctx and "db" in ctx:
ctx["db"].kv_set("pipeline:status", message)
def _slugify(text: str) -> str:
"""Turn a phrase into a filesystem-safe slug."""
text = text.lower().strip()
text = re.sub(r"[^\w\s-]", "", text)
text = re.sub(r"[\s_]+", "-", text)
return text[:60].strip("-")
def _word_count(text: str) -> int:
return len(text.split())
def _fuzzy_company_match(name: str, candidate: str) -> bool:
"""Check if name fuzzy-matches a candidate string."""
if not name or not candidate:
return False
a, b = name.lower().strip(), candidate.lower().strip()
return a == b or a in b or b in a
def _extract_keyword_from_task_name(task_name: str) -> str:
"""Extract keyword from ClickUp task name like 'LINKS - precision cnc turning'."""
if " - " in task_name:
return task_name.split(" - ", 1)[1].strip()
return task_name.strip()
def _load_skill(filename: str) -> str:
    """Read a markdown skill file from the skills/ directory, stripping frontmatter."""
    skill_path = _SKILLS_DIR / filename
    if not skill_path.exists():
        raise FileNotFoundError(f"Skill file not found: {skill_path}")
    content = skill_path.read_text(encoding="utf-8")
    # Drop a leading YAML frontmatter fence ("--- ... ---") when present;
    # an unclosed fence leaves the text untouched.
    if content.startswith("---"):
        closing = content.find("---", 3)
        if closing != -1:
            content = content[closing + 3:].strip()
    return content
def _lookup_company(company_name: str) -> dict:
    """Look up company info from companies.md.

    Scans the markdown for a "## Company Name" heading that fuzzy-matches
    *company_name*, then collects that section's bullet fields.

    Args:
        company_name: Name to search for (fuzzy, case-insensitive).

    Returns:
        A dict always containing "name"; "executive", "pa_org_id", "website",
        and "gbp" are added when found in the matched section.
    """
    if not _COMPANIES_FILE.exists():
        return {"name": company_name}
    text = _COMPANIES_FILE.read_text(encoding="utf-8")
    result = {"name": company_name}
    # Fix: compute the fuzzy match once per "## " heading instead of
    # re-evaluating it (and reassigning result["name"]) for every line of the
    # file; also record the canonical name even when the section is empty.
    in_match = False
    for line in text.splitlines():
        if line.startswith("## "):
            heading = line[3:].strip()
            in_match = _fuzzy_company_match(company_name, heading)
            if in_match:
                result["name"] = heading
        elif in_match:
            # Parse "- **Field:** value" bullets of the matched company.
            if line.startswith("- **Executive:**"):
                result["executive"] = line.split(":**", 1)[1].strip()
            elif line.startswith("- **PA Org ID:**"):
                result["pa_org_id"] = line.split(":**", 1)[1].strip()
            elif line.startswith("- **Website:**"):
                result["website"] = line.split(":**", 1)[1].strip()
            elif line.startswith("- **GBP:**"):
                result["gbp"] = line.split(":**", 1)[1].strip()
    return result
def _chat_call(agent, messages: list[dict]) -> str:
"""Make a non-streaming chat-brain call and return the full text."""
parts: list[str] = []
for chunk in agent.llm.chat(messages, tools=None, stream=False):
if chunk["type"] == "text":
parts.append(chunk["content"])
return "".join(parts)
def _get_clickup_client(ctx: dict | None):
    """Create a ClickUpClient from tool context, or None if unavailable."""
    # Bail out when the context, config, or the ClickUp integration is absent.
    if not ctx or not ctx.get("config") or not ctx["config"].clickup.enabled:
        return None
    try:
        from ..clickup import ClickUpClient

        clickup_cfg = ctx["config"].clickup
        return ClickUpClient(
            api_token=clickup_cfg.api_token,
            workspace_id=clickup_cfg.workspace_id,
            task_type_field_name=clickup_cfg.task_type_field_name,
        )
    except Exception as e:
        # Best-effort: a missing/broken client just disables the sync.
        log.warning("Could not create ClickUp client: %s", e)
        return None
def _sync_clickup(ctx: dict | None, task_id: str, deliverable_paths: list[str],
                  summary: str) -> str:
    """Upload deliverables and update ClickUp task status. Returns sync report.

    Best-effort: any failure while talking to ClickUp is caught, logged, and
    reported as a line in the returned markdown snippet rather than raised.
    Side effects happen in order: attachment uploads, status change, task
    comment, then local kv_store state update.

    Args:
        ctx: Tool context; must contain "config" (and optionally "db") for a
            sync to be attempted.
        task_id: ClickUp task ID; empty string disables the sync entirely.
        deliverable_paths: Local file paths to attach to the task.
        summary: Human-readable pipeline summary embedded in the task comment.

    Returns:
        A markdown "## ClickUp Sync" section describing what happened, or ""
        when no sync was attempted (missing task_id/ctx/client).
    """
    if not task_id or not ctx:
        return ""
    client = _get_clickup_client(ctx)
    if not client:
        return ""
    config = ctx["config"]
    db = ctx.get("db")
    lines = ["\n## ClickUp Sync"]
    try:
        # Upload attachments; count successes so the report and comment agree.
        uploaded = 0
        for path in deliverable_paths:
            if client.upload_attachment(task_id, path):
                uploaded += 1
        if uploaded:
            lines.append(f"- Uploaded {uploaded} file(s)")
        # Update status to review
        client.update_task_status(task_id, config.clickup.review_status)
        lines.append(f"- Status → '{config.clickup.review_status}'")
        # Add comment
        comment = (
            f"✅ CheddahBot completed link building.\n\n"
            f"{summary}\n\n"
            f"📎 {uploaded} file(s) attached."
        )
        client.add_comment(task_id, comment)
        lines.append("- Comment added")
        # Update kv_store state so other components see the task as completed.
        if db:
            kv_key = f"clickup:task:{task_id}:state"
            raw = db.kv_get(kv_key)
            if raw:
                try:
                    state = json.loads(raw)
                    state["state"] = "completed"
                    state["completed_at"] = datetime.now(UTC).isoformat()
                    state["deliverable_paths"] = [str(p) for p in deliverable_paths]
                    db.kv_set(kv_key, json.dumps(state))
                except json.JSONDecodeError:
                    # Corrupt state blob: leave it untouched rather than clobber.
                    pass
    except Exception as e:
        # Partial progress is still reported; the failure becomes a report line.
        lines.append(f"- Sync error: {e}")
        log.error("ClickUp sync failed for task %s: %s", task_id, e)
    finally:
        # Always release the HTTP client, even after a sync failure.
        client.close()
    return "\n".join(lines)
# ---------------------------------------------------------------------------
# Prompt builders
# ---------------------------------------------------------------------------
def _build_guest_article_prompt(
keyword: str, company_name: str, target_url: str, company_info: dict,
skill_prompt: str,
) -> str:
"""Build the prompt for the execution brain to write a guest article."""
executive = company_info.get("executive", "")
prompt = skill_prompt + "\n\n"
prompt += "## Assignment: Guest Article\n\n"
prompt += f"**Target Keyword:** {keyword}\n"
prompt += f"**Company:** {company_name}\n"
if executive:
prompt += f"**Executive/Contact:** {executive}\n"
if target_url:
prompt += f"**Target URL (for backlink):** {target_url}\n"
prompt += (
"\n**Instructions:**\n"
"Write a 500-700 word guest article suitable for industry blogs and "
"trade publications. The article should:\n"
"- Be informative and educational, NOT promotional\n"
"- Naturally incorporate the target keyword 2-3 times\n"
"- Include ONE natural backlink to the target URL using the keyword "
"or a close variation as anchor text\n"
"- Include a second branded mention of the company name (no link needed)\n"
"- Read like expert industry commentary, not an advertisement\n"
"- Have a compelling title (under 70 characters)\n"
"- Use subheadings to break up the content\n"
"- End with a brief author bio mentioning the company\n\n"
"Return ONLY the article text. No meta-commentary."
)
return prompt
def _build_directory_prompt(
keyword: str, company_name: str, target_url: str, branded_url: str,
company_info: dict,
) -> str:
"""Build the prompt for the execution brain to write a directory/citation entry."""
executive = company_info.get("executive", "")
website = company_info.get("website", "") or target_url
prompt = (
"## Assignment: Business Directory / Citation Entry\n\n"
f"**Company:** {company_name}\n"
f"**Target Keyword:** {keyword}\n"
)
if executive:
prompt += f"**Executive:** {executive}\n"
if website:
prompt += f"**Website:** {website}\n"
if branded_url:
prompt += f"**Social/GBP URL:** {branded_url}\n"
prompt += (
"\n**Instructions:**\n"
"Write a business directory entry / citation profile. Include:\n"
"1. **Company Description** (150-200 words) — Describe what the company "
"does, naturally incorporating the target keyword. Professional tone.\n"
"2. **Services List** (5-8 bullet points) — Key services/capabilities, "
"with the target keyword appearing in at least one bullet.\n"
"3. **About Section** (2-3 sentences) — Brief company background.\n\n"
"This will be used for industry directories, Google Business Profile, "
"and business listing sites. Keep it factual and professional.\n\n"
"Return ONLY the directory entry text. No meta-commentary."
)
return prompt
def _build_social_post_prompt(
keyword: str, company_name: str, target_url: str, article_title: str,
) -> str:
"""Build the prompt for the chat brain to write a social media post."""
prompt = (
f"Write a professional LinkedIn post for {company_name} about "
f"'{keyword}'. The post should:\n"
f"- Be 100-150 words\n"
f"- Reference the article: \"{article_title}\"\n"
f"- Include the link: {target_url}\n" if target_url else ""
f"- Use 2-3 relevant hashtags\n"
f"- Professional, not salesy\n"
f"- Encourage engagement (comment/share)\n\n"
"Return ONLY the post text."
)
return prompt
# ---------------------------------------------------------------------------
# Main tool
# ---------------------------------------------------------------------------
@tool(
    "build_links",
    "Generate SEO link building content for a target keyword and company. "
    "Produces a guest article, directory listing, and social post, each with "
    "proper anchor text and backlinks. Files saved to data/generated/link_building/.",
    category="linkbuilding",
)
def build_links(
    keyword: str,
    company_name: str,
    target_url: str = "",
    branded_url: str = "",
    ctx: dict | None = None,
) -> str:
    """Main link-building content pipeline.

    Generates three deliverables (guest article, directory/citation entry,
    social post), writes them under data/generated/link_building/, and — when
    a ClickUp task ID is present in the context — syncs them to the task.

    Args:
        keyword: Target SEO keyword (e.g., "precision cnc turning"). A raw
            ClickUp task name like "LINKS - precision cnc turning" also works.
        company_name: Client company name (e.g., "Chapter2").
        target_url: Primary URL to build backlinks to (from IMSURL field).
        branded_url: Secondary branded URL (from SocialURL field).
        ctx: Injected tool context with config, db, agent.

    Returns:
        Summary of generated content with file paths, or an error message
        when no agent is available in the context.
    """
    t0 = time.time()
    agent = ctx.get("agent") if ctx else None
    task_id = ctx.get("clickup_task_id", "") if ctx else ""
    if not agent:
        return "Error: link building tool requires agent context."
    # Derive keyword from task name if it looks like "LINKS - keyword".
    # Guarded on the prefix: keywords merely containing " - " must survive.
    if keyword.startswith("LINKS"):
        keyword = _extract_keyword_from_task_name(keyword)
    log.info("Link building pipeline: keyword='%s', company='%s'", keyword, company_name)
    # Fix: company and keyword were concatenated with no separator.
    _set_status(ctx, f"Link building: {company_name} — {keyword}")
    # --- Company lookup ---
    company_info = _lookup_company(company_name)
    log.info("Company info: %s", company_info)
    # --- Load skill prompt (optional; inline prompts suffice without it) ---
    try:
        skill_prompt = _load_skill("linkbuilding.md")
    except FileNotFoundError:
        skill_prompt = ""
        log.warning("linkbuilding.md skill not found, using inline prompts only")
    # --- Create output directory ---
    company_slug = _slugify(company_name)
    keyword_slug = _slugify(keyword)
    output_dir = _OUTPUT_DIR / company_slug / keyword_slug
    output_dir.mkdir(parents=True, exist_ok=True)
    results: list[str] = []
    deliverable_paths: list[str] = []
    warnings: list[str] = []
    # Fallback title for the social post, overwritten when the article
    # succeeds. (Fix: replaces the fragile `"article_title" in dir()`
    # introspection hack used at the step-3 call site.)
    article_title = keyword
    # =====================================================================
    # Step 1: Guest Article (execution brain)
    # =====================================================================
    _set_status(ctx, f"Link building: Writing guest article — {keyword}")
    log.info("Step 1: Generating guest article for '%s'", keyword)
    article_prompt = _build_guest_article_prompt(
        keyword, company_name, target_url, company_info, skill_prompt,
    )
    try:
        article_raw = agent.execute_task(article_prompt)
        article_text = _clean_content(article_raw)
        wc = _word_count(article_text)
        if wc < 100:
            warnings.append(f"Guest article too short ({wc} words)")
            log.warning("Guest article too short: %d words", wc)
        else:
            article_path = output_dir / "guest-article.md"
            article_path.write_text(article_text, encoding="utf-8")
            deliverable_paths.append(str(article_path))
            # Extract title from first line (strip markdown heading markers)
            article_title = article_text.splitlines()[0].strip("# ").strip()
            results.append(
                f"**Guest Article:** `{article_path}`\n"
                f" Title: {article_title}\n"
                f" Words: {wc}"
            )
            log.info("Guest article saved: %s (%d words)", article_path, wc)
    except Exception as e:
        warnings.append(f"Guest article generation failed: {e}")
        log.error("Guest article failed: %s", e)
    # =====================================================================
    # Step 2: Directory / Citation Entry (execution brain)
    # =====================================================================
    _set_status(ctx, f"Link building: Writing directory entry — {keyword}")
    log.info("Step 2: Generating directory entry for '%s'", keyword)
    directory_prompt = _build_directory_prompt(
        keyword, company_name, target_url, branded_url, company_info,
    )
    try:
        directory_raw = agent.execute_task(directory_prompt)
        directory_text = _clean_content(directory_raw)
        wc = _word_count(directory_text)
        if wc < 30:
            warnings.append(f"Directory entry too short ({wc} words)")
        else:
            dir_path = output_dir / "directory-listing.md"
            dir_path.write_text(directory_text, encoding="utf-8")
            deliverable_paths.append(str(dir_path))
            results.append(
                f"**Directory Listing:** `{dir_path}`\n"
                f" Words: {wc}"
            )
            log.info("Directory listing saved: %s (%d words)", dir_path, wc)
    except Exception as e:
        warnings.append(f"Directory entry generation failed: {e}")
        log.error("Directory entry failed: %s", e)
    # =====================================================================
    # Step 3: Social Media Post (chat brain — fast)
    # =====================================================================
    _set_status(ctx, f"Link building: Writing social post — {keyword}")
    log.info("Step 3: Generating social post for '%s'", keyword)
    social_prompt = _build_social_post_prompt(
        keyword, company_name, target_url, article_title,
    )
    try:
        social_text = _chat_call(agent, [{"role": "user", "content": social_prompt}])
        social_text = social_text.strip()
        wc = _word_count(social_text)
        if wc < 20:
            warnings.append(f"Social post too short ({wc} words)")
        else:
            social_path = output_dir / "social-post.md"
            social_path.write_text(social_text, encoding="utf-8")
            deliverable_paths.append(str(social_path))
            results.append(
                f"**Social Post:** `{social_path}`\n"
                f" Words: {wc}"
            )
            log.info("Social post saved: %s (%d words)", social_path, wc)
    except Exception as e:
        warnings.append(f"Social post generation failed: {e}")
        log.error("Social post failed: %s", e)
    # =====================================================================
    # Summary
    # =====================================================================
    elapsed = time.time() - t0
    # Clear the pipeline status so the UI stops showing "in progress".
    _set_status(ctx, "")
    summary_lines = [
        # Fix: company and keyword were concatenated with no separator.
        f"# Link Building Complete: {company_name} — {keyword}\n",
        f"**Keyword:** {keyword}",
        f"**Company:** {company_info.get('name', company_name)}",
        f"**Target URL:** {target_url or '(none)'}",
        f"**Output Dir:** `{output_dir}`",
        f"**Time:** {elapsed:.1f}s",
        f"**Deliverables:** {len(deliverable_paths)}",
        "",
    ]
    if results:
        summary_lines.append("## Generated Content")
        summary_lines.extend(results)
    if warnings:
        summary_lines.append("\n## Warnings")
        for w in warnings:
            summary_lines.append(f"- ⚠️ {w}")
    summary = "\n".join(summary_lines)
    # --- ClickUp sync (best-effort; appends its own report section) ---
    if task_id:
        sync_report = _sync_clickup(ctx, task_id, deliverable_paths, summary)
        summary += sync_report
    return summary
def _clean_content(raw: str) -> str:
"""Clean execution brain output to just the content text.
Strips common prefixes/suffixes the LLM might add.
"""
text = raw.strip()
# Remove common LLM wrapper text
for prefix in [
"Here is the",
"Here's the",
"Below is the",
"I've written",
"Sure, here",
"Certainly!",
]:
if text.lower().startswith(prefix.lower()):
# Skip to the first blank line after the prefix
idx = text.find("\n\n")
if idx != -1 and idx < 200:
text = text[idx:].strip()
break
# Remove trailing "---" or "Let me know" type endings
text = re.sub(r"\n---\s*$", "", text).strip()
text = re.sub(r"\n(Let me know|I hope|Feel free|Would you).*$", "", text, flags=re.DOTALL).strip()
return text