"""Link-building content pipeline tool. Autonomous workflow: 1. Look up company info from companies.md 2. Generate a guest article (500-700 words) via execution brain 3. Generate a resource/directory blurb via execution brain 4. Generate a social media post via chat brain 5. Save all content to files, return cost summary """ from __future__ import annotations import json import logging import re import time from datetime import UTC, datetime from pathlib import Path from . import tool log = logging.getLogger(__name__) # --------------------------------------------------------------------------- # Paths # --------------------------------------------------------------------------- _ROOT_DIR = Path(__file__).resolve().parent.parent.parent _SKILLS_DIR = _ROOT_DIR / "skills" _DATA_DIR = _ROOT_DIR / "data" _OUTPUT_DIR = _DATA_DIR / "generated" / "link_building" _COMPANIES_FILE = _SKILLS_DIR / "companies.md" SONNET_CLI_MODEL = "sonnet" # --------------------------------------------------------------------------- # Status / helpers # --------------------------------------------------------------------------- def _set_status(ctx: dict | None, message: str) -> None: """Write pipeline progress to the DB so the UI can poll it.""" if ctx and "db" in ctx: ctx["db"].kv_set("pipeline:status", message) def _slugify(text: str) -> str: """Turn a phrase into a filesystem-safe slug.""" text = text.lower().strip() text = re.sub(r"[^\w\s-]", "", text) text = re.sub(r"[\s_]+", "-", text) return text[:60].strip("-") def _word_count(text: str) -> int: return len(text.split()) def _fuzzy_company_match(name: str, candidate: str) -> bool: """Check if name fuzzy-matches a candidate string.""" if not name or not candidate: return False a, b = name.lower().strip(), candidate.lower().strip() return a == b or a in b or b in a def _extract_keyword_from_task_name(task_name: str) -> str: """Extract keyword from ClickUp task name like 'LINKS - precision cnc turning'.""" if " - " in task_name: return task_name.split(" - ", 1)[1].strip() return task_name.strip() def _load_skill(filename: str) -> str: """Read a markdown skill file from the skills/ directory, stripping frontmatter.""" path = _SKILLS_DIR / filename if not path.exists(): raise FileNotFoundError(f"Skill file not found: {path}") text = path.read_text(encoding="utf-8") # Strip YAML frontmatter (--- ... ---) if present if text.startswith("---"): end = text.find("---", 3) if end != -1: text = text[end + 3:].strip() return text def _lookup_company(company_name: str) -> dict: """Look up company info from companies.md. Returns a dict with keys: name, executive, pa_org_id, website, gbp. """ if not _COMPANIES_FILE.exists(): return {"name": company_name} text = _COMPANIES_FILE.read_text(encoding="utf-8") result = {"name": company_name} # Parse companies.md format: ## Company Name followed by bullet fields current_company = "" for line in text.splitlines(): if line.startswith("## "): current_company = line[3:].strip() elif current_company and _fuzzy_company_match(company_name, current_company): result["name"] = current_company if line.startswith("- **Executive:**"): result["executive"] = line.split(":**", 1)[1].strip() elif line.startswith("- **PA Org ID:**"): result["pa_org_id"] = line.split(":**", 1)[1].strip() elif line.startswith("- **Website:**"): result["website"] = line.split(":**", 1)[1].strip() elif line.startswith("- **GBP:**"): result["gbp"] = line.split(":**", 1)[1].strip() return result def _chat_call(agent, messages: list[dict]) -> str: """Make a non-streaming chat-brain call and return the full text.""" parts: list[str] = [] for chunk in agent.llm.chat(messages, tools=None, stream=False): if chunk["type"] == "text": parts.append(chunk["content"]) return "".join(parts) def _get_clickup_client(ctx: dict | None): """Create a ClickUpClient from tool context, or None if unavailable.""" if not ctx or not ctx.get("config") or not ctx["config"].clickup.enabled: return None try: from ..clickup import ClickUpClient config = ctx["config"] return ClickUpClient( api_token=config.clickup.api_token, workspace_id=config.clickup.workspace_id, task_type_field_name=config.clickup.task_type_field_name, ) except Exception as e: log.warning("Could not create ClickUp client: %s", e) return None def _sync_clickup(ctx: dict | None, task_id: str, deliverable_paths: list[str], summary: str) -> str: """Upload deliverables and update ClickUp task status. Returns sync report.""" if not task_id or not ctx: return "" client = _get_clickup_client(ctx) if not client: return "" config = ctx["config"] db = ctx.get("db") lines = ["\n## ClickUp Sync"] try: # Upload attachments uploaded = 0 for path in deliverable_paths: if client.upload_attachment(task_id, path): uploaded += 1 if uploaded: lines.append(f"- Uploaded {uploaded} file(s)") # Update status to review client.update_task_status(task_id, config.clickup.review_status) lines.append(f"- Status → '{config.clickup.review_status}'") # Add comment comment = ( f"✅ CheddahBot completed link building.\n\n" f"{summary}\n\n" f"📎 {uploaded} file(s) attached." ) client.add_comment(task_id, comment) lines.append("- Comment added") # Update kv_store state if db: kv_key = f"clickup:task:{task_id}:state" raw = db.kv_get(kv_key) if raw: try: state = json.loads(raw) state["state"] = "completed" state["completed_at"] = datetime.now(UTC).isoformat() state["deliverable_paths"] = [str(p) for p in deliverable_paths] db.kv_set(kv_key, json.dumps(state)) except json.JSONDecodeError: pass except Exception as e: lines.append(f"- Sync error: {e}") log.error("ClickUp sync failed for task %s: %s", task_id, e) finally: client.close() return "\n".join(lines) # --------------------------------------------------------------------------- # Prompt builders # --------------------------------------------------------------------------- def _build_guest_article_prompt( keyword: str, company_name: str, target_url: str, company_info: dict, skill_prompt: str, ) -> str: """Build the prompt for the execution brain to write a guest article.""" executive = company_info.get("executive", "") prompt = skill_prompt + "\n\n" prompt += "## Assignment: Guest Article\n\n" prompt += f"**Target Keyword:** {keyword}\n" prompt += f"**Company:** {company_name}\n" if executive: prompt += f"**Executive/Contact:** {executive}\n" if target_url: prompt += f"**Target URL (for backlink):** {target_url}\n" prompt += ( "\n**Instructions:**\n" "Write a 500-700 word guest article suitable for industry blogs and " "trade publications. The article should:\n" "- Be informative and educational, NOT promotional\n" "- Naturally incorporate the target keyword 2-3 times\n" "- Include ONE natural backlink to the target URL using the keyword " "or a close variation as anchor text\n" "- Include a second branded mention of the company name (no link needed)\n" "- Read like expert industry commentary, not an advertisement\n" "- Have a compelling title (under 70 characters)\n" "- Use subheadings to break up the content\n" "- End with a brief author bio mentioning the company\n\n" "Return ONLY the article text. No meta-commentary." ) return prompt def _build_directory_prompt( keyword: str, company_name: str, target_url: str, branded_url: str, company_info: dict, ) -> str: """Build the prompt for the execution brain to write a directory/citation entry.""" executive = company_info.get("executive", "") website = company_info.get("website", "") or target_url prompt = ( "## Assignment: Business Directory / Citation Entry\n\n" f"**Company:** {company_name}\n" f"**Target Keyword:** {keyword}\n" ) if executive: prompt += f"**Executive:** {executive}\n" if website: prompt += f"**Website:** {website}\n" if branded_url: prompt += f"**Social/GBP URL:** {branded_url}\n" prompt += ( "\n**Instructions:**\n" "Write a business directory entry / citation profile. Include:\n" "1. **Company Description** (150-200 words) — Describe what the company " "does, naturally incorporating the target keyword. Professional tone.\n" "2. **Services List** (5-8 bullet points) — Key services/capabilities, " "with the target keyword appearing in at least one bullet.\n" "3. **About Section** (2-3 sentences) — Brief company background.\n\n" "This will be used for industry directories, Google Business Profile, " "and business listing sites. Keep it factual and professional.\n\n" "Return ONLY the directory entry text. No meta-commentary." ) return prompt def _build_social_post_prompt( keyword: str, company_name: str, target_url: str, article_title: str, ) -> str: """Build the prompt for the chat brain to write a social media post.""" prompt = ( f"Write a professional LinkedIn post for {company_name} about " f"'{keyword}'. The post should:\n" f"- Be 100-150 words\n" f"- Reference the article: \"{article_title}\"\n" f"- Include the link: {target_url}\n" if target_url else "" f"- Use 2-3 relevant hashtags\n" f"- Professional, not salesy\n" f"- Encourage engagement (comment/share)\n\n" "Return ONLY the post text." ) return prompt # --------------------------------------------------------------------------- # Main tool # --------------------------------------------------------------------------- @tool( "build_links", "Generate SEO link building content for a target keyword and company. " "Produces a guest article, directory listing, and social post, each with " "proper anchor text and backlinks. Files saved to data/generated/link_building/.", category="linkbuilding", ) def build_links( keyword: str, company_name: str, target_url: str = "", branded_url: str = "", ctx: dict | None = None, ) -> str: """Main link-building content pipeline. Args: keyword: Target SEO keyword (e.g., "precision cnc turning"). company_name: Client company name (e.g., "Chapter2"). target_url: Primary URL to build backlinks to (from IMSURL field). branded_url: Secondary branded URL (from SocialURL field). ctx: Injected tool context with config, db, agent. Returns: Summary of generated content with file paths. """ t0 = time.time() agent = ctx.get("agent") if ctx else None task_id = ctx.get("clickup_task_id", "") if ctx else "" if not agent: return "Error: link building tool requires agent context." # Derive keyword from task name if it looks like "LINKS - keyword" keyword = _extract_keyword_from_task_name(keyword) if keyword.startswith("LINKS") else keyword log.info("Link building pipeline: keyword='%s', company='%s'", keyword, company_name) _set_status(ctx, f"Link building: {company_name} — {keyword}") # --- Company lookup --- company_info = _lookup_company(company_name) log.info("Company info: %s", company_info) # --- Load skill prompt --- try: skill_prompt = _load_skill("linkbuilding.md") except FileNotFoundError: skill_prompt = "" log.warning("linkbuilding.md skill not found, using inline prompts only") # --- Create output directory --- company_slug = _slugify(company_name) keyword_slug = _slugify(keyword) output_dir = _OUTPUT_DIR / company_slug / keyword_slug output_dir.mkdir(parents=True, exist_ok=True) results = [] deliverable_paths: list[str] = [] warnings: list[str] = [] # ===================================================================== # Step 1: Guest Article (execution brain) # ===================================================================== _set_status(ctx, f"Link building: Writing guest article — {keyword}") log.info("Step 1: Generating guest article for '%s'", keyword) article_prompt = _build_guest_article_prompt( keyword, company_name, target_url, company_info, skill_prompt, ) try: article_raw = agent.execute_task(article_prompt) article_text = _clean_content(article_raw) wc = _word_count(article_text) if wc < 100: warnings.append(f"Guest article too short ({wc} words)") log.warning("Guest article too short: %d words", wc) else: article_path = output_dir / "guest-article.md" article_path.write_text(article_text, encoding="utf-8") deliverable_paths.append(str(article_path)) # Extract title from first line article_title = article_text.splitlines()[0].strip("# ").strip() results.append( f"**Guest Article:** `{article_path}`\n" f" Title: {article_title}\n" f" Words: {wc}" ) log.info("Guest article saved: %s (%d words)", article_path, wc) except Exception as e: warnings.append(f"Guest article generation failed: {e}") log.error("Guest article failed: %s", e) article_title = keyword # fallback for social post # ===================================================================== # Step 2: Directory / Citation Entry (execution brain) # ===================================================================== _set_status(ctx, f"Link building: Writing directory entry — {keyword}") log.info("Step 2: Generating directory entry for '%s'", keyword) directory_prompt = _build_directory_prompt( keyword, company_name, target_url, branded_url, company_info, ) try: directory_raw = agent.execute_task(directory_prompt) directory_text = _clean_content(directory_raw) wc = _word_count(directory_text) if wc < 30: warnings.append(f"Directory entry too short ({wc} words)") else: dir_path = output_dir / "directory-listing.md" dir_path.write_text(directory_text, encoding="utf-8") deliverable_paths.append(str(dir_path)) results.append( f"**Directory Listing:** `{dir_path}`\n" f" Words: {wc}" ) log.info("Directory listing saved: %s (%d words)", dir_path, wc) except Exception as e: warnings.append(f"Directory entry generation failed: {e}") log.error("Directory entry failed: %s", e) # ===================================================================== # Step 3: Social Media Post (chat brain — fast) # ===================================================================== _set_status(ctx, f"Link building: Writing social post — {keyword}") log.info("Step 3: Generating social post for '%s'", keyword) social_prompt = _build_social_post_prompt( keyword, company_name, target_url, article_title if "article_title" in dir() else keyword, ) try: social_text = _chat_call(agent, [{"role": "user", "content": social_prompt}]) social_text = social_text.strip() wc = _word_count(social_text) if wc < 20: warnings.append(f"Social post too short ({wc} words)") else: social_path = output_dir / "social-post.md" social_path.write_text(social_text, encoding="utf-8") deliverable_paths.append(str(social_path)) results.append( f"**Social Post:** `{social_path}`\n" f" Words: {wc}" ) log.info("Social post saved: %s (%d words)", social_path, wc) except Exception as e: warnings.append(f"Social post generation failed: {e}") log.error("Social post failed: %s", e) # ===================================================================== # Summary # ===================================================================== elapsed = time.time() - t0 _set_status(ctx, "") summary_lines = [ f"# Link Building Complete: {company_name} — {keyword}\n", f"**Keyword:** {keyword}", f"**Company:** {company_info.get('name', company_name)}", f"**Target URL:** {target_url or '(none)'}", f"**Output Dir:** `{output_dir}`", f"**Time:** {elapsed:.1f}s", f"**Deliverables:** {len(deliverable_paths)}", "", ] if results: summary_lines.append("## Generated Content") summary_lines.extend(results) if warnings: summary_lines.append("\n## Warnings") for w in warnings: summary_lines.append(f"- ⚠️ {w}") summary = "\n".join(summary_lines) # --- ClickUp sync --- if task_id: sync_report = _sync_clickup(ctx, task_id, deliverable_paths, summary) summary += sync_report return summary def _clean_content(raw: str) -> str: """Clean execution brain output to just the content text. Strips common prefixes/suffixes the LLM might add. """ text = raw.strip() # Remove common LLM wrapper text for prefix in [ "Here is the", "Here's the", "Below is the", "I've written", "Sure, here", "Certainly!", ]: if text.lower().startswith(prefix.lower()): # Skip to the first blank line after the prefix idx = text.find("\n\n") if idx != -1 and idx < 200: text = text[idx:].strip() break # Remove trailing "---" or "Let me know" type endings text = re.sub(r"\n---\s*$", "", text).strip() text = re.sub(r"\n(Let me know|I hope|Feel free|Would you).*$", "", text, flags=re.DOTALL).strip() return text