# cheddahbot/tools/linkbuilding.py

"""Link-building content pipeline tool.
Autonomous workflow:
1. Look up company info from companies.md
2. Generate a guest article (500-700 words) via execution brain
3. Generate a resource/directory blurb via execution brain
4. Generate a social media post via chat brain
5. Save all content to files, return cost summary
"""
from __future__ import annotations
import json
import logging
import re
import time
from datetime import UTC, datetime
from pathlib import Path
from . import tool
log = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Paths
# ---------------------------------------------------------------------------
_ROOT_DIR = Path(__file__).resolve().parent.parent.parent
_SKILLS_DIR = _ROOT_DIR / "skills"
_DATA_DIR = _ROOT_DIR / "data"
_OUTPUT_DIR = _DATA_DIR / "generated" / "link_building"
_COMPANIES_FILE = _SKILLS_DIR / "companies.md"
SONNET_CLI_MODEL = "sonnet"
# ---------------------------------------------------------------------------
# Status / helpers
# ---------------------------------------------------------------------------
def _set_status(ctx: dict | None, message: str) -> None:
"""Write pipeline progress to the DB so the UI can poll it."""
if ctx and "db" in ctx:
ctx["db"].kv_set("pipeline:status", message)
def _slugify(text: str) -> str:
"""Turn a phrase into a filesystem-safe slug."""
text = text.lower().strip()
text = re.sub(r"[^\w\s-]", "", text)
text = re.sub(r"[\s_]+", "-", text)
return text[:60].strip("-")
def _word_count(text: str) -> int:
return len(text.split())
def _fuzzy_company_match(name: str, candidate: str) -> bool:
"""Check if name fuzzy-matches a candidate string."""
if not name or not candidate:
return False
a, b = name.lower().strip(), candidate.lower().strip()
return a == b or a in b or b in a
def _extract_keyword_from_task_name(task_name: str) -> str:
"""Extract keyword from ClickUp task name like 'LINKS - precision cnc turning'."""
if " - " in task_name:
return task_name.split(" - ", 1)[1].strip()
return task_name.strip()
def _load_skill(filename: str) -> str:
    """Read a markdown skill file from the skills/ directory, stripping frontmatter."""
    skill_path = _SKILLS_DIR / filename
    if not skill_path.exists():
        raise FileNotFoundError(f"Skill file not found: {skill_path}")
    content = skill_path.read_text(encoding="utf-8")
    # Drop a leading YAML frontmatter fence ("--- ... ---") when present;
    # an unclosed fence leaves the text untouched.
    if content.startswith("---"):
        closing = content.find("---", 3)
        if closing != -1:
            content = content[closing + 3:].strip()
    return content
def _lookup_company(company_name: str) -> dict:
    """Look up company info from companies.md.

    Scans the markdown for a "## Company Name" heading that fuzzy-matches
    *company_name*, then collects that section's bullet fields.

    Args:
        company_name: Name to search for (fuzzy, case-insensitive).

    Returns:
        A dict always containing "name"; "executive", "pa_org_id", "website",
        and "gbp" are added when found in the matched section.
    """
    if not _COMPANIES_FILE.exists():
        return {"name": company_name}
    text = _COMPANIES_FILE.read_text(encoding="utf-8")
    result = {"name": company_name}
    # Fix: compute the fuzzy match once per "## " heading instead of
    # re-evaluating it (and reassigning result["name"]) for every line of the
    # file; also record the canonical name even when the section is empty.
    in_match = False
    for line in text.splitlines():
        if line.startswith("## "):
            heading = line[3:].strip()
            in_match = _fuzzy_company_match(company_name, heading)
            if in_match:
                result["name"] = heading
        elif in_match:
            # Parse "- **Field:** value" bullets of the matched company.
            if line.startswith("- **Executive:**"):
                result["executive"] = line.split(":**", 1)[1].strip()
            elif line.startswith("- **PA Org ID:**"):
                result["pa_org_id"] = line.split(":**", 1)[1].strip()
            elif line.startswith("- **Website:**"):
                result["website"] = line.split(":**", 1)[1].strip()
            elif line.startswith("- **GBP:**"):
                result["gbp"] = line.split(":**", 1)[1].strip()
    return result
def _chat_call(agent, messages: list[dict]) -> str:
"""Make a non-streaming chat-brain call and return the full text."""
parts: list[str] = []
for chunk in agent.llm.chat(messages, tools=None, stream=False):
if chunk["type"] == "text":
parts.append(chunk["content"])
return "".join(parts)
def _get_clickup_client(ctx: dict | None):
    """Create a ClickUpClient from tool context, or None if unavailable."""
    # Bail out when the context, config, or the ClickUp integration is absent.
    if not ctx or not ctx.get("config") or not ctx["config"].clickup.enabled:
        return None
    try:
        from ..clickup import ClickUpClient

        clickup_cfg = ctx["config"].clickup
        return ClickUpClient(
            api_token=clickup_cfg.api_token,
            workspace_id=clickup_cfg.workspace_id,
            task_type_field_name=clickup_cfg.task_type_field_name,
        )
    except Exception as e:
        # Best-effort: a missing/broken client just disables the sync.
        log.warning("Could not create ClickUp client: %s", e)
        return None
def _sync_clickup(ctx: dict | None, task_id: str, deliverable_paths: list[str],
                  summary: str) -> str:
    """Upload deliverables and update ClickUp task status. Returns sync report.

    Best-effort: any failure while talking to ClickUp is caught, logged, and
    reported as a line in the returned markdown snippet rather than raised.
    Side effects happen in order: attachment uploads, status change, task
    comment, then local kv_store state update.

    Args:
        ctx: Tool context; must contain "config" (and optionally "db") for a
            sync to be attempted.
        task_id: ClickUp task ID; empty string disables the sync entirely.
        deliverable_paths: Local file paths to attach to the task.
        summary: Human-readable pipeline summary embedded in the task comment.

    Returns:
        A markdown "## ClickUp Sync" section describing what happened, or ""
        when no sync was attempted (missing task_id/ctx/client).
    """
    if not task_id or not ctx:
        return ""
    client = _get_clickup_client(ctx)
    if not client:
        return ""
    config = ctx["config"]
    db = ctx.get("db")
    lines = ["\n## ClickUp Sync"]
    try:
        # Upload attachments; count successes so the report and comment agree.
        uploaded = 0
        for path in deliverable_paths:
            if client.upload_attachment(task_id, path):
                uploaded += 1
        if uploaded:
            lines.append(f"- Uploaded {uploaded} file(s)")
        # Update status to review
        client.update_task_status(task_id, config.clickup.review_status)
        lines.append(f"- Status → '{config.clickup.review_status}'")
        # Add comment
        comment = (
            f"✅ CheddahBot completed link building.\n\n"
            f"{summary}\n\n"
            f"📎 {uploaded} file(s) attached."
        )
        client.add_comment(task_id, comment)
        lines.append("- Comment added")
        # Update kv_store state so other components see the task as completed.
        if db:
            kv_key = f"clickup:task:{task_id}:state"
            raw = db.kv_get(kv_key)
            if raw:
                try:
                    state = json.loads(raw)
                    state["state"] = "completed"
                    state["completed_at"] = datetime.now(UTC).isoformat()
                    state["deliverable_paths"] = [str(p) for p in deliverable_paths]
                    db.kv_set(kv_key, json.dumps(state))
                except json.JSONDecodeError:
                    # Corrupt state blob: leave it untouched rather than clobber.
                    pass
    except Exception as e:
        # Partial progress is still reported; the failure becomes a report line.
        lines.append(f"- Sync error: {e}")
        log.error("ClickUp sync failed for task %s: %s", task_id, e)
    finally:
        # Always release the HTTP client, even after a sync failure.
        client.close()
    return "\n".join(lines)
# ---------------------------------------------------------------------------
# Prompt builders
# ---------------------------------------------------------------------------
def _build_guest_article_prompt(
keyword: str, company_name: str, target_url: str, company_info: dict,
skill_prompt: str,
) -> str:
"""Build the prompt for the execution brain to write a guest article."""
executive = company_info.get("executive", "")
prompt = skill_prompt + "\n\n"
prompt += "## Assignment: Guest Article\n\n"
prompt += f"**Target Keyword:** {keyword}\n"
prompt += f"**Company:** {company_name}\n"
if executive:
prompt += f"**Executive/Contact:** {executive}\n"
if target_url:
prompt += f"**Target URL (for backlink):** {target_url}\n"
prompt += (
"\n**Instructions:**\n"
"Write a 500-700 word guest article suitable for industry blogs and "
"trade publications. The article should:\n"
"- Be informative and educational, NOT promotional\n"
"- Naturally incorporate the target keyword 2-3 times\n"
"- Include ONE natural backlink to the target URL using the keyword "
"or a close variation as anchor text\n"
"- Include a second branded mention of the company name (no link needed)\n"
"- Read like expert industry commentary, not an advertisement\n"
"- Have a compelling title (under 70 characters)\n"
"- Use subheadings to break up the content\n"
"- End with a brief author bio mentioning the company\n\n"
"Return ONLY the article text. No meta-commentary."
)
return prompt
def _build_directory_prompt(
keyword: str, company_name: str, target_url: str, branded_url: str,
company_info: dict,
) -> str:
"""Build the prompt for the execution brain to write a directory/citation entry."""
executive = company_info.get("executive", "")
website = company_info.get("website", "") or target_url
prompt = (
"## Assignment: Business Directory / Citation Entry\n\n"
f"**Company:** {company_name}\n"
f"**Target Keyword:** {keyword}\n"
)
if executive:
prompt += f"**Executive:** {executive}\n"
if website:
prompt += f"**Website:** {website}\n"
if branded_url:
prompt += f"**Social/GBP URL:** {branded_url}\n"
prompt += (
"\n**Instructions:**\n"
"Write a business directory entry / citation profile. Include:\n"
"1. **Company Description** (150-200 words) — Describe what the company "
"does, naturally incorporating the target keyword. Professional tone.\n"
"2. **Services List** (5-8 bullet points) — Key services/capabilities, "
"with the target keyword appearing in at least one bullet.\n"
"3. **About Section** (2-3 sentences) — Brief company background.\n\n"
"This will be used for industry directories, Google Business Profile, "
"and business listing sites. Keep it factual and professional.\n\n"
"Return ONLY the directory entry text. No meta-commentary."
)
return prompt
def _build_social_post_prompt(
keyword: str, company_name: str, target_url: str, article_title: str,
) -> str:
"""Build the prompt for the chat brain to write a social media post."""
prompt = (
f"Write a professional LinkedIn post for {company_name} about "
f"'{keyword}'. The post should:\n"
f"- Be 100-150 words\n"
f"- Reference the article: \"{article_title}\"\n"
f"- Include the link: {target_url}\n" if target_url else ""
f"- Use 2-3 relevant hashtags\n"
f"- Professional, not salesy\n"
f"- Encourage engagement (comment/share)\n\n"
"Return ONLY the post text."
)
return prompt
# ---------------------------------------------------------------------------
# Main tool
# ---------------------------------------------------------------------------
@tool(
    "build_links",
    "Generate SEO link building content for a target keyword and company. "
    "Produces a guest article, directory listing, and social post, each with "
    "proper anchor text and backlinks. Files saved to data/generated/link_building/.",
    category="linkbuilding",
)
def build_links(
    keyword: str,
    company_name: str,
    target_url: str = "",
    branded_url: str = "",
    ctx: dict | None = None,
) -> str:
    """Main link-building content pipeline.

    Generates three deliverables (guest article, directory/citation entry,
    social post), writes them under data/generated/link_building/, and — when
    a ClickUp task ID is present in the context — syncs them to the task.

    Args:
        keyword: Target SEO keyword (e.g., "precision cnc turning"). A raw
            ClickUp task name like "LINKS - precision cnc turning" also works.
        company_name: Client company name (e.g., "Chapter2").
        target_url: Primary URL to build backlinks to (from IMSURL field).
        branded_url: Secondary branded URL (from SocialURL field).
        ctx: Injected tool context with config, db, agent.

    Returns:
        Summary of generated content with file paths, or an error message
        when no agent is available in the context.
    """
    t0 = time.time()
    agent = ctx.get("agent") if ctx else None
    task_id = ctx.get("clickup_task_id", "") if ctx else ""
    if not agent:
        return "Error: link building tool requires agent context."
    # Derive keyword from task name if it looks like "LINKS - keyword".
    # Guarded on the prefix: keywords merely containing " - " must survive.
    if keyword.startswith("LINKS"):
        keyword = _extract_keyword_from_task_name(keyword)
    log.info("Link building pipeline: keyword='%s', company='%s'", keyword, company_name)
    # Fix: company and keyword were concatenated with no separator.
    _set_status(ctx, f"Link building: {company_name} — {keyword}")
    # --- Company lookup ---
    company_info = _lookup_company(company_name)
    log.info("Company info: %s", company_info)
    # --- Load skill prompt (optional; inline prompts suffice without it) ---
    try:
        skill_prompt = _load_skill("linkbuilding.md")
    except FileNotFoundError:
        skill_prompt = ""
        log.warning("linkbuilding.md skill not found, using inline prompts only")
    # --- Create output directory ---
    company_slug = _slugify(company_name)
    keyword_slug = _slugify(keyword)
    output_dir = _OUTPUT_DIR / company_slug / keyword_slug
    output_dir.mkdir(parents=True, exist_ok=True)
    results: list[str] = []
    deliverable_paths: list[str] = []
    warnings: list[str] = []
    # Fallback title for the social post, overwritten when the article
    # succeeds. (Fix: replaces the fragile `"article_title" in dir()`
    # introspection hack used at the step-3 call site.)
    article_title = keyword
    # =====================================================================
    # Step 1: Guest Article (execution brain)
    # =====================================================================
    _set_status(ctx, f"Link building: Writing guest article — {keyword}")
    log.info("Step 1: Generating guest article for '%s'", keyword)
    article_prompt = _build_guest_article_prompt(
        keyword, company_name, target_url, company_info, skill_prompt,
    )
    try:
        article_raw = agent.execute_task(article_prompt)
        article_text = _clean_content(article_raw)
        wc = _word_count(article_text)
        if wc < 100:
            warnings.append(f"Guest article too short ({wc} words)")
            log.warning("Guest article too short: %d words", wc)
        else:
            article_path = output_dir / "guest-article.md"
            article_path.write_text(article_text, encoding="utf-8")
            deliverable_paths.append(str(article_path))
            # Extract title from first line (strip markdown heading markers)
            article_title = article_text.splitlines()[0].strip("# ").strip()
            results.append(
                f"**Guest Article:** `{article_path}`\n"
                f" Title: {article_title}\n"
                f" Words: {wc}"
            )
            log.info("Guest article saved: %s (%d words)", article_path, wc)
    except Exception as e:
        warnings.append(f"Guest article generation failed: {e}")
        log.error("Guest article failed: %s", e)
    # =====================================================================
    # Step 2: Directory / Citation Entry (execution brain)
    # =====================================================================
    _set_status(ctx, f"Link building: Writing directory entry — {keyword}")
    log.info("Step 2: Generating directory entry for '%s'", keyword)
    directory_prompt = _build_directory_prompt(
        keyword, company_name, target_url, branded_url, company_info,
    )
    try:
        directory_raw = agent.execute_task(directory_prompt)
        directory_text = _clean_content(directory_raw)
        wc = _word_count(directory_text)
        if wc < 30:
            warnings.append(f"Directory entry too short ({wc} words)")
        else:
            dir_path = output_dir / "directory-listing.md"
            dir_path.write_text(directory_text, encoding="utf-8")
            deliverable_paths.append(str(dir_path))
            results.append(
                f"**Directory Listing:** `{dir_path}`\n"
                f" Words: {wc}"
            )
            log.info("Directory listing saved: %s (%d words)", dir_path, wc)
    except Exception as e:
        warnings.append(f"Directory entry generation failed: {e}")
        log.error("Directory entry failed: %s", e)
    # =====================================================================
    # Step 3: Social Media Post (chat brain — fast)
    # =====================================================================
    _set_status(ctx, f"Link building: Writing social post — {keyword}")
    log.info("Step 3: Generating social post for '%s'", keyword)
    social_prompt = _build_social_post_prompt(
        keyword, company_name, target_url, article_title,
    )
    try:
        social_text = _chat_call(agent, [{"role": "user", "content": social_prompt}])
        social_text = social_text.strip()
        wc = _word_count(social_text)
        if wc < 20:
            warnings.append(f"Social post too short ({wc} words)")
        else:
            social_path = output_dir / "social-post.md"
            social_path.write_text(social_text, encoding="utf-8")
            deliverable_paths.append(str(social_path))
            results.append(
                f"**Social Post:** `{social_path}`\n"
                f" Words: {wc}"
            )
            log.info("Social post saved: %s (%d words)", social_path, wc)
    except Exception as e:
        warnings.append(f"Social post generation failed: {e}")
        log.error("Social post failed: %s", e)
    # =====================================================================
    # Summary
    # =====================================================================
    elapsed = time.time() - t0
    # Clear the pipeline status so the UI stops showing "in progress".
    _set_status(ctx, "")
    summary_lines = [
        # Fix: company and keyword were concatenated with no separator.
        f"# Link Building Complete: {company_name} — {keyword}\n",
        f"**Keyword:** {keyword}",
        f"**Company:** {company_info.get('name', company_name)}",
        f"**Target URL:** {target_url or '(none)'}",
        f"**Output Dir:** `{output_dir}`",
        f"**Time:** {elapsed:.1f}s",
        f"**Deliverables:** {len(deliverable_paths)}",
        "",
    ]
    if results:
        summary_lines.append("## Generated Content")
        summary_lines.extend(results)
    if warnings:
        summary_lines.append("\n## Warnings")
        for w in warnings:
            summary_lines.append(f"- ⚠️ {w}")
    summary = "\n".join(summary_lines)
    # --- ClickUp sync (best-effort; appends its own report section) ---
    if task_id:
        sync_report = _sync_clickup(ctx, task_id, deliverable_paths, summary)
        summary += sync_report
    return summary
def _clean_content(raw: str) -> str:
"""Clean execution brain output to just the content text.
Strips common prefixes/suffixes the LLM might add.
"""
text = raw.strip()
# Remove common LLM wrapper text
for prefix in [
"Here is the",
"Here's the",
"Below is the",
"I've written",
"Sure, here",
"Certainly!",
]:
if text.lower().startswith(prefix.lower()):
# Skip to the first blank line after the prefix
idx = text.find("\n\n")
if idx != -1 and idx < 200:
text = text[idx:].strip()
break
# Remove trailing "---" or "Let me know" type endings
text = re.sub(r"\n---\s*$", "", text).strip()
text = re.sub(r"\n(Let me know|I hope|Feel free|Would you).*$", "", text, flags=re.DOTALL).strip()
return text