221 lines
8.1 KiB
Python
221 lines
8.1 KiB
Python
"""Adversarial fact-checker for press release outputs.

Runs a second Claude Code pass on generated PR text files to catch
factual errors. Treats all client-provided data (company name, titles,
URLs, topic) as ground truth and only corrects claims the PR inferred
or fabricated beyond what was given.

Graceful failure: any error returns the original text untouched.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import shutil
|
|
import subprocess
|
|
from pathlib import Path
|
|
|
|
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Model alias passed to the claude CLI via --model for the fact-check pass.
FACT_CHECK_MODEL = "sonnet"
# Default per-PR subprocess timeout, in seconds.
FACT_CHECK_TIMEOUT = 300  # 5 minutes per PR
|
|
|
|
|
|
def build_fact_check_prompt(
    pr_text: str,
    company_name: str,
    url: str,
    topic: str,
    keyword: str,
) -> str:
    """Assemble the adversarial fact-checking prompt for one press release.

    Client-supplied fields are embedded as immutable ground truth the
    checker must not dispute; the PR body is appended between ---
    markers for review.
    """
    header = (
        "You are a factual accuracy reviewer for press releases. Your ONLY job is to "
        "find and correct statements that are factually wrong. You are NOT an editor.\n\n"
    )
    # Client data is correct by definition -- the checker may never "fix" it.
    ground_truth = (
        "GROUND TRUTH -- the following data was provided by the client and is correct "
        "by definition. Do NOT change, question, or 'correct' any of it, even if your "
        "web search suggests something different:\n"
        f" - Company name: {company_name}\n"
        f" - Target URL: {url}\n"
        f" - Topic: {topic}\n"
        f" - Keyword: {keyword}\n"
        " - Any person names, titles, quotes, or contact details in the PR\n"
        " - Any product names, service names, or brand names\n"
        " - The overall framing, angle, and tone of the PR\n\n"
    )
    checks = (
        "WHAT TO CHECK (use WebSearch/WebFetch to verify):\n"
        " - Industry statistics or market size claims\n"
        " - Historical dates or facts\n"
        " - Technical specifications not sourced from the client data\n"
        " - General knowledge claims (e.g. 'X is the leading cause of Y')\n"
        " - Geographic or regulatory facts\n\n"
    )
    rules = (
        "RULES:\n"
        " - ONLY fix actual factual errors -- wrong numbers, wrong dates, wrong facts\n"
        " - Do NOT add content, remove content, restructure, or 'improve' anything\n"
        " - Do NOT change tone, style, word choice, or sentence structure\n"
        " - Do NOT suggest additions or enhancements\n"
        " - Make the MINIMUM change needed to fix each error\n"
        " - Preserve the exact formatting, paragraph breaks, and headline\n\n"
    )
    # The [NO_ERRORS] / [CORRECTED] sentinels are parsed by apply_fact_check.
    output_format = (
        "OUTPUT FORMAT:\n"
        " - If you find NO factual errors: output exactly [NO_ERRORS] and nothing else\n"
        " - If you find errors: output [CORRECTED] on the first line, then the full "
        "corrected PR text (preserving all formatting), then a blank line, then "
        "CHANGES: followed by a numbered list of what you changed and why\n\n"
    )
    payload = f"Press release to review:\n---\n{pr_text}\n---"
    return header + ground_truth + checks + rules + output_format + payload
|
|
|
|
|
|
def apply_fact_check(raw_output: str, original_text: str) -> tuple[str, str, str]:
    """Interpret the fact-checker's raw stdout.

    Returns (text, status, changes) where status is one of "clean",
    "corrected", or "skipped". Any empty, unparseable, or suspicious
    output falls back to the untouched original text.
    """
    # Empty or whitespace-only output: nothing usable came back.
    if not raw_output or not raw_output.strip():
        return original_text, "skipped", ""

    output = raw_output.strip()

    # Checker reviewed the PR and found nothing wrong.
    if output.startswith("[NO_ERRORS]"):
        return original_text, "clean", ""

    # Neither sentinel present: treat as unparseable output.
    if not output.startswith("[CORRECTED]"):
        return original_text, "skipped", ""

    payload = output[len("[CORRECTED]"):].strip()

    # Separate the corrected PR text from the trailing change log, if any.
    text_part, sep, change_log = payload.partition("\nCHANGES:")
    if sep:
        corrected = text_part.strip()
        change_log = change_log.strip()
    else:
        corrected = payload
        change_log = ""

    if not corrected:
        return original_text, "skipped", ""

    # Safety valve: a genuine fact fix barely moves the word count, so a
    # delta above 15% means the checker rewrote the PR -- reject it.
    orig_wc = len(original_text.split())
    new_wc = len(corrected.split())
    if orig_wc > 0 and abs(new_wc - orig_wc) / orig_wc > 0.15:
        log.warning(
            "Fact-check rejected: word count changed too much "
            "(%d -> %d, %.0f%% delta)",
            orig_wc, new_wc, abs(new_wc - orig_wc) / orig_wc * 100,
        )
        return original_text, "skipped", "rejected -- word count delta too large"

    return corrected, "corrected", change_log
|
|
|
|
|
|
def fact_check_pr_files(
    output_files: list[Path],
    company_name: str,
    url: str,
    topic: str,
    keyword: str,
    timeout: int = FACT_CHECK_TIMEOUT,
) -> tuple[list[str], bool]:
    """Fact-check every .txt press release in *output_files*, in place.

    Each PR goes through a separate ``claude`` CLI invocation limited to
    web-research tools; corrected text is written back to the same file.

    Returns:
        (status_lines, any_failed) where status_lines is a list of
        human-readable results per PR, and any_failed is True if the
        fact-checker could not run on at least one PR.
    """
    claude_bin = shutil.which("claude")
    if not claude_bin:
        log.warning("Fact-check: claude CLI not found, skipping")
        return ["Fact-check: claude CLI not found, skipped"], True

    # PR files are the .txt files (the actual press releases); companion
    # files such as "Headlines Evaluation.md" are not fact-checked.
    pr_files = [path for path in output_files if path.suffix == ".txt"]
    if not pr_files:
        return [], False

    results: list[str] = []
    had_failure = False

    for index, pr_path in enumerate(pr_files):
        label = "PR " + chr(65 + index)  # PR A, PR B, etc.
        try:
            original = pr_path.read_text(encoding="utf-8")
            if not original.strip():
                # Empty file: nothing to check.
                continue

            prompt = build_fact_check_prompt(
                original, company_name, url, topic, keyword
            )

            # Limit the checker to research tools; bypassPermissions lets it
            # run unattended inside the pipeline.
            cmd = [
                claude_bin,
                "-p", prompt,
                "--output-format", "text",
                "--permission-mode", "bypassPermissions",
                "--allowedTools", "WebSearch,WebFetch",
                "--max-turns", "10",
                "--model", FACT_CHECK_MODEL,
            ]

            log.info("Fact-checking %s: %s", label, pr_path.name)
            proc = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=timeout,
                cwd=str(pr_path.parent),
            )

            if proc.returncode != 0:
                log.warning(
                    "Fact-check %s failed (exit %d): %s",
                    label, proc.returncode, (proc.stderr or "")[:500],
                )
                results.append(
                    f"Fact-check {label}: could not run -- manual review recommended"
                )
                had_failure = True
                continue

            checked_text, status, changes = apply_fact_check(proc.stdout, original)

            if status == "corrected":
                # Overwrite the PR with the corrected text.
                pr_path.write_text(checked_text, encoding="utf-8")
                log.info("Fact-check %s: corrections applied", label)
                entry = f"Fact-check {label}: corrections applied"
                if changes:
                    entry = f"{entry}\n {changes}"
                results.append(entry)
            elif status == "clean":
                log.info("Fact-check %s: no errors found", label)
                results.append(f"Fact-check {label}: no errors found")
            else:
                # "skipped": output could not be parsed or was rejected.
                log.warning("Fact-check %s: skipped (unparseable output)", label)
                results.append(
                    f"Fact-check {label}: could not run -- manual review recommended"
                )
                had_failure = True

        except subprocess.TimeoutExpired:
            log.warning("Fact-check %s timed out after %ds", label, timeout)
            results.append(
                f"Fact-check {label}: timed out -- manual review recommended"
            )
            had_failure = True
        except Exception as exc:
            # Graceful failure boundary: never let one PR abort the batch.
            log.warning("Fact-check %s error: %s", label, exc)
            results.append(
                f"Fact-check {label}: could not run -- manual review recommended"
            )
            had_failure = True

    return results, had_failure
|