Add adversarial fact-check step to press release pipeline

Sonnet + WebSearch reviews each PR between generation and schema
steps. Returns [NO_ERRORS] or [CORRECTED] with change log; rewrites
that shift word count by more than 15% are rejected. Fact-check
failures are graceful -- PR still ships with a ClickUp note that
manual review is recommended. Wired into both the legacy pipeline
and the headless clickup_runner.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
clickup-runner
PeninsulaInd 2026-04-21 09:16:19 -05:00
parent 38a88987a0
commit abb6e1841b
5 changed files with 678 additions and 15 deletions

View File

@ -4,8 +4,9 @@ Autonomous workflow:
1. Generate 7 compliant headlines (chat brain)
2. AI judge picks the 2 best (chat brain)
3. Write 2 full press releases (execution brain x 2)
3.5. Adversarial fact-check (Sonnet + WebSearch, graceful failure)
4. Generate 2 JSON-LD schemas (execution brain x 2, Sonnet + WebSearch)
5. Save 4 files, return cost summary
5. Save files, return cost summary
"""
from __future__ import annotations
@ -35,6 +36,7 @@ _COMPANIES_FILE = _SKILLS_DIR / "companies.md"
_HEADLINES_FILE = _SKILLS_DIR / "headlines.md"
SONNET_CLI_MODEL = "sonnet"
FACT_CHECK_MODEL = "sonnet"
def _set_status(ctx: dict | None, message: str) -> None:
@ -524,6 +526,103 @@ def _build_schema_prompt(pr_text: str, company_name: str, url: str, skill_text:
return prompt
def _build_fact_check_prompt(
    pr_text: str,
    company_name: str,
    url: str,
    topic: str,
    keyword: str,
) -> str:
    """Build the prompt for the adversarial fact-checker step.

    All client-supplied inputs (company name, URL, topic, keyword) are
    declared as ground truth so the reviewer never "corrects" them. The
    model must answer with [NO_ERRORS], or [CORRECTED] plus a CHANGES:
    log, in exactly the format _apply_fact_check parses.
    """
    # Assembled from ordered sections so each instruction group (ground
    # truth, scope, rules, output contract) is easy to audit and edit.
    sections = [
        "You are a factual accuracy reviewer for press releases. Your ONLY job is to ",
        "find and correct statements that are factually wrong. You are NOT an editor.\n\n",
        "GROUND TRUTH -- the following data was provided by the client and is correct ",
        "by definition. Do NOT change, question, or 'correct' any of it, even if your ",
        "web search suggests something different:\n",
        f" - Company name: {company_name}\n",
        f" - Target URL: {url}\n",
        f" - Topic: {topic}\n",
        f" - Keyword: {keyword}\n",
        " - Any person names, titles, quotes, or contact details in the PR\n",
        " - Any product names, service names, or brand names\n",
        " - The overall framing, angle, and tone of the PR\n\n",
        "WHAT TO CHECK (use WebSearch/WebFetch to verify):\n",
        " - Industry statistics or market size claims\n",
        " - Historical dates or facts\n",
        " - Technical specifications not sourced from the client data\n",
        " - General knowledge claims (e.g. 'X is the leading cause of Y')\n",
        " - Geographic or regulatory facts\n\n",
        "RULES:\n",
        " - ONLY fix actual factual errors -- wrong numbers, wrong dates, wrong facts\n",
        " - Do NOT add content, remove content, restructure, or 'improve' anything\n",
        " - Do NOT change tone, style, word choice, or sentence structure\n",
        " - Do NOT suggest additions or enhancements\n",
        " - Make the MINIMUM change needed to fix each error\n",
        " - Preserve the exact formatting, paragraph breaks, and headline\n\n",
        "OUTPUT FORMAT:\n",
        " - If you find NO factual errors: output exactly [NO_ERRORS] and nothing else\n",
        " - If you find errors: output [CORRECTED] on the first line, then the full ",
        "corrected PR text (preserving all formatting), then a blank line, then ",
        "CHANGES: followed by a numbered list of what you changed and why\n\n",
        "Press release to review:\n",
        "---\n",
        f"{pr_text}\n",
        "---",
    ]
    return "".join(sections)
def _apply_fact_check(
    raw_output: str, original_text: str
) -> tuple[str, str, str]:
    """Parse fact-checker output into (text, status, changes).

    status is one of: "clean", "corrected", "skipped". On any parse
    failure -- or a rewrite that drifts too far from the original -- the
    original press-release text is returned unchanged.
    """
    reply = raw_output.strip() if raw_output else ""
    if not reply:
        # Empty or whitespace-only model output: nothing to apply.
        return original_text, "skipped", ""
    if reply.startswith("[NO_ERRORS]"):
        # Reviewer signed off on the PR as-is.
        return original_text, "clean", ""
    if not reply.startswith("[CORRECTED]"):
        # Neither sentinel present -- output is unparseable.
        return original_text, "skipped", ""
    # Drop the [CORRECTED] prefix, then separate text from the change log.
    payload = reply[len("[CORRECTED]"):].strip()
    marker = "\nCHANGES:"
    if marker in payload:
        text_part, log_part = payload.split(marker, 1)
        corrected = text_part.strip()
        changes = log_part.strip()
    else:
        corrected, changes = payload, ""
    if not corrected:
        return original_text, "skipped", ""
    # Safety: reject if word count differs by more than 15%
    orig_wc = _word_count(original_text)
    new_wc = _word_count(corrected)
    if orig_wc > 0 and abs(new_wc - orig_wc) / orig_wc > 0.15:
        log.warning(
            "Fact-check rejected: word count changed too much "
            "(%d -> %d, %.0f%% delta)",
            orig_wc, new_wc, abs(new_wc - orig_wc) / orig_wc * 100,
        )
        return original_text, "skipped", "rejected -- word count delta too large"
    return corrected, "corrected", changes
# ---------------------------------------------------------------------------
# Main tool
# ---------------------------------------------------------------------------
@ -601,8 +700,8 @@ def write_press_releases(
cost_log: list[dict] = []
# ── Step 1: Generate 7 headlines (chat brain) ─────────────────────────
log.info("[PR Pipeline] Step 1/4: Generating 7 headlines for %s...", company_name)
_set_status(ctx, f"Step 1/4: Generating 7 headlines for {company_name}...")
log.info("[PR Pipeline] Step 1/5: Generating 7 headlines for %s...", company_name)
_set_status(ctx, f"Step 1/5: Generating 7 headlines for {company_name}...")
step_start = time.time()
headline_prompt = _build_headline_prompt(topic, company_name, url, lsi_terms, headlines_ref)
messages = [
@ -627,8 +726,8 @@ def write_press_releases(
headlines_file.write_text(headlines_raw.strip(), encoding="utf-8")
# ── Step 2: AI judge picks best 2 (chat brain) ───────────────────────
log.info("[PR Pipeline] Step 2/4: AI judge selecting best 2 headlines...")
_set_status(ctx, "Step 2/4: AI judge selecting best 2 headlines...")
log.info("[PR Pipeline] Step 2/5: AI judge selecting best 2 headlines...")
_set_status(ctx, "Step 2/5: AI judge selecting best 2 headlines...")
step_start = time.time()
judge_prompt = _build_judge_prompt(headlines_raw, headlines_ref, topic)
messages = [
@ -666,7 +765,7 @@ def write_press_releases(
winners = winners[:2]
# ── Step 3: Write 2 press releases (execution brain x 2) ─────────────
log.info("[PR Pipeline] Step 3/4: Writing 2 press releases...")
log.info("[PR Pipeline] Step 3/5: Writing 2 press releases...")
anchor_phrase = _derive_anchor_phrase(company_name, keyword) if keyword else ""
pr_texts: list[str] = []
pr_files: list[str] = []
@ -674,7 +773,7 @@ def write_press_releases(
anchor_warnings: list[str] = []
for i, headline in enumerate(winners):
log.info("[PR Pipeline] Writing PR %d/2: %s", i + 1, headline[:60])
_set_status(ctx, f"Step 3/4: Writing press release {i + 1}/2 — {headline[:60]}...")
_set_status(ctx, f"Step 3/5: Writing press release {i + 1}/2 — {headline[:60]}...")
step_start = time.time()
pr_prompt = _build_pr_prompt(
headline,
@ -737,6 +836,65 @@ def write_press_releases(
text_to_docx(clean_result, docx_path)
docx_files.append(str(docx_path))
# ── Step 3.5: Adversarial fact-check (Sonnet + WebSearch) ───────────
log.info("[PR Pipeline] Step 3.5/5: Running adversarial fact-check...")
fact_check_statuses: list[str] = [] # per-PR: "clean", "corrected", "skipped"
fact_check_changes: list[str] = [] # per-PR change log (empty if clean/skipped)
fact_check_failed = False
for i, pr_text in enumerate(pr_texts):
log.info("[PR Pipeline] Fact-checking PR %d/2...", i + 1)
_set_status(ctx, f"Step 3.5/5: Fact-checking PR {i + 1}/2...")
step_start = time.time()
try:
fc_prompt = _build_fact_check_prompt(
pr_text, company_name, url, topic, keyword
)
fc_result = agent.execute_task(
fc_prompt, tools="WebSearch,WebFetch", model=FACT_CHECK_MODEL
)
corrected, status, changes = _apply_fact_check(fc_result, pr_text)
fact_check_statuses.append(status)
fact_check_changes.append(changes)
if status == "corrected":
pr_texts[i] = corrected
# Re-write files with corrected text
Path(pr_files[i]).write_text(corrected, encoding="utf-8")
text_to_docx(corrected, Path(docx_files[i]))
log.info(
"[PR Pipeline] PR %d: %d correction(s) applied",
i + 1, changes.count("\n") + 1 if changes else 1,
)
elif status == "clean":
log.info("[PR Pipeline] PR %d: no factual errors found", i + 1)
else:
log.warning("[PR Pipeline] PR %d: fact-check skipped (unparseable output)", i + 1)
elapsed = round(time.time() - step_start, 1)
cost_log.append(
{
"step": f"3.5{chr(97 + i)}. Fact-check PR {i + 1}",
"model": FACT_CHECK_MODEL,
"elapsed_s": elapsed,
}
)
except Exception as e:
fact_check_failed = True
fact_check_statuses.append("skipped")
fact_check_changes.append("")
log.warning("[PR Pipeline] PR %d fact-check failed: %s", i + 1, e)
# Notify ClickUp if fact-check could not run at all
if fact_check_failed and clickup_task_id and cu_client:
try:
cu_client.add_comment(
clickup_task_id,
"Note: factual accuracy check could not be run on this PR. "
"Manual review recommended.",
)
except Exception as e:
log.warning("ClickUp fact-check warning failed for %s: %s", clickup_task_id, e)
# ── ClickUp: upload docx attachments + comment ─────────────────────
uploaded_count = 0
failed_uploads: list[str] = []
@ -755,11 +913,27 @@ def write_press_releases(
f"\n[WARNING]Warning: {len(failed_uploads)} attachment(s) failed to upload. "
f"Files saved locally at:\n{paths_list}"
)
# Build fact-check summary for comment
fc_summary = ""
for fi, fc_status in enumerate(fact_check_statuses):
label = f"PR {chr(65 + fi)}"
if fc_status == "corrected":
fc_summary += f"\nFact-check {label}: corrections applied"
if fact_check_changes[fi]:
fc_summary += f"\n {fact_check_changes[fi]}"
elif fc_status == "clean":
fc_summary += f"\nFact-check {label}: no errors found"
else:
fc_summary += (
f"\nFact-check {label}: could not run -- manual review recommended"
)
cu_client.add_comment(
clickup_task_id,
f"📎 Saved {len(docx_files)} press release(s). "
f"Saved {len(docx_files)} press release(s). "
f"{uploaded_count} file(s) attached.\n"
f"Generating JSON-LD schemas next...{upload_warning}",
f"Generating JSON-LD schemas next...{upload_warning}"
f"{fc_summary}",
)
log.info(
"ClickUp: uploaded %d attachments for task %s", uploaded_count, clickup_task_id
@ -768,12 +942,12 @@ def write_press_releases(
log.warning("ClickUp attachment upload failed for %s: %s", clickup_task_id, e)
# ── Step 4: Generate 2 JSON-LD schemas (Sonnet + WebSearch) ───────────
log.info("[PR Pipeline] Step 4/4: Generating 2 JSON-LD schemas...")
log.info("[PR Pipeline] Step 4/5: Generating 2 JSON-LD schemas...")
schema_texts: list[str] = []
schema_files: list[str] = []
for i, pr_text in enumerate(pr_texts):
log.info("[PR Pipeline] Schema %d/2 for: %s", i + 1, winners[i][:60])
_set_status(ctx, f"Step 4/4: Generating schema {i + 1}/2...")
_set_status(ctx, f"Step 4/5: Generating schema {i + 1}/2...")
step_start = time.time()
schema_prompt = _build_schema_prompt(pr_text, company_name, url, schema_skill)
exec_tools = "WebSearch,WebFetch"

View File

@ -16,6 +16,7 @@ from pathlib import Path
from .autocora import archive_result, scan_results, submit_job
from .blm import find_cora_xlsx, run_generate, run_ingest
from .fact_check import fact_check_pr_files
from .claude_runner import (
RunResult,
build_prompt,
@ -632,6 +633,24 @@ def _dispatch_claude(
_cleanup_work_dir(result.work_dir)
return
# 5b. Fact-check PR files (Press Release only, graceful failure)
fc_status_lines: list[str] = []
if task.task_type == "Press Release":
log.info("Running adversarial fact-check for task %s", task.id)
company = task.get_field_value("Client") or ""
pr_topic = task.get_field_value("PR Topic") or ""
pr_keyword = task.get_field_value("Keyword") or ""
pr_url = task.get_field_value("IMSURL") or ""
fc_status_lines, fc_failed = fact_check_pr_files(
result.output_files,
company_name=company,
url=pr_url,
topic=pr_topic,
keyword=pr_keyword,
)
if fc_failed:
log.warning("Fact-check had failures for task %s", task.id)
# 6. Upload output files to ClickUp
uploaded = 0
for f in result.output_files:
@ -651,6 +670,8 @@ def _dispatch_claude(
# 9. Post success comment
summary = "Stage complete. %d file(s) attached." % uploaded
if fc_status_lines:
summary += "\n" + "\n".join(fc_status_lines)
if result.output:
# Include first 500 chars of Claude's output as context
truncated = result.output[:500]

View File

@ -0,0 +1,220 @@
"""Adversarial fact-checker for press release outputs.
Runs a second Claude Code pass on generated PR text files to catch
factual errors. Treats all client-provided data (company name, titles,
URLs, topic) as ground truth and only corrects claims the PR inferred
or fabricated beyond what was given.
Graceful failure: any error returns the original text untouched.
"""
from __future__ import annotations
import logging
import shutil
import subprocess
from pathlib import Path
log = logging.getLogger(__name__)
FACT_CHECK_MODEL = "sonnet"
FACT_CHECK_TIMEOUT = 300 # 5 minutes per PR
def build_fact_check_prompt(
    pr_text: str,
    company_name: str,
    url: str,
    topic: str,
    keyword: str,
) -> str:
    """Build the prompt for the adversarial fact-checker.

    Client-provided fields are pinned as ground truth so the reviewer
    never "corrects" them; the reply must be [NO_ERRORS], or [CORRECTED]
    plus a CHANGES: log, in the exact shape apply_fact_check parses.
    """
    return (
        "You are a factual accuracy reviewer for press releases. Your ONLY job is to "
        "find and correct statements that are factually wrong. You are NOT an editor.\n\n"
        "GROUND TRUTH -- the following data was provided by the client and is correct "
        "by definition. Do NOT change, question, or 'correct' any of it, even if your "
        "web search suggests something different:\n"
        f" - Company name: {company_name}\n"
        f" - Target URL: {url}\n"
        f" - Topic: {topic}\n"
        f" - Keyword: {keyword}\n"
        " - Any person names, titles, quotes, or contact details in the PR\n"
        " - Any product names, service names, or brand names\n"
        " - The overall framing, angle, and tone of the PR\n\n"
        "WHAT TO CHECK (use WebSearch/WebFetch to verify):\n"
        " - Industry statistics or market size claims\n"
        " - Historical dates or facts\n"
        " - Technical specifications not sourced from the client data\n"
        " - General knowledge claims (e.g. 'X is the leading cause of Y')\n"
        " - Geographic or regulatory facts\n\n"
        "RULES:\n"
        " - ONLY fix actual factual errors -- wrong numbers, wrong dates, wrong facts\n"
        " - Do NOT add content, remove content, restructure, or 'improve' anything\n"
        " - Do NOT change tone, style, word choice, or sentence structure\n"
        " - Do NOT suggest additions or enhancements\n"
        " - Make the MINIMUM change needed to fix each error\n"
        " - Preserve the exact formatting, paragraph breaks, and headline\n\n"
        "OUTPUT FORMAT:\n"
        " - If you find NO factual errors: output exactly [NO_ERRORS] and nothing else\n"
        " - If you find errors: output [CORRECTED] on the first line, then the full "
        "corrected PR text (preserving all formatting), then a blank line, then "
        "CHANGES: followed by a numbered list of what you changed and why\n\n"
        "Press release to review:\n"
        "---\n"
        f"{pr_text}\n"
        "---"
    )
def apply_fact_check(raw_output: str, original_text: str) -> tuple[str, str, str]:
    """Parse the fact-checker's reply into (text, status, changes).

    status is one of: "clean", "corrected", "skipped". Anything
    unparseable -- or a "correction" whose size drifts too far from the
    original -- falls back to the untouched original text.
    """
    reply = (raw_output or "").strip()
    if not reply:
        # Empty or whitespace-only output: nothing usable came back.
        return original_text, "skipped", ""
    if reply.startswith("[NO_ERRORS]"):
        # Reviewer confirmed the PR as-is.
        return original_text, "clean", ""
    if not reply.startswith("[CORRECTED]"):
        # Neither sentinel: treat as unparseable and keep the original.
        return original_text, "skipped", ""
    # Remove the [CORRECTED] prefix and peel off the trailing change log.
    payload = reply[len("[CORRECTED]"):].strip()
    marker = "\nCHANGES:"
    if marker in payload:
        corrected_part, change_log = payload.split(marker, 1)
        corrected = corrected_part.strip()
        changes = change_log.strip()
    else:
        corrected, changes = payload, ""
    if not corrected:
        return original_text, "skipped", ""
    # Safety valve: a rewrite that moves the word count by more than 15%
    # is an editorial rewrite, not a factual fix -- reject it.
    orig_wc = len(original_text.split())
    new_wc = len(corrected.split())
    if orig_wc > 0 and abs(new_wc - orig_wc) / orig_wc > 0.15:
        log.warning(
            "Fact-check rejected: word count changed too much "
            "(%d -> %d, %.0f%% delta)",
            orig_wc, new_wc, abs(new_wc - orig_wc) / orig_wc * 100,
        )
        return original_text, "skipped", "rejected -- word count delta too large"
    return corrected, "corrected", changes
def fact_check_pr_files(
    output_files: list[Path],
    company_name: str,
    url: str,
    topic: str,
    keyword: str,
    timeout: int = FACT_CHECK_TIMEOUT,
) -> tuple[list[str], bool]:
    """Fact-check every .txt press release in *output_files*, in place.

    Each PR is sent through the claude CLI with WebSearch/WebFetch enabled;
    accepted corrections are written back to the same file.

    Returns:
        (status_lines, any_failed) where status_lines is a list of
        human-readable results per PR, and any_failed is True if the
        fact-checker could not run on at least one PR.
    """
    claude_bin = shutil.which("claude")
    if not claude_bin:
        # Graceful degradation: report the skip rather than raising.
        log.warning("Fact-check: claude CLI not found, skipping")
        return ["Fact-check: claude CLI not found, skipped"], True

    # Only .txt files are actual press releases; other outputs (e.g. a
    # "Headlines Evaluation.md") are deliberately left alone.
    pr_paths = [f for f in output_files if f.suffix == ".txt"]
    if not pr_paths:
        return [], False

    status_lines: list[str] = []
    any_failed = False
    for idx, pr_path in enumerate(pr_paths):
        label = f"PR {chr(65 + idx)}"  # PR A, PR B, etc.
        try:
            source_text = pr_path.read_text(encoding="utf-8")
            if not source_text.strip():
                # Empty file: nothing to check, no status line emitted.
                continue
            prompt = build_fact_check_prompt(
                source_text, company_name, url, topic, keyword
            )
            cmd = [
                claude_bin,
                "-p", prompt,
                "--output-format", "text",
                "--permission-mode", "bypassPermissions",
                "--allowedTools", "WebSearch,WebFetch",
                "--max-turns", "10",
                "--model", FACT_CHECK_MODEL,
            ]
            log.info("Fact-checking %s: %s", label, pr_path.name)
            proc = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=timeout,
                cwd=str(pr_path.parent),
            )
            if proc.returncode != 0:
                log.warning(
                    "Fact-check %s failed (exit %d): %s",
                    label, proc.returncode, (proc.stderr or "")[:500],
                )
                status_lines.append(
                    f"Fact-check {label}: could not run -- manual review recommended"
                )
                any_failed = True
                continue
            corrected, status, changes = apply_fact_check(proc.stdout, source_text)
            if status == "corrected":
                # Persist accepted corrections back to the same PR file.
                pr_path.write_text(corrected, encoding="utf-8")
                log.info("Fact-check %s: corrections applied", label)
                line = f"Fact-check {label}: corrections applied"
                if changes:
                    line += "\n %s" % changes
                status_lines.append(line)
            elif status == "clean":
                log.info("Fact-check %s: no errors found", label)
                status_lines.append(f"Fact-check {label}: no errors found")
            else:
                # "skipped" -- parser could not use the model output.
                log.warning("Fact-check %s: skipped (unparseable output)", label)
                status_lines.append(
                    f"Fact-check {label}: could not run -- manual review recommended"
                )
                any_failed = True
        except subprocess.TimeoutExpired:
            log.warning("Fact-check %s timed out after %ds", label, timeout)
            status_lines.append(
                f"Fact-check {label}: timed out -- manual review recommended"
            )
            any_failed = True
        except Exception as e:
            # Catch-all keeps the pipeline alive; the PR simply ships
            # with a manual-review note instead.
            log.warning("Fact-check %s error: %s", label, e)
            status_lines.append(
                f"Fact-check {label}: could not run -- manual review recommended"
            )
            any_failed = True
    return status_lines, any_failed

View File

@ -0,0 +1,126 @@
"""Tests for the adversarial fact-checker helpers in press_release.py."""
from cheddahbot.tools.press_release import _apply_fact_check, _build_fact_check_prompt
class TestApplyFactCheck:
    """Tests for _apply_fact_check output parsing."""

    # Realistic PR fixture. The "500 widget variants" figure is the fact
    # the corrected-output tests pretend the checker fixed.
    ORIGINAL = (
        "Acme Corp Delivers Advanced Widget Solutions\n\n"
        "Acme Corp, a leading manufacturer of widgets, today highlighted "
        "its expanded product line. The company, based in Milwaukee, Wisconsin, "
        "produces over 500 widget variants for industrial applications."
    )

    def test_no_errors_returns_original(self):
        # [NO_ERRORS] sentinel: the original text passes through untouched.
        text, status, changes = _apply_fact_check("[NO_ERRORS]", self.ORIGINAL)
        assert status == "clean"
        assert text == self.ORIGINAL
        assert changes == ""

    def test_no_errors_with_trailing_whitespace(self):
        # Sentinel detection must tolerate trailing whitespace/newlines.
        text, status, changes = _apply_fact_check("[NO_ERRORS] \n", self.ORIGINAL)
        assert status == "clean"
        assert text == self.ORIGINAL

    def test_corrected_with_changes(self):
        # [CORRECTED] + CHANGES: both the new text and the log are extracted.
        corrected_pr = self.ORIGINAL.replace("500 widget", "300 widget")
        raw = (
            f"[CORRECTED]\n{corrected_pr}\n\n"
            "CHANGES:\n1. Changed '500 widget variants' to '300 widget variants' "
            "-- company website lists 300."
        )
        text, status, changes = _apply_fact_check(raw, self.ORIGINAL)
        assert status == "corrected"
        assert "300 widget" in text
        assert "500" not in text
        assert "300 widget variants" in changes

    def test_corrected_without_changes_section(self):
        # A missing CHANGES: section still yields the corrected text.
        corrected_pr = self.ORIGINAL.replace("500", "300")
        raw = f"[CORRECTED]\n{corrected_pr}"
        text, status, changes = _apply_fact_check(raw, self.ORIGINAL)
        assert status == "corrected"
        assert "300" in text
        assert changes == ""

    def test_empty_output_returns_skipped(self):
        text, status, changes = _apply_fact_check("", self.ORIGINAL)
        assert status == "skipped"
        assert text == self.ORIGINAL

    def test_none_like_output_returns_skipped(self):
        # Whitespace-only output counts as empty.
        text, status, changes = _apply_fact_check(" \n ", self.ORIGINAL)
        assert status == "skipped"
        assert text == self.ORIGINAL

    def test_garbage_output_returns_skipped(self):
        # Output without either sentinel is treated as unparseable.
        text, status, changes = _apply_fact_check(
            "I reviewed the press release and it looks good overall.", self.ORIGINAL
        )
        assert status == "skipped"
        assert text == self.ORIGINAL

    def test_rejects_oversized_rewrite(self):
        """If fact-checker rewrites too much (>15% word count delta), reject."""
        # Double the content -- way more than 15%
        bloated = self.ORIGINAL + "\n\n" + self.ORIGINAL + "\n\nExtra content here."
        raw = f"[CORRECTED]\n{bloated}\n\nCHANGES:\n1. Added more detail."
        text, status, changes = _apply_fact_check(raw, self.ORIGINAL)
        assert status == "skipped"
        assert text == self.ORIGINAL
        assert "word count delta" in changes

    def test_accepts_minor_word_count_change(self):
        """Small changes (within 15%) should be accepted."""
        # Change one word -- well within 15%
        minor_edit = self.ORIGINAL.replace("500 widget variants", "480 widget variants")
        raw = (
            f"[CORRECTED]\n{minor_edit}\n\n"
            "CHANGES:\n1. Corrected variant count from 500 to 480."
        )
        text, status, changes = _apply_fact_check(raw, self.ORIGINAL)
        assert status == "corrected"
        assert "480" in text

    def test_corrected_but_empty_body_returns_skipped(self):
        # [CORRECTED] with no body: nothing to apply, keep the original.
        text, status, changes = _apply_fact_check("[CORRECTED]\n", self.ORIGINAL)
        assert status == "skipped"
        assert text == self.ORIGINAL
class TestBuildFactCheckPrompt:
    """Tests for _build_fact_check_prompt structure."""

    def test_includes_ground_truth_data(self):
        # Every client-supplied field must appear in the ground-truth block.
        prompt = _build_fact_check_prompt(
            "Some PR text here.",
            company_name="Acme Corp",
            url="https://acme.com",
            topic="widgets",
            keyword="industrial widgets",
        )
        assert "Acme Corp" in prompt
        assert "https://acme.com" in prompt
        assert "widgets" in prompt
        assert "industrial widgets" in prompt
        assert "ground truth" in prompt.lower() or "GROUND TRUTH" in prompt

    def test_includes_pr_text(self):
        # The PR under review is embedded verbatim in the prompt.
        prompt = _build_fact_check_prompt(
            "The quick brown fox.",
            company_name="Test",
            url="https://test.com",
            topic="foxes",
            keyword="brown fox",
        )
        assert "The quick brown fox." in prompt

    def test_output_format_instructions(self):
        # Both sentinels and the CHANGES: marker must be spelled out, since
        # _apply_fact_check parses the reply against these exact tokens.
        prompt = _build_fact_check_prompt(
            "Text.", company_name="X", url="u", topic="t", keyword="k"
        )
        assert "[NO_ERRORS]" in prompt
        assert "[CORRECTED]" in prompt
        assert "CHANGES:" in prompt

View File

@ -0,0 +1,122 @@
"""Tests for clickup_runner.fact_check module."""
from clickup_runner.fact_check import apply_fact_check, build_fact_check_prompt
class TestApplyFactCheck:
    """Tests for apply_fact_check output parsing."""

    # Realistic PR fixture. The "500 widget variants" figure is the fact
    # the corrected-output tests pretend the checker fixed.
    ORIGINAL = (
        "Acme Corp Delivers Advanced Widget Solutions\n\n"
        "Acme Corp, a leading manufacturer of widgets, today highlighted "
        "its expanded product line. The company, based in Milwaukee, Wisconsin, "
        "produces over 500 widget variants for industrial applications."
    )

    def test_no_errors_returns_original(self):
        # [NO_ERRORS] sentinel: the original text passes through untouched.
        text, status, changes = apply_fact_check("[NO_ERRORS]", self.ORIGINAL)
        assert status == "clean"
        assert text == self.ORIGINAL
        assert changes == ""

    def test_no_errors_with_trailing_whitespace(self):
        # Sentinel detection must tolerate trailing whitespace/newlines.
        text, status, changes = apply_fact_check("[NO_ERRORS] \n", self.ORIGINAL)
        assert status == "clean"
        assert text == self.ORIGINAL

    def test_corrected_with_changes(self):
        # NOTE: the adjacent string literals concatenate at compile time,
        # so the single trailing "% corrected_pr" formats the whole template.
        corrected_pr = self.ORIGINAL.replace("500 widget", "300 widget")
        raw = (
            "[CORRECTED]\n%s\n\n"
            "CHANGES:\n1. Changed '500 widget variants' to '300 widget variants' "
            "-- company website lists 300." % corrected_pr
        )
        text, status, changes = apply_fact_check(raw, self.ORIGINAL)
        assert status == "corrected"
        assert "300 widget" in text
        assert "500" not in text
        assert "300 widget variants" in changes

    def test_corrected_without_changes_section(self):
        # A missing CHANGES: section still yields the corrected text.
        corrected_pr = self.ORIGINAL.replace("500", "300")
        raw = "[CORRECTED]\n%s" % corrected_pr
        text, status, changes = apply_fact_check(raw, self.ORIGINAL)
        assert status == "corrected"
        assert "300" in text
        assert changes == ""

    def test_empty_output_returns_skipped(self):
        text, status, changes = apply_fact_check("", self.ORIGINAL)
        assert status == "skipped"
        assert text == self.ORIGINAL

    def test_whitespace_only_returns_skipped(self):
        # Whitespace-only output counts as empty.
        text, status, changes = apply_fact_check(" \n ", self.ORIGINAL)
        assert status == "skipped"
        assert text == self.ORIGINAL

    def test_garbage_output_returns_skipped(self):
        # Output without either sentinel is treated as unparseable.
        text, status, changes = apply_fact_check(
            "I reviewed the press release and it looks good overall.", self.ORIGINAL
        )
        assert status == "skipped"
        assert text == self.ORIGINAL

    def test_rejects_oversized_rewrite(self):
        # Doubling the content is far beyond the 15% word-count guard.
        bloated = self.ORIGINAL + "\n\n" + self.ORIGINAL + "\n\nExtra content."
        raw = "[CORRECTED]\n%s\n\nCHANGES:\n1. Added more detail." % bloated
        text, status, changes = apply_fact_check(raw, self.ORIGINAL)
        assert status == "skipped"
        assert text == self.ORIGINAL
        assert "word count delta" in changes

    def test_accepts_minor_word_count_change(self):
        # A one-word numeric fix stays well within the 15% guard.
        minor_edit = self.ORIGINAL.replace("500 widget variants", "480 widget variants")
        raw = (
            "[CORRECTED]\n%s\n\n"
            "CHANGES:\n1. Corrected variant count from 500 to 480." % minor_edit
        )
        text, status, changes = apply_fact_check(raw, self.ORIGINAL)
        assert status == "corrected"
        assert "480" in text

    def test_corrected_empty_body_returns_skipped(self):
        # [CORRECTED] with no body: nothing to apply, keep the original.
        text, status, changes = apply_fact_check("[CORRECTED]\n", self.ORIGINAL)
        assert status == "skipped"
        assert text == self.ORIGINAL
class TestBuildFactCheckPrompt:
    """Tests for build_fact_check_prompt structure."""

    def test_includes_ground_truth_data(self):
        # Every client-supplied field must appear in the ground-truth block.
        prompt = build_fact_check_prompt(
            "Some PR text.",
            company_name="Acme Corp",
            url="https://acme.com",
            topic="widgets",
            keyword="industrial widgets",
        )
        assert "Acme Corp" in prompt
        assert "https://acme.com" in prompt
        assert "widgets" in prompt
        assert "industrial widgets" in prompt
        assert "GROUND TRUTH" in prompt

    def test_includes_pr_text(self):
        # The PR under review is embedded verbatim in the prompt.
        prompt = build_fact_check_prompt(
            "The quick brown fox.",
            company_name="Test",
            url="https://test.com",
            topic="foxes",
            keyword="brown fox",
        )
        assert "The quick brown fox." in prompt

    def test_output_format_instructions(self):
        # Both sentinels and the CHANGES: marker must be spelled out, since
        # apply_fact_check parses the reply against these exact tokens.
        prompt = build_fact_check_prompt(
            "Text.", company_name="X", url="u", topic="t", keyword="k"
        )
        assert "[NO_ERRORS]" in prompt
        assert "[CORRECTED]" in prompt
        assert "CHANGES:" in prompt