Add adversarial fact-check step to press release pipeline
Sonnet + WebSearch reviews each PR between the generation and schema steps. Returns [NO_ERRORS] or [CORRECTED] with a change log; rewrites that shift the word count by more than 15% are rejected. Fact-check failures are graceful -- the PR still ships, with a ClickUp note that manual review is recommended. Wired into both the legacy pipeline and the headless clickup_runner. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
clickup-runner
parent
38a88987a0
commit
abb6e1841b
|
|
@ -4,8 +4,9 @@ Autonomous workflow:
|
||||||
1. Generate 7 compliant headlines (chat brain)
|
1. Generate 7 compliant headlines (chat brain)
|
||||||
2. AI judge picks the 2 best (chat brain)
|
2. AI judge picks the 2 best (chat brain)
|
||||||
3. Write 2 full press releases (execution brain x 2)
|
3. Write 2 full press releases (execution brain x 2)
|
||||||
|
3.5. Adversarial fact-check (Sonnet + WebSearch, graceful failure)
|
||||||
4. Generate 2 JSON-LD schemas (execution brain x 2, Sonnet + WebSearch)
|
4. Generate 2 JSON-LD schemas (execution brain x 2, Sonnet + WebSearch)
|
||||||
5. Save 4 files, return cost summary
|
5. Save files, return cost summary
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
@ -35,6 +36,7 @@ _COMPANIES_FILE = _SKILLS_DIR / "companies.md"
|
||||||
_HEADLINES_FILE = _SKILLS_DIR / "headlines.md"
|
_HEADLINES_FILE = _SKILLS_DIR / "headlines.md"
|
||||||
|
|
||||||
SONNET_CLI_MODEL = "sonnet"
|
SONNET_CLI_MODEL = "sonnet"
|
||||||
|
FACT_CHECK_MODEL = "sonnet"
|
||||||
|
|
||||||
|
|
||||||
def _set_status(ctx: dict | None, message: str) -> None:
|
def _set_status(ctx: dict | None, message: str) -> None:
|
||||||
|
|
@ -524,6 +526,103 @@ def _build_schema_prompt(pr_text: str, company_name: str, url: str, skill_text:
|
||||||
return prompt
|
return prompt
|
||||||
|
|
||||||
|
|
||||||
|
def _build_fact_check_prompt(
|
||||||
|
pr_text: str,
|
||||||
|
company_name: str,
|
||||||
|
url: str,
|
||||||
|
topic: str,
|
||||||
|
keyword: str,
|
||||||
|
) -> str:
|
||||||
|
"""Build the prompt for the adversarial fact-checker step."""
|
||||||
|
return (
|
||||||
|
"You are a factual accuracy reviewer for press releases. Your ONLY job is to "
|
||||||
|
"find and correct statements that are factually wrong. You are NOT an editor.\n\n"
|
||||||
|
"GROUND TRUTH -- the following data was provided by the client and is correct "
|
||||||
|
"by definition. Do NOT change, question, or 'correct' any of it, even if your "
|
||||||
|
"web search suggests something different:\n"
|
||||||
|
f" - Company name: {company_name}\n"
|
||||||
|
f" - Target URL: {url}\n"
|
||||||
|
f" - Topic: {topic}\n"
|
||||||
|
f" - Keyword: {keyword}\n"
|
||||||
|
" - Any person names, titles, quotes, or contact details in the PR\n"
|
||||||
|
" - Any product names, service names, or brand names\n"
|
||||||
|
" - The overall framing, angle, and tone of the PR\n\n"
|
||||||
|
"WHAT TO CHECK (use WebSearch/WebFetch to verify):\n"
|
||||||
|
" - Industry statistics or market size claims\n"
|
||||||
|
" - Historical dates or facts\n"
|
||||||
|
" - Technical specifications not sourced from the client data\n"
|
||||||
|
" - General knowledge claims (e.g. 'X is the leading cause of Y')\n"
|
||||||
|
" - Geographic or regulatory facts\n\n"
|
||||||
|
"RULES:\n"
|
||||||
|
" - ONLY fix actual factual errors -- wrong numbers, wrong dates, wrong facts\n"
|
||||||
|
" - Do NOT add content, remove content, restructure, or 'improve' anything\n"
|
||||||
|
" - Do NOT change tone, style, word choice, or sentence structure\n"
|
||||||
|
" - Do NOT suggest additions or enhancements\n"
|
||||||
|
" - Make the MINIMUM change needed to fix each error\n"
|
||||||
|
" - Preserve the exact formatting, paragraph breaks, and headline\n\n"
|
||||||
|
"OUTPUT FORMAT:\n"
|
||||||
|
" - If you find NO factual errors: output exactly [NO_ERRORS] and nothing else\n"
|
||||||
|
" - If you find errors: output [CORRECTED] on the first line, then the full "
|
||||||
|
"corrected PR text (preserving all formatting), then a blank line, then "
|
||||||
|
"CHANGES: followed by a numbered list of what you changed and why\n\n"
|
||||||
|
"Press release to review:\n"
|
||||||
|
"---\n"
|
||||||
|
f"{pr_text}\n"
|
||||||
|
"---"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _apply_fact_check(
|
||||||
|
raw_output: str, original_text: str
|
||||||
|
) -> tuple[str, str, str]:
|
||||||
|
"""Parse fact-checker output. Returns (text, status, changes).
|
||||||
|
|
||||||
|
status is one of: "clean", "corrected", "skipped"
|
||||||
|
On any parse failure or suspect rewrite, returns original text unchanged.
|
||||||
|
"""
|
||||||
|
if not raw_output or not raw_output.strip():
|
||||||
|
return original_text, "skipped", ""
|
||||||
|
|
||||||
|
stripped = raw_output.strip()
|
||||||
|
|
||||||
|
# No errors found
|
||||||
|
if stripped.startswith("[NO_ERRORS]"):
|
||||||
|
return original_text, "clean", ""
|
||||||
|
|
||||||
|
# Corrections found
|
||||||
|
if stripped.startswith("[CORRECTED]"):
|
||||||
|
# Split off the [CORRECTED] prefix
|
||||||
|
body = stripped[len("[CORRECTED]"):].strip()
|
||||||
|
|
||||||
|
# Split into corrected text and change log
|
||||||
|
changes = ""
|
||||||
|
if "\nCHANGES:" in body:
|
||||||
|
text_part, changes = body.split("\nCHANGES:", 1)
|
||||||
|
corrected = text_part.strip()
|
||||||
|
changes = changes.strip()
|
||||||
|
else:
|
||||||
|
corrected = body
|
||||||
|
|
||||||
|
if not corrected:
|
||||||
|
return original_text, "skipped", ""
|
||||||
|
|
||||||
|
# Safety: reject if word count differs by more than 15%
|
||||||
|
orig_wc = _word_count(original_text)
|
||||||
|
new_wc = _word_count(corrected)
|
||||||
|
if orig_wc > 0 and abs(new_wc - orig_wc) / orig_wc > 0.15:
|
||||||
|
log.warning(
|
||||||
|
"Fact-check rejected: word count changed too much "
|
||||||
|
"(%d -> %d, %.0f%% delta)",
|
||||||
|
orig_wc, new_wc, abs(new_wc - orig_wc) / orig_wc * 100,
|
||||||
|
)
|
||||||
|
return original_text, "skipped", "rejected -- word count delta too large"
|
||||||
|
|
||||||
|
return corrected, "corrected", changes
|
||||||
|
|
||||||
|
# Unparseable output
|
||||||
|
return original_text, "skipped", ""
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Main tool
|
# Main tool
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
@ -601,8 +700,8 @@ def write_press_releases(
|
||||||
cost_log: list[dict] = []
|
cost_log: list[dict] = []
|
||||||
|
|
||||||
# ── Step 1: Generate 7 headlines (chat brain) ─────────────────────────
|
# ── Step 1: Generate 7 headlines (chat brain) ─────────────────────────
|
||||||
log.info("[PR Pipeline] Step 1/4: Generating 7 headlines for %s...", company_name)
|
log.info("[PR Pipeline] Step 1/5: Generating 7 headlines for %s...", company_name)
|
||||||
_set_status(ctx, f"Step 1/4: Generating 7 headlines for {company_name}...")
|
_set_status(ctx, f"Step 1/5: Generating 7 headlines for {company_name}...")
|
||||||
step_start = time.time()
|
step_start = time.time()
|
||||||
headline_prompt = _build_headline_prompt(topic, company_name, url, lsi_terms, headlines_ref)
|
headline_prompt = _build_headline_prompt(topic, company_name, url, lsi_terms, headlines_ref)
|
||||||
messages = [
|
messages = [
|
||||||
|
|
@ -627,8 +726,8 @@ def write_press_releases(
|
||||||
headlines_file.write_text(headlines_raw.strip(), encoding="utf-8")
|
headlines_file.write_text(headlines_raw.strip(), encoding="utf-8")
|
||||||
|
|
||||||
# ── Step 2: AI judge picks best 2 (chat brain) ───────────────────────
|
# ── Step 2: AI judge picks best 2 (chat brain) ───────────────────────
|
||||||
log.info("[PR Pipeline] Step 2/4: AI judge selecting best 2 headlines...")
|
log.info("[PR Pipeline] Step 2/5: AI judge selecting best 2 headlines...")
|
||||||
_set_status(ctx, "Step 2/4: AI judge selecting best 2 headlines...")
|
_set_status(ctx, "Step 2/5: AI judge selecting best 2 headlines...")
|
||||||
step_start = time.time()
|
step_start = time.time()
|
||||||
judge_prompt = _build_judge_prompt(headlines_raw, headlines_ref, topic)
|
judge_prompt = _build_judge_prompt(headlines_raw, headlines_ref, topic)
|
||||||
messages = [
|
messages = [
|
||||||
|
|
@ -666,7 +765,7 @@ def write_press_releases(
|
||||||
winners = winners[:2]
|
winners = winners[:2]
|
||||||
|
|
||||||
# ── Step 3: Write 2 press releases (execution brain x 2) ─────────────
|
# ── Step 3: Write 2 press releases (execution brain x 2) ─────────────
|
||||||
log.info("[PR Pipeline] Step 3/4: Writing 2 press releases...")
|
log.info("[PR Pipeline] Step 3/5: Writing 2 press releases...")
|
||||||
anchor_phrase = _derive_anchor_phrase(company_name, keyword) if keyword else ""
|
anchor_phrase = _derive_anchor_phrase(company_name, keyword) if keyword else ""
|
||||||
pr_texts: list[str] = []
|
pr_texts: list[str] = []
|
||||||
pr_files: list[str] = []
|
pr_files: list[str] = []
|
||||||
|
|
@ -674,7 +773,7 @@ def write_press_releases(
|
||||||
anchor_warnings: list[str] = []
|
anchor_warnings: list[str] = []
|
||||||
for i, headline in enumerate(winners):
|
for i, headline in enumerate(winners):
|
||||||
log.info("[PR Pipeline] Writing PR %d/2: %s", i + 1, headline[:60])
|
log.info("[PR Pipeline] Writing PR %d/2: %s", i + 1, headline[:60])
|
||||||
_set_status(ctx, f"Step 3/4: Writing press release {i + 1}/2 — {headline[:60]}...")
|
_set_status(ctx, f"Step 3/5: Writing press release {i + 1}/2 — {headline[:60]}...")
|
||||||
step_start = time.time()
|
step_start = time.time()
|
||||||
pr_prompt = _build_pr_prompt(
|
pr_prompt = _build_pr_prompt(
|
||||||
headline,
|
headline,
|
||||||
|
|
@ -737,6 +836,65 @@ def write_press_releases(
|
||||||
text_to_docx(clean_result, docx_path)
|
text_to_docx(clean_result, docx_path)
|
||||||
docx_files.append(str(docx_path))
|
docx_files.append(str(docx_path))
|
||||||
|
|
||||||
|
# ── Step 3.5: Adversarial fact-check (Sonnet + WebSearch) ───────────
|
||||||
|
log.info("[PR Pipeline] Step 3.5/5: Running adversarial fact-check...")
|
||||||
|
fact_check_statuses: list[str] = [] # per-PR: "clean", "corrected", "skipped"
|
||||||
|
fact_check_changes: list[str] = [] # per-PR change log (empty if clean/skipped)
|
||||||
|
fact_check_failed = False
|
||||||
|
for i, pr_text in enumerate(pr_texts):
|
||||||
|
log.info("[PR Pipeline] Fact-checking PR %d/2...", i + 1)
|
||||||
|
_set_status(ctx, f"Step 3.5/5: Fact-checking PR {i + 1}/2...")
|
||||||
|
step_start = time.time()
|
||||||
|
try:
|
||||||
|
fc_prompt = _build_fact_check_prompt(
|
||||||
|
pr_text, company_name, url, topic, keyword
|
||||||
|
)
|
||||||
|
fc_result = agent.execute_task(
|
||||||
|
fc_prompt, tools="WebSearch,WebFetch", model=FACT_CHECK_MODEL
|
||||||
|
)
|
||||||
|
corrected, status, changes = _apply_fact_check(fc_result, pr_text)
|
||||||
|
fact_check_statuses.append(status)
|
||||||
|
fact_check_changes.append(changes)
|
||||||
|
|
||||||
|
if status == "corrected":
|
||||||
|
pr_texts[i] = corrected
|
||||||
|
# Re-write files with corrected text
|
||||||
|
Path(pr_files[i]).write_text(corrected, encoding="utf-8")
|
||||||
|
text_to_docx(corrected, Path(docx_files[i]))
|
||||||
|
log.info(
|
||||||
|
"[PR Pipeline] PR %d: %d correction(s) applied",
|
||||||
|
i + 1, changes.count("\n") + 1 if changes else 1,
|
||||||
|
)
|
||||||
|
elif status == "clean":
|
||||||
|
log.info("[PR Pipeline] PR %d: no factual errors found", i + 1)
|
||||||
|
else:
|
||||||
|
log.warning("[PR Pipeline] PR %d: fact-check skipped (unparseable output)", i + 1)
|
||||||
|
|
||||||
|
elapsed = round(time.time() - step_start, 1)
|
||||||
|
cost_log.append(
|
||||||
|
{
|
||||||
|
"step": f"3.5{chr(97 + i)}. Fact-check PR {i + 1}",
|
||||||
|
"model": FACT_CHECK_MODEL,
|
||||||
|
"elapsed_s": elapsed,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
fact_check_failed = True
|
||||||
|
fact_check_statuses.append("skipped")
|
||||||
|
fact_check_changes.append("")
|
||||||
|
log.warning("[PR Pipeline] PR %d fact-check failed: %s", i + 1, e)
|
||||||
|
|
||||||
|
# Notify ClickUp if fact-check could not run at all
|
||||||
|
if fact_check_failed and clickup_task_id and cu_client:
|
||||||
|
try:
|
||||||
|
cu_client.add_comment(
|
||||||
|
clickup_task_id,
|
||||||
|
"Note: factual accuracy check could not be run on this PR. "
|
||||||
|
"Manual review recommended.",
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
log.warning("ClickUp fact-check warning failed for %s: %s", clickup_task_id, e)
|
||||||
|
|
||||||
# ── ClickUp: upload docx attachments + comment ─────────────────────
|
# ── ClickUp: upload docx attachments + comment ─────────────────────
|
||||||
uploaded_count = 0
|
uploaded_count = 0
|
||||||
failed_uploads: list[str] = []
|
failed_uploads: list[str] = []
|
||||||
|
|
@ -755,11 +913,27 @@ def write_press_releases(
|
||||||
f"\n[WARNING]Warning: {len(failed_uploads)} attachment(s) failed to upload. "
|
f"\n[WARNING]Warning: {len(failed_uploads)} attachment(s) failed to upload. "
|
||||||
f"Files saved locally at:\n{paths_list}"
|
f"Files saved locally at:\n{paths_list}"
|
||||||
)
|
)
|
||||||
|
# Build fact-check summary for comment
|
||||||
|
fc_summary = ""
|
||||||
|
for fi, fc_status in enumerate(fact_check_statuses):
|
||||||
|
label = f"PR {chr(65 + fi)}"
|
||||||
|
if fc_status == "corrected":
|
||||||
|
fc_summary += f"\nFact-check {label}: corrections applied"
|
||||||
|
if fact_check_changes[fi]:
|
||||||
|
fc_summary += f"\n {fact_check_changes[fi]}"
|
||||||
|
elif fc_status == "clean":
|
||||||
|
fc_summary += f"\nFact-check {label}: no errors found"
|
||||||
|
else:
|
||||||
|
fc_summary += (
|
||||||
|
f"\nFact-check {label}: could not run -- manual review recommended"
|
||||||
|
)
|
||||||
|
|
||||||
cu_client.add_comment(
|
cu_client.add_comment(
|
||||||
clickup_task_id,
|
clickup_task_id,
|
||||||
f"📎 Saved {len(docx_files)} press release(s). "
|
f"Saved {len(docx_files)} press release(s). "
|
||||||
f"{uploaded_count} file(s) attached.\n"
|
f"{uploaded_count} file(s) attached.\n"
|
||||||
f"Generating JSON-LD schemas next...{upload_warning}",
|
f"Generating JSON-LD schemas next...{upload_warning}"
|
||||||
|
f"{fc_summary}",
|
||||||
)
|
)
|
||||||
log.info(
|
log.info(
|
||||||
"ClickUp: uploaded %d attachments for task %s", uploaded_count, clickup_task_id
|
"ClickUp: uploaded %d attachments for task %s", uploaded_count, clickup_task_id
|
||||||
|
|
@ -768,12 +942,12 @@ def write_press_releases(
|
||||||
log.warning("ClickUp attachment upload failed for %s: %s", clickup_task_id, e)
|
log.warning("ClickUp attachment upload failed for %s: %s", clickup_task_id, e)
|
||||||
|
|
||||||
# ── Step 4: Generate 2 JSON-LD schemas (Sonnet + WebSearch) ───────────
|
# ── Step 4: Generate 2 JSON-LD schemas (Sonnet + WebSearch) ───────────
|
||||||
log.info("[PR Pipeline] Step 4/4: Generating 2 JSON-LD schemas...")
|
log.info("[PR Pipeline] Step 4/5: Generating 2 JSON-LD schemas...")
|
||||||
schema_texts: list[str] = []
|
schema_texts: list[str] = []
|
||||||
schema_files: list[str] = []
|
schema_files: list[str] = []
|
||||||
for i, pr_text in enumerate(pr_texts):
|
for i, pr_text in enumerate(pr_texts):
|
||||||
log.info("[PR Pipeline] Schema %d/2 for: %s", i + 1, winners[i][:60])
|
log.info("[PR Pipeline] Schema %d/2 for: %s", i + 1, winners[i][:60])
|
||||||
_set_status(ctx, f"Step 4/4: Generating schema {i + 1}/2...")
|
_set_status(ctx, f"Step 4/5: Generating schema {i + 1}/2...")
|
||||||
step_start = time.time()
|
step_start = time.time()
|
||||||
schema_prompt = _build_schema_prompt(pr_text, company_name, url, schema_skill)
|
schema_prompt = _build_schema_prompt(pr_text, company_name, url, schema_skill)
|
||||||
exec_tools = "WebSearch,WebFetch"
|
exec_tools = "WebSearch,WebFetch"
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,7 @@ from pathlib import Path
|
||||||
|
|
||||||
from .autocora import archive_result, scan_results, submit_job
|
from .autocora import archive_result, scan_results, submit_job
|
||||||
from .blm import find_cora_xlsx, run_generate, run_ingest
|
from .blm import find_cora_xlsx, run_generate, run_ingest
|
||||||
|
from .fact_check import fact_check_pr_files
|
||||||
from .claude_runner import (
|
from .claude_runner import (
|
||||||
RunResult,
|
RunResult,
|
||||||
build_prompt,
|
build_prompt,
|
||||||
|
|
@ -632,6 +633,24 @@ def _dispatch_claude(
|
||||||
_cleanup_work_dir(result.work_dir)
|
_cleanup_work_dir(result.work_dir)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# 5b. Fact-check PR files (Press Release only, graceful failure)
|
||||||
|
fc_status_lines: list[str] = []
|
||||||
|
if task.task_type == "Press Release":
|
||||||
|
log.info("Running adversarial fact-check for task %s", task.id)
|
||||||
|
company = task.get_field_value("Client") or ""
|
||||||
|
pr_topic = task.get_field_value("PR Topic") or ""
|
||||||
|
pr_keyword = task.get_field_value("Keyword") or ""
|
||||||
|
pr_url = task.get_field_value("IMSURL") or ""
|
||||||
|
fc_status_lines, fc_failed = fact_check_pr_files(
|
||||||
|
result.output_files,
|
||||||
|
company_name=company,
|
||||||
|
url=pr_url,
|
||||||
|
topic=pr_topic,
|
||||||
|
keyword=pr_keyword,
|
||||||
|
)
|
||||||
|
if fc_failed:
|
||||||
|
log.warning("Fact-check had failures for task %s", task.id)
|
||||||
|
|
||||||
# 6. Upload output files to ClickUp
|
# 6. Upload output files to ClickUp
|
||||||
uploaded = 0
|
uploaded = 0
|
||||||
for f in result.output_files:
|
for f in result.output_files:
|
||||||
|
|
@ -651,6 +670,8 @@ def _dispatch_claude(
|
||||||
|
|
||||||
# 9. Post success comment
|
# 9. Post success comment
|
||||||
summary = "Stage complete. %d file(s) attached." % uploaded
|
summary = "Stage complete. %d file(s) attached." % uploaded
|
||||||
|
if fc_status_lines:
|
||||||
|
summary += "\n" + "\n".join(fc_status_lines)
|
||||||
if result.output:
|
if result.output:
|
||||||
# Include first 500 chars of Claude's output as context
|
# Include first 500 chars of Claude's output as context
|
||||||
truncated = result.output[:500]
|
truncated = result.output[:500]
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,220 @@
|
||||||
|
"""Adversarial fact-checker for press release outputs.
|
||||||
|
|
||||||
|
Runs a second Claude Code pass on generated PR text files to catch
|
||||||
|
factual errors. Treats all client-provided data (company name, titles,
|
||||||
|
URLs, topic) as ground truth and only corrects claims the PR inferred
|
||||||
|
or fabricated beyond what was given.
|
||||||
|
|
||||||
|
Graceful failure: any error returns the original text untouched.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import shutil
|
||||||
|
import subprocess
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
FACT_CHECK_MODEL = "sonnet"
|
||||||
|
FACT_CHECK_TIMEOUT = 300 # 5 minutes per PR
|
||||||
|
|
||||||
|
|
||||||
|
def build_fact_check_prompt(
    pr_text: str,
    company_name: str,
    url: str,
    topic: str,
    keyword: str,
) -> str:
    """Build the prompt for the adversarial fact-checker.

    Client-provided fields are framed as ground truth so the reviewer never
    "corrects" them based on web results; only external claims get verified.
    """
    sections = [
        "You are a factual accuracy reviewer for press releases. Your ONLY job is to "
        "find and correct statements that are factually wrong. You are NOT an editor.\n\n",
        # Ground-truth block: client data is correct by definition.
        "GROUND TRUTH -- the following data was provided by the client and is correct "
        "by definition. Do NOT change, question, or 'correct' any of it, even if your "
        "web search suggests something different:\n"
        f" - Company name: {company_name}\n"
        f" - Target URL: {url}\n"
        f" - Topic: {topic}\n"
        f" - Keyword: {keyword}\n"
        " - Any person names, titles, quotes, or contact details in the PR\n"
        " - Any product names, service names, or brand names\n"
        " - The overall framing, angle, and tone of the PR\n\n",
        "WHAT TO CHECK (use WebSearch/WebFetch to verify):\n"
        " - Industry statistics or market size claims\n"
        " - Historical dates or facts\n"
        " - Technical specifications not sourced from the client data\n"
        " - General knowledge claims (e.g. 'X is the leading cause of Y')\n"
        " - Geographic or regulatory facts\n\n",
        "RULES:\n"
        " - ONLY fix actual factual errors -- wrong numbers, wrong dates, wrong facts\n"
        " - Do NOT add content, remove content, restructure, or 'improve' anything\n"
        " - Do NOT change tone, style, word choice, or sentence structure\n"
        " - Do NOT suggest additions or enhancements\n"
        " - Make the MINIMUM change needed to fix each error\n"
        " - Preserve the exact formatting, paragraph breaks, and headline\n\n",
        "OUTPUT FORMAT:\n"
        " - If you find NO factual errors: output exactly [NO_ERRORS] and nothing else\n"
        " - If you find errors: output [CORRECTED] on the first line, then the full "
        "corrected PR text (preserving all formatting), then a blank line, then "
        "CHANGES: followed by a numbered list of what you changed and why\n\n",
        # The PR body is fenced with --- to mark exact boundaries.
        f"Press release to review:\n---\n{pr_text}\n---",
    ]
    return "".join(sections)
|
||||||
|
|
||||||
|
|
||||||
|
def apply_fact_check(raw_output: str, original_text: str) -> tuple[str, str, str]:
    """Parse fact-checker output. Returns (text, status, changes).

    status is one of: "clean", "corrected", "skipped"
    On any parse failure or suspect rewrite, returns original text unchanged.
    """
    cleaned = raw_output.strip() if raw_output else ""
    if not cleaned:
        # Nothing came back from the reviewer -> leave the PR untouched.
        return original_text, "skipped", ""

    if cleaned.startswith("[NO_ERRORS]"):
        return original_text, "clean", ""

    if not cleaned.startswith("[CORRECTED]"):
        # No recognized sentinel -> unparseable output.
        return original_text, "skipped", ""

    body = cleaned[len("[CORRECTED]"):].strip()

    # Split the corrected PR text from the optional CHANGES: trailer.
    pr_part, sep, trailer = body.partition("\nCHANGES:")
    corrected = pr_part.strip()
    changes = trailer.strip() if sep else ""

    if not corrected:
        return original_text, "skipped", ""

    # Safety valve: >15% word-count drift means a rewrite, not a correction.
    orig_wc = len(original_text.split())
    new_wc = len(corrected.split())
    if orig_wc > 0 and abs(new_wc - orig_wc) / orig_wc > 0.15:
        log.warning(
            "Fact-check rejected: word count changed too much "
            "(%d -> %d, %.0f%% delta)",
            orig_wc, new_wc, abs(new_wc - orig_wc) / orig_wc * 100,
        )
        return original_text, "skipped", "rejected -- word count delta too large"

    return corrected, "corrected", changes
|
||||||
|
|
||||||
|
|
||||||
|
def fact_check_pr_files(
    output_files: list[Path],
    company_name: str,
    url: str,
    topic: str,
    keyword: str,
    timeout: int = FACT_CHECK_TIMEOUT,
) -> tuple[list[str], bool]:
    """Run fact-check on .txt PR files in the output list.

    Returns:
        (status_lines, any_failed) where status_lines is a list of
        human-readable results per PR, and any_failed is True if the
        fact-checker could not run on at least one PR.
    """
    claude_bin = shutil.which("claude")
    if claude_bin is None:
        log.warning("Fact-check: claude CLI not found, skipping")
        return ["Fact-check: claude CLI not found, skipped"], True

    # PR files are the .txt outputs (the actual press releases); this also
    # skips non-PR files like "Headlines Evaluation.md".
    pr_paths = [f for f in output_files if f.suffix == ".txt"]
    if not pr_paths:
        return [], False

    lines: list[str] = []
    failed = False

    for idx, path in enumerate(pr_paths):
        label = f"PR {chr(65 + idx)}"  # PR A, PR B, etc.
        try:
            source_text = path.read_text(encoding="utf-8")
            if not source_text.strip():
                # Empty file -- nothing to review.
                continue

            prompt = build_fact_check_prompt(
                source_text, company_name, url, topic, keyword
            )

            log.info("Fact-checking %s: %s", label, path.name)
            proc = subprocess.run(
                [
                    claude_bin,
                    "-p", prompt,
                    "--output-format", "text",
                    "--permission-mode", "bypassPermissions",
                    "--allowedTools", "WebSearch,WebFetch",
                    "--max-turns", "10",
                    "--model", FACT_CHECK_MODEL,
                ],
                capture_output=True,
                text=True,
                timeout=timeout,
                cwd=str(path.parent),
            )

            if proc.returncode != 0:
                log.warning(
                    "Fact-check %s failed (exit %d): %s",
                    label, proc.returncode, (proc.stderr or "")[:500],
                )
                lines.append(
                    f"Fact-check {label}: could not run -- manual review recommended"
                )
                failed = True
                continue

            new_text, status, changes = apply_fact_check(proc.stdout, source_text)

            if status == "corrected":
                # Persist the corrected PR in place of the original.
                path.write_text(new_text, encoding="utf-8")
                log.info("Fact-check %s: corrections applied", label)
                entry = f"Fact-check {label}: corrections applied"
                if changes:
                    entry += f"\n {changes}"
                lines.append(entry)
            elif status == "clean":
                log.info("Fact-check %s: no errors found", label)
                lines.append(f"Fact-check {label}: no errors found")
            else:
                log.warning("Fact-check %s: skipped (unparseable output)", label)
                lines.append(
                    f"Fact-check {label}: could not run -- manual review recommended"
                )
                failed = True

        except subprocess.TimeoutExpired:
            log.warning("Fact-check %s timed out after %ds", label, timeout)
            lines.append(
                f"Fact-check {label}: timed out -- manual review recommended"
            )
            failed = True
        except Exception as e:
            log.warning("Fact-check %s error: %s", label, e)
            lines.append(
                f"Fact-check {label}: could not run -- manual review recommended"
            )
            failed = True

    return lines, failed
|
||||||
|
|
@ -0,0 +1,126 @@
|
||||||
|
"""Tests for the adversarial fact-checker helpers in press_release.py."""
|
||||||
|
|
||||||
|
from cheddahbot.tools.press_release import _apply_fact_check, _build_fact_check_prompt
|
||||||
|
|
||||||
|
|
||||||
|
class TestApplyFactCheck:
    """Tests for _apply_fact_check output parsing."""

    # Baseline PR text used by every case; contains a verifiable numeric
    # claim ("500 widget variants") for the correction tests to target.
    ORIGINAL = (
        "Acme Corp Delivers Advanced Widget Solutions\n\n"
        "Acme Corp, a leading manufacturer of widgets, today highlighted "
        "its expanded product line. The company, based in Milwaukee, Wisconsin, "
        "produces over 500 widget variants for industrial applications."
    )

    def test_no_errors_returns_original(self):
        result = _apply_fact_check("[NO_ERRORS]", self.ORIGINAL)
        assert result == (self.ORIGINAL, "clean", "")

    def test_no_errors_with_trailing_whitespace(self):
        text, status, _ = _apply_fact_check("[NO_ERRORS] \n", self.ORIGINAL)
        assert status == "clean"
        assert text == self.ORIGINAL

    def test_corrected_with_changes(self):
        patched = self.ORIGINAL.replace("500 widget", "300 widget")
        reviewer_output = (
            f"[CORRECTED]\n{patched}\n\n"
            "CHANGES:\n1. Changed '500 widget variants' to '300 widget variants' "
            "-- company website lists 300."
        )
        text, status, changes = _apply_fact_check(reviewer_output, self.ORIGINAL)
        assert status == "corrected"
        assert "300 widget" in text
        assert "500" not in text
        assert "300 widget variants" in changes

    def test_corrected_without_changes_section(self):
        patched = self.ORIGINAL.replace("500", "300")
        text, status, changes = _apply_fact_check(
            f"[CORRECTED]\n{patched}", self.ORIGINAL
        )
        assert status == "corrected"
        assert "300" in text
        assert changes == ""

    def test_empty_output_returns_skipped(self):
        text, status, _ = _apply_fact_check("", self.ORIGINAL)
        assert status == "skipped"
        assert text == self.ORIGINAL

    def test_none_like_output_returns_skipped(self):
        text, status, _ = _apply_fact_check(" \n ", self.ORIGINAL)
        assert status == "skipped"
        assert text == self.ORIGINAL

    def test_garbage_output_returns_skipped(self):
        text, status, _ = _apply_fact_check(
            "I reviewed the press release and it looks good overall.", self.ORIGINAL
        )
        assert status == "skipped"
        assert text == self.ORIGINAL

    def test_rejects_oversized_rewrite(self):
        """If fact-checker rewrites too much (>15% word count delta), reject."""
        # Double the content -- far beyond the 15% tolerance.
        bloated = self.ORIGINAL + "\n\n" + self.ORIGINAL + "\n\nExtra content here."
        reviewer_output = f"[CORRECTED]\n{bloated}\n\nCHANGES:\n1. Added more detail."
        text, status, changes = _apply_fact_check(reviewer_output, self.ORIGINAL)
        assert status == "skipped"
        assert text == self.ORIGINAL
        assert "word count delta" in changes

    def test_accepts_minor_word_count_change(self):
        """Small changes (within 15%) should be accepted."""
        # One numeric tweak -- word count is unchanged.
        tweaked = self.ORIGINAL.replace("500 widget variants", "480 widget variants")
        reviewer_output = (
            f"[CORRECTED]\n{tweaked}\n\n"
            "CHANGES:\n1. Corrected variant count from 500 to 480."
        )
        text, status, _ = _apply_fact_check(reviewer_output, self.ORIGINAL)
        assert status == "corrected"
        assert "480" in text

    def test_corrected_but_empty_body_returns_skipped(self):
        text, status, _ = _apply_fact_check("[CORRECTED]\n", self.ORIGINAL)
        assert status == "skipped"
        assert text == self.ORIGINAL
|
||||||
|
|
||||||
|
|
||||||
|
class TestBuildFactCheckPrompt:
    """Tests for _build_fact_check_prompt structure."""

    def test_includes_ground_truth_data(self):
        # Every client-supplied field must appear verbatim in the prompt.
        prompt = _build_fact_check_prompt(
            "Some PR text here.",
            company_name="Acme Corp",
            url="https://acme.com",
            topic="widgets",
            keyword="industrial widgets",
        )
        for expected in ("Acme Corp", "https://acme.com", "widgets", "industrial widgets"):
            assert expected in prompt
        assert "ground truth" in prompt.lower() or "GROUND TRUTH" in prompt

    def test_includes_pr_text(self):
        prompt = _build_fact_check_prompt(
            "The quick brown fox.",
            company_name="Test",
            url="https://test.com",
            topic="foxes",
            keyword="brown fox",
        )
        assert "The quick brown fox." in prompt

    def test_output_format_instructions(self):
        # The prompt must spell out both sentinels and the change-log marker.
        prompt = _build_fact_check_prompt(
            "Text.", company_name="X", url="u", topic="t", keyword="k"
        )
        for marker in ("[NO_ERRORS]", "[CORRECTED]", "CHANGES:"):
            assert marker in prompt
|
||||||
|
|
@ -0,0 +1,122 @@
|
||||||
|
"""Tests for clickup_runner.fact_check module."""
|
||||||
|
|
||||||
|
from clickup_runner.fact_check import apply_fact_check, build_fact_check_prompt
|
||||||
|
|
||||||
|
|
||||||
|
class TestApplyFactCheck:
    """Tests for apply_fact_check output parsing."""

    ORIGINAL = (
        "Acme Corp Delivers Advanced Widget Solutions\n\n"
        "Acme Corp, a leading manufacturer of widgets, today highlighted "
        "its expanded product line. The company, based in Milwaukee, Wisconsin, "
        "produces over 500 widget variants for industrial applications."
    )

    def test_no_errors_returns_original(self):
        """A bare [NO_ERRORS] verdict keeps the PR byte-for-byte intact."""
        out_text, verdict, change_log = apply_fact_check("[NO_ERRORS]", self.ORIGINAL)
        assert verdict == "clean"
        assert out_text == self.ORIGINAL
        assert change_log == ""

    def test_no_errors_with_trailing_whitespace(self):
        """Trailing whitespace around the tag does not break recognition."""
        out_text, verdict, _ = apply_fact_check("[NO_ERRORS] \n", self.ORIGINAL)
        assert verdict == "clean"
        assert out_text == self.ORIGINAL

    def test_corrected_with_changes(self):
        """[CORRECTED] output swaps in the fixed body and surfaces the log."""
        fixed_pr = self.ORIGINAL.replace("500 widget", "300 widget")
        raw_output = (
            f"[CORRECTED]\n{fixed_pr}\n\n"
            "CHANGES:\n1. Changed '500 widget variants' to '300 widget variants' "
            "-- company website lists 300."
        )
        out_text, verdict, change_log = apply_fact_check(raw_output, self.ORIGINAL)
        assert verdict == "corrected"
        assert "300 widget" in out_text
        assert "500" not in out_text
        assert "300 widget variants" in change_log

    def test_corrected_without_changes_section(self):
        """A missing CHANGES: section yields an empty change log."""
        fixed_pr = self.ORIGINAL.replace("500", "300")
        out_text, verdict, change_log = apply_fact_check(
            f"[CORRECTED]\n{fixed_pr}", self.ORIGINAL
        )
        assert verdict == "corrected"
        assert "300" in out_text
        assert change_log == ""

    def test_empty_output_returns_skipped(self):
        """An empty model response is treated as a skipped check."""
        out_text, verdict, _ = apply_fact_check("", self.ORIGINAL)
        assert verdict == "skipped"
        assert out_text == self.ORIGINAL

    def test_whitespace_only_returns_skipped(self):
        """Whitespace-only output is equivalent to no output at all."""
        out_text, verdict, _ = apply_fact_check(" \n ", self.ORIGINAL)
        assert verdict == "skipped"
        assert out_text == self.ORIGINAL

    def test_garbage_output_returns_skipped(self):
        """Prose without a recognized verdict tag is ignored."""
        out_text, verdict, _ = apply_fact_check(
            "I reviewed the press release and it looks good overall.", self.ORIGINAL
        )
        assert verdict == "skipped"
        assert out_text == self.ORIGINAL

    def test_rejects_oversized_rewrite(self):
        """Rewrites beyond the 15% word-count tolerance are rejected."""
        oversized = "\n\n".join([self.ORIGINAL, self.ORIGINAL, "Extra content."])
        raw_output = f"[CORRECTED]\n{oversized}\n\nCHANGES:\n1. Added more detail."
        out_text, verdict, change_log = apply_fact_check(raw_output, self.ORIGINAL)
        assert verdict == "skipped"
        assert out_text == self.ORIGINAL
        assert "word count delta" in change_log

    def test_accepts_minor_word_count_change(self):
        """Small in-place edits stay within tolerance and are accepted."""
        edited = self.ORIGINAL.replace("500 widget variants", "480 widget variants")
        raw_output = (
            f"[CORRECTED]\n{edited}\n\n"
            "CHANGES:\n1. Corrected variant count from 500 to 480."
        )
        out_text, verdict, _ = apply_fact_check(raw_output, self.ORIGINAL)
        assert verdict == "corrected"
        assert "480" in out_text

    def test_corrected_empty_body_returns_skipped(self):
        """A [CORRECTED] tag with no body falls back to the original PR."""
        out_text, verdict, _ = apply_fact_check("[CORRECTED]\n", self.ORIGINAL)
        assert verdict == "skipped"
        assert out_text == self.ORIGINAL
|
||||||
|
|
||||||
|
|
||||||
|
class TestBuildFactCheckPrompt:
    """Tests for build_fact_check_prompt structure."""

    def test_includes_ground_truth_data(self):
        """Every caller-supplied fact appears verbatim in the prompt."""
        built = build_fact_check_prompt(
            "Some PR text.",
            company_name="Acme Corp",
            url="https://acme.com",
            topic="widgets",
            keyword="industrial widgets",
        )
        for expected in (
            "Acme Corp",
            "https://acme.com",
            "widgets",
            "industrial widgets",
            "GROUND TRUTH",
        ):
            assert expected in built

    def test_includes_pr_text(self):
        """The PR body under review is embedded in the prompt."""
        built = build_fact_check_prompt(
            "The quick brown fox.",
            company_name="Test",
            url="https://test.com",
            topic="foxes",
            keyword="brown fox",
        )
        assert "The quick brown fox." in built

    def test_output_format_instructions(self):
        """The prompt states the [NO_ERRORS]/[CORRECTED] output contract."""
        built = build_fact_check_prompt(
            "Text.", company_name="X", url="u", topic="t", keyword="k"
        )
        assert "[NO_ERRORS]" in built
        assert "[CORRECTED]" in built
        assert "CHANGES:" in built
|
||||||
Loading…
Reference in New Issue