Add adversarial fact-check step to press release pipeline

Sonnet + WebSearch reviews each PR between generation and schema
steps. Returns [NO_ERRORS] or [CORRECTED] with change log; rewrites
that shift word count by more than 15% are rejected. Fact-check
failures are graceful -- PR still ships with a ClickUp note that
manual review is recommended. Wired into both the legacy pipeline
and the headless clickup_runner.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
clickup-runner
PeninsulaInd 2026-04-21 09:16:19 -05:00
parent 38a88987a0
commit abb6e1841b
5 changed files with 678 additions and 15 deletions

View File

@ -4,8 +4,9 @@ Autonomous workflow:
1. Generate 7 compliant headlines (chat brain)
2. AI judge picks the 2 best (chat brain)
3. Write 2 full press releases (execution brain x 2)
3.5. Adversarial fact-check (Sonnet + WebSearch, graceful failure)
4. Generate 2 JSON-LD schemas (execution brain x 2, Sonnet + WebSearch)
5. Save 4 files, return cost summary
5. Save files, return cost summary
"""
from __future__ import annotations
@ -35,6 +36,7 @@ _COMPANIES_FILE = _SKILLS_DIR / "companies.md"
_HEADLINES_FILE = _SKILLS_DIR / "headlines.md"
SONNET_CLI_MODEL = "sonnet"
FACT_CHECK_MODEL = "sonnet"
def _set_status(ctx: dict | None, message: str) -> None:
@ -524,6 +526,103 @@ def _build_schema_prompt(pr_text: str, company_name: str, url: str, skill_text:
return prompt
def _build_fact_check_prompt(
    pr_text: str,
    company_name: str,
    url: str,
    topic: str,
    keyword: str,
) -> str:
    """Build the prompt for the adversarial fact-checker step.

    All client-supplied inputs (company name, URL, topic, keyword) are
    declared as ground truth so the reviewer never "corrects" them. The
    model must answer with [NO_ERRORS], or [CORRECTED] plus a CHANGES:
    log, in exactly the format _apply_fact_check parses.
    """
    # Assembled from ordered sections so each instruction group (ground
    # truth, scope, rules, output contract) is easy to audit and edit.
    sections = [
        "You are a factual accuracy reviewer for press releases. Your ONLY job is to ",
        "find and correct statements that are factually wrong. You are NOT an editor.\n\n",
        "GROUND TRUTH -- the following data was provided by the client and is correct ",
        "by definition. Do NOT change, question, or 'correct' any of it, even if your ",
        "web search suggests something different:\n",
        f" - Company name: {company_name}\n",
        f" - Target URL: {url}\n",
        f" - Topic: {topic}\n",
        f" - Keyword: {keyword}\n",
        " - Any person names, titles, quotes, or contact details in the PR\n",
        " - Any product names, service names, or brand names\n",
        " - The overall framing, angle, and tone of the PR\n\n",
        "WHAT TO CHECK (use WebSearch/WebFetch to verify):\n",
        " - Industry statistics or market size claims\n",
        " - Historical dates or facts\n",
        " - Technical specifications not sourced from the client data\n",
        " - General knowledge claims (e.g. 'X is the leading cause of Y')\n",
        " - Geographic or regulatory facts\n\n",
        "RULES:\n",
        " - ONLY fix actual factual errors -- wrong numbers, wrong dates, wrong facts\n",
        " - Do NOT add content, remove content, restructure, or 'improve' anything\n",
        " - Do NOT change tone, style, word choice, or sentence structure\n",
        " - Do NOT suggest additions or enhancements\n",
        " - Make the MINIMUM change needed to fix each error\n",
        " - Preserve the exact formatting, paragraph breaks, and headline\n\n",
        "OUTPUT FORMAT:\n",
        " - If you find NO factual errors: output exactly [NO_ERRORS] and nothing else\n",
        " - If you find errors: output [CORRECTED] on the first line, then the full ",
        "corrected PR text (preserving all formatting), then a blank line, then ",
        "CHANGES: followed by a numbered list of what you changed and why\n\n",
        "Press release to review:\n",
        "---\n",
        f"{pr_text}\n",
        "---",
    ]
    return "".join(sections)
def _apply_fact_check(
    raw_output: str, original_text: str
) -> tuple[str, str, str]:
    """Parse fact-checker output into (text, status, changes).

    status is one of: "clean", "corrected", "skipped". On any parse
    failure -- or a rewrite that drifts too far from the original -- the
    original press-release text is returned unchanged.
    """
    reply = raw_output.strip() if raw_output else ""
    if not reply:
        # Empty or whitespace-only model output: nothing to apply.
        return original_text, "skipped", ""
    if reply.startswith("[NO_ERRORS]"):
        # Reviewer signed off on the PR as-is.
        return original_text, "clean", ""
    if not reply.startswith("[CORRECTED]"):
        # Neither sentinel present -- output is unparseable.
        return original_text, "skipped", ""
    # Drop the [CORRECTED] prefix, then separate text from the change log.
    payload = reply[len("[CORRECTED]"):].strip()
    marker = "\nCHANGES:"
    if marker in payload:
        text_part, log_part = payload.split(marker, 1)
        corrected = text_part.strip()
        changes = log_part.strip()
    else:
        corrected, changes = payload, ""
    if not corrected:
        return original_text, "skipped", ""
    # Safety: reject if word count differs by more than 15%
    orig_wc = _word_count(original_text)
    new_wc = _word_count(corrected)
    if orig_wc > 0 and abs(new_wc - orig_wc) / orig_wc > 0.15:
        log.warning(
            "Fact-check rejected: word count changed too much "
            "(%d -> %d, %.0f%% delta)",
            orig_wc, new_wc, abs(new_wc - orig_wc) / orig_wc * 100,
        )
        return original_text, "skipped", "rejected -- word count delta too large"
    return corrected, "corrected", changes
# ---------------------------------------------------------------------------
# Main tool
# ---------------------------------------------------------------------------
@ -601,8 +700,8 @@ def write_press_releases(
cost_log: list[dict] = []
# ── Step 1: Generate 7 headlines (chat brain) ─────────────────────────
log.info("[PR Pipeline] Step 1/4: Generating 7 headlines for %s...", company_name)
_set_status(ctx, f"Step 1/4: Generating 7 headlines for {company_name}...")
log.info("[PR Pipeline] Step 1/5: Generating 7 headlines for %s...", company_name)
_set_status(ctx, f"Step 1/5: Generating 7 headlines for {company_name}...")
step_start = time.time()
headline_prompt = _build_headline_prompt(topic, company_name, url, lsi_terms, headlines_ref)
messages = [
@ -627,8 +726,8 @@ def write_press_releases(
headlines_file.write_text(headlines_raw.strip(), encoding="utf-8")
# ── Step 2: AI judge picks best 2 (chat brain) ───────────────────────
log.info("[PR Pipeline] Step 2/4: AI judge selecting best 2 headlines...")
_set_status(ctx, "Step 2/4: AI judge selecting best 2 headlines...")
log.info("[PR Pipeline] Step 2/5: AI judge selecting best 2 headlines...")
_set_status(ctx, "Step 2/5: AI judge selecting best 2 headlines...")
step_start = time.time()
judge_prompt = _build_judge_prompt(headlines_raw, headlines_ref, topic)
messages = [
@ -666,7 +765,7 @@ def write_press_releases(
winners = winners[:2]
# ── Step 3: Write 2 press releases (execution brain x 2) ─────────────
log.info("[PR Pipeline] Step 3/4: Writing 2 press releases...")
log.info("[PR Pipeline] Step 3/5: Writing 2 press releases...")
anchor_phrase = _derive_anchor_phrase(company_name, keyword) if keyword else ""
pr_texts: list[str] = []
pr_files: list[str] = []
@ -674,7 +773,7 @@ def write_press_releases(
anchor_warnings: list[str] = []
for i, headline in enumerate(winners):
log.info("[PR Pipeline] Writing PR %d/2: %s", i + 1, headline[:60])
_set_status(ctx, f"Step 3/4: Writing press release {i + 1}/2 — {headline[:60]}...")
_set_status(ctx, f"Step 3/5: Writing press release {i + 1}/2 — {headline[:60]}...")
step_start = time.time()
pr_prompt = _build_pr_prompt(
headline,
@ -737,6 +836,65 @@ def write_press_releases(
text_to_docx(clean_result, docx_path)
docx_files.append(str(docx_path))
# ── Step 3.5: Adversarial fact-check (Sonnet + WebSearch) ───────────
log.info("[PR Pipeline] Step 3.5/5: Running adversarial fact-check...")
fact_check_statuses: list[str] = [] # per-PR: "clean", "corrected", "skipped"
fact_check_changes: list[str] = [] # per-PR change log (empty if clean/skipped)
fact_check_failed = False
for i, pr_text in enumerate(pr_texts):
log.info("[PR Pipeline] Fact-checking PR %d/2...", i + 1)
_set_status(ctx, f"Step 3.5/5: Fact-checking PR {i + 1}/2...")
step_start = time.time()
try:
fc_prompt = _build_fact_check_prompt(
pr_text, company_name, url, topic, keyword
)
fc_result = agent.execute_task(
fc_prompt, tools="WebSearch,WebFetch", model=FACT_CHECK_MODEL
)
corrected, status, changes = _apply_fact_check(fc_result, pr_text)
fact_check_statuses.append(status)
fact_check_changes.append(changes)
if status == "corrected":
pr_texts[i] = corrected
# Re-write files with corrected text
Path(pr_files[i]).write_text(corrected, encoding="utf-8")
text_to_docx(corrected, Path(docx_files[i]))
log.info(
"[PR Pipeline] PR %d: %d correction(s) applied",
i + 1, changes.count("\n") + 1 if changes else 1,
)
elif status == "clean":
log.info("[PR Pipeline] PR %d: no factual errors found", i + 1)
else:
log.warning("[PR Pipeline] PR %d: fact-check skipped (unparseable output)", i + 1)
elapsed = round(time.time() - step_start, 1)
cost_log.append(
{
"step": f"3.5{chr(97 + i)}. Fact-check PR {i + 1}",
"model": FACT_CHECK_MODEL,
"elapsed_s": elapsed,
}
)
except Exception as e:
fact_check_failed = True
fact_check_statuses.append("skipped")
fact_check_changes.append("")
log.warning("[PR Pipeline] PR %d fact-check failed: %s", i + 1, e)
# Notify ClickUp if fact-check could not run at all
if fact_check_failed and clickup_task_id and cu_client:
try:
cu_client.add_comment(
clickup_task_id,
"Note: factual accuracy check could not be run on this PR. "
"Manual review recommended.",
)
except Exception as e:
log.warning("ClickUp fact-check warning failed for %s: %s", clickup_task_id, e)
# ── ClickUp: upload docx attachments + comment ─────────────────────
uploaded_count = 0
failed_uploads: list[str] = []
@ -755,11 +913,27 @@ def write_press_releases(
f"\n[WARNING]Warning: {len(failed_uploads)} attachment(s) failed to upload. "
f"Files saved locally at:\n{paths_list}"
)
# Build fact-check summary for comment
fc_summary = ""
for fi, fc_status in enumerate(fact_check_statuses):
label = f"PR {chr(65 + fi)}"
if fc_status == "corrected":
fc_summary += f"\nFact-check {label}: corrections applied"
if fact_check_changes[fi]:
fc_summary += f"\n {fact_check_changes[fi]}"
elif fc_status == "clean":
fc_summary += f"\nFact-check {label}: no errors found"
else:
fc_summary += (
f"\nFact-check {label}: could not run -- manual review recommended"
)
cu_client.add_comment(
clickup_task_id,
f"📎 Saved {len(docx_files)} press release(s). "
f"Saved {len(docx_files)} press release(s). "
f"{uploaded_count} file(s) attached.\n"
f"Generating JSON-LD schemas next...{upload_warning}",
f"Generating JSON-LD schemas next...{upload_warning}"
f"{fc_summary}",
)
log.info(
"ClickUp: uploaded %d attachments for task %s", uploaded_count, clickup_task_id
@ -768,12 +942,12 @@ def write_press_releases(
log.warning("ClickUp attachment upload failed for %s: %s", clickup_task_id, e)
# ── Step 4: Generate 2 JSON-LD schemas (Sonnet + WebSearch) ───────────
log.info("[PR Pipeline] Step 4/4: Generating 2 JSON-LD schemas...")
log.info("[PR Pipeline] Step 4/5: Generating 2 JSON-LD schemas...")
schema_texts: list[str] = []
schema_files: list[str] = []
for i, pr_text in enumerate(pr_texts):
log.info("[PR Pipeline] Schema %d/2 for: %s", i + 1, winners[i][:60])
_set_status(ctx, f"Step 4/4: Generating schema {i + 1}/2...")
_set_status(ctx, f"Step 4/5: Generating schema {i + 1}/2...")
step_start = time.time()
schema_prompt = _build_schema_prompt(pr_text, company_name, url, schema_skill)
exec_tools = "WebSearch,WebFetch"

View File

@ -16,6 +16,7 @@ from pathlib import Path
from .autocora import archive_result, scan_results, submit_job
from .blm import find_cora_xlsx, run_generate, run_ingest
from .fact_check import fact_check_pr_files
from .claude_runner import (
RunResult,
build_prompt,
@ -632,6 +633,24 @@ def _dispatch_claude(
_cleanup_work_dir(result.work_dir)
return
# 5b. Fact-check PR files (Press Release only, graceful failure)
fc_status_lines: list[str] = []
if task.task_type == "Press Release":
log.info("Running adversarial fact-check for task %s", task.id)
company = task.get_field_value("Client") or ""
pr_topic = task.get_field_value("PR Topic") or ""
pr_keyword = task.get_field_value("Keyword") or ""
pr_url = task.get_field_value("IMSURL") or ""
fc_status_lines, fc_failed = fact_check_pr_files(
result.output_files,
company_name=company,
url=pr_url,
topic=pr_topic,
keyword=pr_keyword,
)
if fc_failed:
log.warning("Fact-check had failures for task %s", task.id)
# 6. Upload output files to ClickUp
uploaded = 0
for f in result.output_files:
@ -651,6 +670,8 @@ def _dispatch_claude(
# 9. Post success comment
summary = "Stage complete. %d file(s) attached." % uploaded
if fc_status_lines:
summary += "\n" + "\n".join(fc_status_lines)
if result.output:
# Include first 500 chars of Claude's output as context
truncated = result.output[:500]

View File

@ -0,0 +1,220 @@
"""Adversarial fact-checker for press release outputs.
Runs a second Claude Code pass on generated PR text files to catch
factual errors. Treats all client-provided data (company name, titles,
URLs, topic) as ground truth and only corrects claims the PR inferred
or fabricated beyond what was given.
Graceful failure: any error returns the original text untouched.
"""
from __future__ import annotations
import logging
import shutil
import subprocess
from pathlib import Path
log = logging.getLogger(__name__)
FACT_CHECK_MODEL = "sonnet"
FACT_CHECK_TIMEOUT = 300 # 5 minutes per PR
def build_fact_check_prompt(
    pr_text: str,
    company_name: str,
    url: str,
    topic: str,
    keyword: str,
) -> str:
    """Build the prompt for the adversarial fact-checker.

    Client-provided fields are pinned as ground truth so the reviewer
    never "corrects" them; the reply must be [NO_ERRORS], or [CORRECTED]
    plus a CHANGES: log, in the exact shape apply_fact_check parses.
    """
    return (
        "You are a factual accuracy reviewer for press releases. Your ONLY job is to "
        "find and correct statements that are factually wrong. You are NOT an editor.\n\n"
        "GROUND TRUTH -- the following data was provided by the client and is correct "
        "by definition. Do NOT change, question, or 'correct' any of it, even if your "
        "web search suggests something different:\n"
        f" - Company name: {company_name}\n"
        f" - Target URL: {url}\n"
        f" - Topic: {topic}\n"
        f" - Keyword: {keyword}\n"
        " - Any person names, titles, quotes, or contact details in the PR\n"
        " - Any product names, service names, or brand names\n"
        " - The overall framing, angle, and tone of the PR\n\n"
        "WHAT TO CHECK (use WebSearch/WebFetch to verify):\n"
        " - Industry statistics or market size claims\n"
        " - Historical dates or facts\n"
        " - Technical specifications not sourced from the client data\n"
        " - General knowledge claims (e.g. 'X is the leading cause of Y')\n"
        " - Geographic or regulatory facts\n\n"
        "RULES:\n"
        " - ONLY fix actual factual errors -- wrong numbers, wrong dates, wrong facts\n"
        " - Do NOT add content, remove content, restructure, or 'improve' anything\n"
        " - Do NOT change tone, style, word choice, or sentence structure\n"
        " - Do NOT suggest additions or enhancements\n"
        " - Make the MINIMUM change needed to fix each error\n"
        " - Preserve the exact formatting, paragraph breaks, and headline\n\n"
        "OUTPUT FORMAT:\n"
        " - If you find NO factual errors: output exactly [NO_ERRORS] and nothing else\n"
        " - If you find errors: output [CORRECTED] on the first line, then the full "
        "corrected PR text (preserving all formatting), then a blank line, then "
        "CHANGES: followed by a numbered list of what you changed and why\n\n"
        "Press release to review:\n"
        "---\n"
        f"{pr_text}\n"
        "---"
    )
def apply_fact_check(raw_output: str, original_text: str) -> tuple[str, str, str]:
    """Parse the fact-checker's reply into (text, status, changes).

    status is one of: "clean", "corrected", "skipped". Anything
    unparseable -- or a "correction" whose size drifts too far from the
    original -- falls back to the untouched original text.
    """
    reply = (raw_output or "").strip()
    if not reply:
        # Empty or whitespace-only output: nothing usable came back.
        return original_text, "skipped", ""
    if reply.startswith("[NO_ERRORS]"):
        # Reviewer confirmed the PR as-is.
        return original_text, "clean", ""
    if not reply.startswith("[CORRECTED]"):
        # Neither sentinel: treat as unparseable and keep the original.
        return original_text, "skipped", ""
    # Remove the [CORRECTED] prefix and peel off the trailing change log.
    payload = reply[len("[CORRECTED]"):].strip()
    marker = "\nCHANGES:"
    if marker in payload:
        corrected_part, change_log = payload.split(marker, 1)
        corrected = corrected_part.strip()
        changes = change_log.strip()
    else:
        corrected, changes = payload, ""
    if not corrected:
        return original_text, "skipped", ""
    # Safety valve: a rewrite that moves the word count by more than 15%
    # is an editorial rewrite, not a factual fix -- reject it.
    orig_wc = len(original_text.split())
    new_wc = len(corrected.split())
    if orig_wc > 0 and abs(new_wc - orig_wc) / orig_wc > 0.15:
        log.warning(
            "Fact-check rejected: word count changed too much "
            "(%d -> %d, %.0f%% delta)",
            orig_wc, new_wc, abs(new_wc - orig_wc) / orig_wc * 100,
        )
        return original_text, "skipped", "rejected -- word count delta too large"
    return corrected, "corrected", changes
def fact_check_pr_files(
    output_files: list[Path],
    company_name: str,
    url: str,
    topic: str,
    keyword: str,
    timeout: int = FACT_CHECK_TIMEOUT,
) -> tuple[list[str], bool]:
    """Fact-check every .txt press release in *output_files*, in place.

    Each PR is sent through the claude CLI with WebSearch/WebFetch enabled;
    accepted corrections are written back to the same file.

    Returns:
        (status_lines, any_failed) where status_lines is a list of
        human-readable results per PR, and any_failed is True if the
        fact-checker could not run on at least one PR.
    """
    claude_bin = shutil.which("claude")
    if not claude_bin:
        # Graceful degradation: report the skip rather than raising.
        log.warning("Fact-check: claude CLI not found, skipping")
        return ["Fact-check: claude CLI not found, skipped"], True

    # Only .txt files are actual press releases; other outputs (e.g. a
    # "Headlines Evaluation.md") are deliberately left alone.
    pr_paths = [f for f in output_files if f.suffix == ".txt"]
    if not pr_paths:
        return [], False

    status_lines: list[str] = []
    any_failed = False
    for idx, pr_path in enumerate(pr_paths):
        label = f"PR {chr(65 + idx)}"  # PR A, PR B, etc.
        try:
            source_text = pr_path.read_text(encoding="utf-8")
            if not source_text.strip():
                # Empty file: nothing to check, no status line emitted.
                continue
            prompt = build_fact_check_prompt(
                source_text, company_name, url, topic, keyword
            )
            cmd = [
                claude_bin,
                "-p", prompt,
                "--output-format", "text",
                "--permission-mode", "bypassPermissions",
                "--allowedTools", "WebSearch,WebFetch",
                "--max-turns", "10",
                "--model", FACT_CHECK_MODEL,
            ]
            log.info("Fact-checking %s: %s", label, pr_path.name)
            proc = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=timeout,
                cwd=str(pr_path.parent),
            )
            if proc.returncode != 0:
                log.warning(
                    "Fact-check %s failed (exit %d): %s",
                    label, proc.returncode, (proc.stderr or "")[:500],
                )
                status_lines.append(
                    f"Fact-check {label}: could not run -- manual review recommended"
                )
                any_failed = True
                continue
            corrected, status, changes = apply_fact_check(proc.stdout, source_text)
            if status == "corrected":
                # Persist accepted corrections back to the same PR file.
                pr_path.write_text(corrected, encoding="utf-8")
                log.info("Fact-check %s: corrections applied", label)
                line = f"Fact-check {label}: corrections applied"
                if changes:
                    line += "\n %s" % changes
                status_lines.append(line)
            elif status == "clean":
                log.info("Fact-check %s: no errors found", label)
                status_lines.append(f"Fact-check {label}: no errors found")
            else:
                # "skipped" -- parser could not use the model output.
                log.warning("Fact-check %s: skipped (unparseable output)", label)
                status_lines.append(
                    f"Fact-check {label}: could not run -- manual review recommended"
                )
                any_failed = True
        except subprocess.TimeoutExpired:
            log.warning("Fact-check %s timed out after %ds", label, timeout)
            status_lines.append(
                f"Fact-check {label}: timed out -- manual review recommended"
            )
            any_failed = True
        except Exception as e:
            # Catch-all keeps the pipeline alive; the PR simply ships
            # with a manual-review note instead.
            log.warning("Fact-check %s error: %s", label, e)
            status_lines.append(
                f"Fact-check {label}: could not run -- manual review recommended"
            )
            any_failed = True
    return status_lines, any_failed

View File

@ -0,0 +1,126 @@
"""Tests for the adversarial fact-checker helpers in press_release.py."""
from cheddahbot.tools.press_release import _apply_fact_check, _build_fact_check_prompt
class TestApplyFactCheck:
    """Tests for _apply_fact_check output parsing."""

    # Realistic PR fixture. The "500 widget variants" figure is the fact
    # the corrected-output tests pretend the checker fixed.
    ORIGINAL = (
        "Acme Corp Delivers Advanced Widget Solutions\n\n"
        "Acme Corp, a leading manufacturer of widgets, today highlighted "
        "its expanded product line. The company, based in Milwaukee, Wisconsin, "
        "produces over 500 widget variants for industrial applications."
    )

    def test_no_errors_returns_original(self):
        # [NO_ERRORS] sentinel: the original text passes through untouched.
        text, status, changes = _apply_fact_check("[NO_ERRORS]", self.ORIGINAL)
        assert status == "clean"
        assert text == self.ORIGINAL
        assert changes == ""

    def test_no_errors_with_trailing_whitespace(self):
        # Sentinel detection must tolerate trailing whitespace/newlines.
        text, status, changes = _apply_fact_check("[NO_ERRORS] \n", self.ORIGINAL)
        assert status == "clean"
        assert text == self.ORIGINAL

    def test_corrected_with_changes(self):
        # [CORRECTED] + CHANGES: both the new text and the log are extracted.
        corrected_pr = self.ORIGINAL.replace("500 widget", "300 widget")
        raw = (
            f"[CORRECTED]\n{corrected_pr}\n\n"
            "CHANGES:\n1. Changed '500 widget variants' to '300 widget variants' "
            "-- company website lists 300."
        )
        text, status, changes = _apply_fact_check(raw, self.ORIGINAL)
        assert status == "corrected"
        assert "300 widget" in text
        assert "500" not in text
        assert "300 widget variants" in changes

    def test_corrected_without_changes_section(self):
        # A missing CHANGES: section still yields the corrected text.
        corrected_pr = self.ORIGINAL.replace("500", "300")
        raw = f"[CORRECTED]\n{corrected_pr}"
        text, status, changes = _apply_fact_check(raw, self.ORIGINAL)
        assert status == "corrected"
        assert "300" in text
        assert changes == ""

    def test_empty_output_returns_skipped(self):
        text, status, changes = _apply_fact_check("", self.ORIGINAL)
        assert status == "skipped"
        assert text == self.ORIGINAL

    def test_none_like_output_returns_skipped(self):
        # Whitespace-only output counts as empty.
        text, status, changes = _apply_fact_check(" \n ", self.ORIGINAL)
        assert status == "skipped"
        assert text == self.ORIGINAL

    def test_garbage_output_returns_skipped(self):
        # Output without either sentinel is treated as unparseable.
        text, status, changes = _apply_fact_check(
            "I reviewed the press release and it looks good overall.", self.ORIGINAL
        )
        assert status == "skipped"
        assert text == self.ORIGINAL

    def test_rejects_oversized_rewrite(self):
        """If fact-checker rewrites too much (>15% word count delta), reject."""
        # Double the content -- way more than 15%
        bloated = self.ORIGINAL + "\n\n" + self.ORIGINAL + "\n\nExtra content here."
        raw = f"[CORRECTED]\n{bloated}\n\nCHANGES:\n1. Added more detail."
        text, status, changes = _apply_fact_check(raw, self.ORIGINAL)
        assert status == "skipped"
        assert text == self.ORIGINAL
        assert "word count delta" in changes

    def test_accepts_minor_word_count_change(self):
        """Small changes (within 15%) should be accepted."""
        # Change one word -- well within 15%
        minor_edit = self.ORIGINAL.replace("500 widget variants", "480 widget variants")
        raw = (
            f"[CORRECTED]\n{minor_edit}\n\n"
            "CHANGES:\n1. Corrected variant count from 500 to 480."
        )
        text, status, changes = _apply_fact_check(raw, self.ORIGINAL)
        assert status == "corrected"
        assert "480" in text

    def test_corrected_but_empty_body_returns_skipped(self):
        # [CORRECTED] with no body: nothing to apply, keep the original.
        text, status, changes = _apply_fact_check("[CORRECTED]\n", self.ORIGINAL)
        assert status == "skipped"
        assert text == self.ORIGINAL
class TestBuildFactCheckPrompt:
    """Tests for _build_fact_check_prompt structure."""

    def test_includes_ground_truth_data(self):
        # Every client-supplied field must appear in the ground-truth block.
        prompt = _build_fact_check_prompt(
            "Some PR text here.",
            company_name="Acme Corp",
            url="https://acme.com",
            topic="widgets",
            keyword="industrial widgets",
        )
        assert "Acme Corp" in prompt
        assert "https://acme.com" in prompt
        assert "widgets" in prompt
        assert "industrial widgets" in prompt
        assert "ground truth" in prompt.lower() or "GROUND TRUTH" in prompt

    def test_includes_pr_text(self):
        # The PR under review is embedded verbatim in the prompt.
        prompt = _build_fact_check_prompt(
            "The quick brown fox.",
            company_name="Test",
            url="https://test.com",
            topic="foxes",
            keyword="brown fox",
        )
        assert "The quick brown fox." in prompt

    def test_output_format_instructions(self):
        # Both sentinels and the CHANGES: marker must be spelled out, since
        # _apply_fact_check parses the reply against these exact tokens.
        prompt = _build_fact_check_prompt(
            "Text.", company_name="X", url="u", topic="t", keyword="k"
        )
        assert "[NO_ERRORS]" in prompt
        assert "[CORRECTED]" in prompt
        assert "CHANGES:" in prompt

View File

@ -0,0 +1,122 @@
"""Tests for clickup_runner.fact_check module."""
from clickup_runner.fact_check import apply_fact_check, build_fact_check_prompt
class TestApplyFactCheck:
    """Tests for apply_fact_check output parsing."""

    # Realistic PR fixture. The "500 widget variants" figure is the fact
    # the corrected-output tests pretend the checker fixed.
    ORIGINAL = (
        "Acme Corp Delivers Advanced Widget Solutions\n\n"
        "Acme Corp, a leading manufacturer of widgets, today highlighted "
        "its expanded product line. The company, based in Milwaukee, Wisconsin, "
        "produces over 500 widget variants for industrial applications."
    )

    def test_no_errors_returns_original(self):
        # [NO_ERRORS] sentinel: the original text passes through untouched.
        text, status, changes = apply_fact_check("[NO_ERRORS]", self.ORIGINAL)
        assert status == "clean"
        assert text == self.ORIGINAL
        assert changes == ""

    def test_no_errors_with_trailing_whitespace(self):
        # Sentinel detection must tolerate trailing whitespace/newlines.
        text, status, changes = apply_fact_check("[NO_ERRORS] \n", self.ORIGINAL)
        assert status == "clean"
        assert text == self.ORIGINAL

    def test_corrected_with_changes(self):
        # NOTE: the adjacent string literals concatenate at compile time,
        # so the single trailing "% corrected_pr" formats the whole template.
        corrected_pr = self.ORIGINAL.replace("500 widget", "300 widget")
        raw = (
            "[CORRECTED]\n%s\n\n"
            "CHANGES:\n1. Changed '500 widget variants' to '300 widget variants' "
            "-- company website lists 300." % corrected_pr
        )
        text, status, changes = apply_fact_check(raw, self.ORIGINAL)
        assert status == "corrected"
        assert "300 widget" in text
        assert "500" not in text
        assert "300 widget variants" in changes

    def test_corrected_without_changes_section(self):
        # A missing CHANGES: section still yields the corrected text.
        corrected_pr = self.ORIGINAL.replace("500", "300")
        raw = "[CORRECTED]\n%s" % corrected_pr
        text, status, changes = apply_fact_check(raw, self.ORIGINAL)
        assert status == "corrected"
        assert "300" in text
        assert changes == ""

    def test_empty_output_returns_skipped(self):
        text, status, changes = apply_fact_check("", self.ORIGINAL)
        assert status == "skipped"
        assert text == self.ORIGINAL

    def test_whitespace_only_returns_skipped(self):
        # Whitespace-only output counts as empty.
        text, status, changes = apply_fact_check(" \n ", self.ORIGINAL)
        assert status == "skipped"
        assert text == self.ORIGINAL

    def test_garbage_output_returns_skipped(self):
        # Output without either sentinel is treated as unparseable.
        text, status, changes = apply_fact_check(
            "I reviewed the press release and it looks good overall.", self.ORIGINAL
        )
        assert status == "skipped"
        assert text == self.ORIGINAL

    def test_rejects_oversized_rewrite(self):
        # Doubling the content is far beyond the 15% word-count guard.
        bloated = self.ORIGINAL + "\n\n" + self.ORIGINAL + "\n\nExtra content."
        raw = "[CORRECTED]\n%s\n\nCHANGES:\n1. Added more detail." % bloated
        text, status, changes = apply_fact_check(raw, self.ORIGINAL)
        assert status == "skipped"
        assert text == self.ORIGINAL
        assert "word count delta" in changes

    def test_accepts_minor_word_count_change(self):
        # A one-word numeric fix stays well within the 15% guard.
        minor_edit = self.ORIGINAL.replace("500 widget variants", "480 widget variants")
        raw = (
            "[CORRECTED]\n%s\n\n"
            "CHANGES:\n1. Corrected variant count from 500 to 480." % minor_edit
        )
        text, status, changes = apply_fact_check(raw, self.ORIGINAL)
        assert status == "corrected"
        assert "480" in text

    def test_corrected_empty_body_returns_skipped(self):
        # [CORRECTED] with no body: nothing to apply, keep the original.
        text, status, changes = apply_fact_check("[CORRECTED]\n", self.ORIGINAL)
        assert status == "skipped"
        assert text == self.ORIGINAL
class TestBuildFactCheckPrompt:
    """Tests for build_fact_check_prompt structure."""

    def test_includes_ground_truth_data(self):
        # Every client-supplied field must appear in the ground-truth block.
        prompt = build_fact_check_prompt(
            "Some PR text.",
            company_name="Acme Corp",
            url="https://acme.com",
            topic="widgets",
            keyword="industrial widgets",
        )
        assert "Acme Corp" in prompt
        assert "https://acme.com" in prompt
        assert "widgets" in prompt
        assert "industrial widgets" in prompt
        assert "GROUND TRUTH" in prompt

    def test_includes_pr_text(self):
        # The PR under review is embedded verbatim in the prompt.
        prompt = build_fact_check_prompt(
            "The quick brown fox.",
            company_name="Test",
            url="https://test.com",
            topic="foxes",
            keyword="brown fox",
        )
        assert "The quick brown fox." in prompt

    def test_output_format_instructions(self):
        # Both sentinels and the CHANGES: marker must be spelled out, since
        # apply_fact_check parses the reply against these exact tokens.
        prompt = build_fact_check_prompt(
            "Text.", company_name="X", url="u", topic="t", keyword="k"
        )
        assert "[NO_ERRORS]" in prompt
        assert "[CORRECTED]" in prompt
        assert "CHANGES:" in prompt