def _build_fact_check_prompt(
    pr_text: str,
    company_name: str,
    url: str,
    topic: str,
    keyword: str,
) -> str:
    """Build the prompt for the adversarial fact-checker step.

    The prompt declares the client-supplied values (company name, URL,
    topic, keyword) as ground truth, lists the kinds of claims the
    reviewer should verify, and fixes the reply format to either
    ``[NO_ERRORS]`` or ``[CORRECTED]`` + text + ``CHANGES:`` so that
    ``_apply_fact_check`` can parse it.

    Args:
        pr_text: Full press-release text to review.
        company_name: Client company name (ground truth).
        url: Target URL (ground truth).
        topic: Press-release topic (ground truth).
        keyword: Target keyword (ground truth).

    Returns:
        The complete prompt string, with the press release embedded
        between two ``---`` delimiter lines.
    """
    return (
        "You are a factual accuracy reviewer for press releases. Your ONLY job is to "
        "find and correct statements that are factually wrong. You are NOT an editor.\n\n"
        "GROUND TRUTH -- the following data was provided by the client and is correct "
        "by definition. Do NOT change, question, or 'correct' any of it, even if your "
        "web search suggests something different:\n"
        f" - Company name: {company_name}\n"
        f" - Target URL: {url}\n"
        f" - Topic: {topic}\n"
        f" - Keyword: {keyword}\n"
        " - Any person names, titles, quotes, or contact details in the PR\n"
        " - Any product names, service names, or brand names\n"
        " - The overall framing, angle, and tone of the PR\n\n"
        "WHAT TO CHECK (use WebSearch/WebFetch to verify):\n"
        " - Industry statistics or market size claims\n"
        " - Historical dates or facts\n"
        " - Technical specifications not sourced from the client data\n"
        " - General knowledge claims (e.g. 'X is the leading cause of Y')\n"
        " - Geographic or regulatory facts\n\n"
        "RULES:\n"
        " - ONLY fix actual factual errors -- wrong numbers, wrong dates, wrong facts\n"
        " - Do NOT add content, remove content, restructure, or 'improve' anything\n"
        " - Do NOT change tone, style, word choice, or sentence structure\n"
        " - Do NOT suggest additions or enhancements\n"
        " - Make the MINIMUM change needed to fix each error\n"
        " - Preserve the exact formatting, paragraph breaks, and headline\n\n"
        "OUTPUT FORMAT:\n"
        " - If you find NO factual errors: output exactly [NO_ERRORS] and nothing else\n"
        " - If you find errors: output [CORRECTED] on the first line, then the full "
        "corrected PR text (preserving all formatting), then a blank line, then "
        "CHANGES: followed by a numbered list of what you changed and why\n\n"
        "Press release to review:\n"
        "---\n"
        f"{pr_text}\n"
        "---"
    )


def _apply_fact_check(
    raw_output: str,
    original_text: str,
    *,
    max_word_delta: float = 0.15,
) -> tuple[str, str, str]:
    """Parse fact-checker output and decide whether to accept it.

    Args:
        raw_output: Raw reply from the fact-checker (may be empty or None).
        original_text: The press release that was sent for review.
        max_word_delta: Largest relative word-count change (0.15 == 15%)
            accepted before a "correction" is treated as a rewrite.

    Returns:
        ``(text, status, changes)`` where ``status`` is one of:

        * ``"clean"``     -- reply started with ``[NO_ERRORS]``; ``text`` is
          the original.
        * ``"corrected"`` -- reply started with ``[CORRECTED]`` and passed
          every safety check; ``text`` is the corrected release and
          ``changes`` is whatever followed the ``CHANGES:`` marker.
        * ``"skipped"``   -- anything else (empty or unparseable reply, or a
          rejected rewrite); ``text`` is the original and ``changes`` holds
          the rejection reason when there is one.
    """
    if not raw_output or not raw_output.strip():
        return original_text, "skipped", ""

    stripped = raw_output.strip()

    if stripped.startswith("[NO_ERRORS]"):
        return original_text, "clean", ""

    if not stripped.startswith("[CORRECTED]"):
        # Neither marker present: unparseable output.
        return original_text, "skipped", ""

    body = stripped[len("[CORRECTED]"):].strip()

    # A reply that goes straight from the marker to the change log carries no
    # corrected text; never let the change log stand in for the release.
    if body.startswith("CHANGES:"):
        return original_text, "skipped", ""

    # Everything before the first "CHANGES:" line is the corrected release,
    # everything after it is the change log.
    text_part, marker, change_log = body.partition("\nCHANGES:")
    corrected = text_part.strip()
    changes = change_log.strip() if marker else ""

    if not corrected:
        return original_text, "skipped", ""

    orig_wc = _word_count(original_text)
    if orig_wc == 0:
        # Without a baseline the size guard below cannot run, so refuse the
        # rewrite instead of accepting arbitrary text.
        log.warning("Fact-check rejected: original text has no words to compare against")
        return original_text, "skipped", "rejected -- original text is empty"

    if corrected == original_text.strip():
        # The model claimed corrections but returned the same text; do not
        # report (or re-write files for) corrections that were never made.
        log.warning("Fact-check rejected: [CORRECTED] output is identical to the original")
        return original_text, "skipped", "rejected -- no textual change"

    # Safety: a fact fix should barely move the word count; a large swing
    # means the model rewrote the release.
    new_wc = _word_count(corrected)
    delta = abs(new_wc - orig_wc) / orig_wc
    if delta > max_word_delta:
        log.warning(
            "Fact-check rejected: word count changed too much "
            "(%d -> %d, %.0f%% delta)",
            orig_wc, new_wc, delta * 100,
        )
        return original_text, "skipped", "rejected -- word count delta too large"

    return corrected, "corrected", changes
write_press_releases( headlines_file.write_text(headlines_raw.strip(), encoding="utf-8") # ── Step 2: AI judge picks best 2 (chat brain) ─────────────────────── - log.info("[PR Pipeline] Step 2/4: AI judge selecting best 2 headlines...") - _set_status(ctx, "Step 2/4: AI judge selecting best 2 headlines...") + log.info("[PR Pipeline] Step 2/5: AI judge selecting best 2 headlines...") + _set_status(ctx, "Step 2/5: AI judge selecting best 2 headlines...") step_start = time.time() judge_prompt = _build_judge_prompt(headlines_raw, headlines_ref, topic) messages = [ @@ -666,7 +765,7 @@ def write_press_releases( winners = winners[:2] # ── Step 3: Write 2 press releases (execution brain x 2) ───────────── - log.info("[PR Pipeline] Step 3/4: Writing 2 press releases...") + log.info("[PR Pipeline] Step 3/5: Writing 2 press releases...") anchor_phrase = _derive_anchor_phrase(company_name, keyword) if keyword else "" pr_texts: list[str] = [] pr_files: list[str] = [] @@ -674,7 +773,7 @@ def write_press_releases( anchor_warnings: list[str] = [] for i, headline in enumerate(winners): log.info("[PR Pipeline] Writing PR %d/2: %s", i + 1, headline[:60]) - _set_status(ctx, f"Step 3/4: Writing press release {i + 1}/2 — {headline[:60]}...") + _set_status(ctx, f"Step 3/5: Writing press release {i + 1}/2 — {headline[:60]}...") step_start = time.time() pr_prompt = _build_pr_prompt( headline, @@ -737,6 +836,65 @@ def write_press_releases( text_to_docx(clean_result, docx_path) docx_files.append(str(docx_path)) + # ── Step 3.5: Adversarial fact-check (Sonnet + WebSearch) ─────────── + log.info("[PR Pipeline] Step 3.5/5: Running adversarial fact-check...") + fact_check_statuses: list[str] = [] # per-PR: "clean", "corrected", "skipped" + fact_check_changes: list[str] = [] # per-PR change log (empty if clean/skipped) + fact_check_failed = False + for i, pr_text in enumerate(pr_texts): + log.info("[PR Pipeline] Fact-checking PR %d/2...", i + 1) + _set_status(ctx, f"Step 3.5/5: Fact-checking PR 
{i + 1}/2...") + step_start = time.time() + try: + fc_prompt = _build_fact_check_prompt( + pr_text, company_name, url, topic, keyword + ) + fc_result = agent.execute_task( + fc_prompt, tools="WebSearch,WebFetch", model=FACT_CHECK_MODEL + ) + corrected, status, changes = _apply_fact_check(fc_result, pr_text) + fact_check_statuses.append(status) + fact_check_changes.append(changes) + + if status == "corrected": + pr_texts[i] = corrected + # Re-write files with corrected text + Path(pr_files[i]).write_text(corrected, encoding="utf-8") + text_to_docx(corrected, Path(docx_files[i])) + log.info( + "[PR Pipeline] PR %d: %d correction(s) applied", + i + 1, changes.count("\n") + 1 if changes else 1, + ) + elif status == "clean": + log.info("[PR Pipeline] PR %d: no factual errors found", i + 1) + else: + log.warning("[PR Pipeline] PR %d: fact-check skipped (unparseable output)", i + 1) + + elapsed = round(time.time() - step_start, 1) + cost_log.append( + { + "step": f"3.5{chr(97 + i)}. Fact-check PR {i + 1}", + "model": FACT_CHECK_MODEL, + "elapsed_s": elapsed, + } + ) + except Exception as e: + fact_check_failed = True + fact_check_statuses.append("skipped") + fact_check_changes.append("") + log.warning("[PR Pipeline] PR %d fact-check failed: %s", i + 1, e) + + # Notify ClickUp if fact-check could not run at all + if fact_check_failed and clickup_task_id and cu_client: + try: + cu_client.add_comment( + clickup_task_id, + "Note: factual accuracy check could not be run on this PR. " + "Manual review recommended.", + ) + except Exception as e: + log.warning("ClickUp fact-check warning failed for %s: %s", clickup_task_id, e) + # ── ClickUp: upload docx attachments + comment ───────────────────── uploaded_count = 0 failed_uploads: list[str] = [] @@ -755,11 +913,27 @@ def write_press_releases( f"\n[WARNING]Warning: {len(failed_uploads)} attachment(s) failed to upload. 
" f"Files saved locally at:\n{paths_list}" ) + # Build fact-check summary for comment + fc_summary = "" + for fi, fc_status in enumerate(fact_check_statuses): + label = f"PR {chr(65 + fi)}" + if fc_status == "corrected": + fc_summary += f"\nFact-check {label}: corrections applied" + if fact_check_changes[fi]: + fc_summary += f"\n {fact_check_changes[fi]}" + elif fc_status == "clean": + fc_summary += f"\nFact-check {label}: no errors found" + else: + fc_summary += ( + f"\nFact-check {label}: could not run -- manual review recommended" + ) + cu_client.add_comment( clickup_task_id, - f"📎 Saved {len(docx_files)} press release(s). " + f"Saved {len(docx_files)} press release(s). " f"{uploaded_count} file(s) attached.\n" - f"Generating JSON-LD schemas next...{upload_warning}", + f"Generating JSON-LD schemas next...{upload_warning}" + f"{fc_summary}", ) log.info( "ClickUp: uploaded %d attachments for task %s", uploaded_count, clickup_task_id @@ -768,12 +942,12 @@ def write_press_releases( log.warning("ClickUp attachment upload failed for %s: %s", clickup_task_id, e) # ── Step 4: Generate 2 JSON-LD schemas (Sonnet + WebSearch) ─────────── - log.info("[PR Pipeline] Step 4/4: Generating 2 JSON-LD schemas...") + log.info("[PR Pipeline] Step 4/5: Generating 2 JSON-LD schemas...") schema_texts: list[str] = [] schema_files: list[str] = [] for i, pr_text in enumerate(pr_texts): log.info("[PR Pipeline] Schema %d/2 for: %s", i + 1, winners[i][:60]) - _set_status(ctx, f"Step 4/4: Generating schema {i + 1}/2...") + _set_status(ctx, f"Step 4/5: Generating schema {i + 1}/2...") step_start = time.time() schema_prompt = _build_schema_prompt(pr_text, company_name, url, schema_skill) exec_tools = "WebSearch,WebFetch" diff --git a/clickup_runner/__main__.py b/clickup_runner/__main__.py index 176cf7d..06e9358 100644 --- a/clickup_runner/__main__.py +++ b/clickup_runner/__main__.py @@ -16,6 +16,7 @@ from pathlib import Path from .autocora import archive_result, scan_results, submit_job from 
.blm import find_cora_xlsx, run_generate, run_ingest +from .fact_check import fact_check_pr_files from .claude_runner import ( RunResult, build_prompt, @@ -632,6 +633,24 @@ def _dispatch_claude( _cleanup_work_dir(result.work_dir) return + # 5b. Fact-check PR files (Press Release only, graceful failure) + fc_status_lines: list[str] = [] + if task.task_type == "Press Release": + log.info("Running adversarial fact-check for task %s", task.id) + company = task.get_field_value("Client") or "" + pr_topic = task.get_field_value("PR Topic") or "" + pr_keyword = task.get_field_value("Keyword") or "" + pr_url = task.get_field_value("IMSURL") or "" + fc_status_lines, fc_failed = fact_check_pr_files( + result.output_files, + company_name=company, + url=pr_url, + topic=pr_topic, + keyword=pr_keyword, + ) + if fc_failed: + log.warning("Fact-check had failures for task %s", task.id) + # 6. Upload output files to ClickUp uploaded = 0 for f in result.output_files: @@ -651,6 +670,8 @@ def _dispatch_claude( # 9. Post success comment summary = "Stage complete. %d file(s) attached." % uploaded + if fc_status_lines: + summary += "\n" + "\n".join(fc_status_lines) if result.output: # Include first 500 chars of Claude's output as context truncated = result.output[:500] diff --git a/clickup_runner/fact_check.py b/clickup_runner/fact_check.py new file mode 100644 index 0000000..a950f4f --- /dev/null +++ b/clickup_runner/fact_check.py @@ -0,0 +1,220 @@ +"""Adversarial fact-checker for press release outputs. + +Runs a second Claude Code pass on generated PR text files to catch +factual errors. Treats all client-provided data (company name, titles, +URLs, topic) as ground truth and only corrects claims the PR inferred +or fabricated beyond what was given. + +Graceful failure: any error returns the original text untouched. 
from __future__ import annotations

import logging
import shutil
import subprocess
from pathlib import Path

log = logging.getLogger(__name__)

FACT_CHECK_MODEL = "sonnet"
FACT_CHECK_TIMEOUT = 300  # 5 minutes per PR


def build_fact_check_prompt(
    pr_text: str,
    company_name: str,
    url: str,
    topic: str,
    keyword: str,
) -> str:
    """Build the prompt for the adversarial fact-checker.

    The prompt declares the client-supplied values (company name, URL,
    topic, keyword) as ground truth, lists the kinds of claims to verify,
    and fixes the reply format to either ``[NO_ERRORS]`` or
    ``[CORRECTED]`` + text + ``CHANGES:`` so that ``apply_fact_check``
    can parse it.

    Args:
        pr_text: Full press-release text to review.
        company_name: Client company name (ground truth).
        url: Target URL (ground truth).
        topic: Press-release topic (ground truth).
        keyword: Target keyword (ground truth).

    Returns:
        The complete prompt string, with the press release embedded
        between two ``---`` delimiter lines.
    """
    return (
        "You are a factual accuracy reviewer for press releases. Your ONLY job is to "
        "find and correct statements that are factually wrong. You are NOT an editor.\n\n"
        "GROUND TRUTH -- the following data was provided by the client and is correct "
        "by definition. Do NOT change, question, or 'correct' any of it, even if your "
        "web search suggests something different:\n"
        " - Company name: %s\n"
        " - Target URL: %s\n"
        " - Topic: %s\n"
        " - Keyword: %s\n"
        " - Any person names, titles, quotes, or contact details in the PR\n"
        " - Any product names, service names, or brand names\n"
        " - The overall framing, angle, and tone of the PR\n\n"
        "WHAT TO CHECK (use WebSearch/WebFetch to verify):\n"
        " - Industry statistics or market size claims\n"
        " - Historical dates or facts\n"
        " - Technical specifications not sourced from the client data\n"
        " - General knowledge claims (e.g. 'X is the leading cause of Y')\n"
        " - Geographic or regulatory facts\n\n"
        "RULES:\n"
        " - ONLY fix actual factual errors -- wrong numbers, wrong dates, wrong facts\n"
        " - Do NOT add content, remove content, restructure, or 'improve' anything\n"
        " - Do NOT change tone, style, word choice, or sentence structure\n"
        " - Do NOT suggest additions or enhancements\n"
        " - Make the MINIMUM change needed to fix each error\n"
        " - Preserve the exact formatting, paragraph breaks, and headline\n\n"
        "OUTPUT FORMAT:\n"
        " - If you find NO factual errors: output exactly [NO_ERRORS] and nothing else\n"
        " - If you find errors: output [CORRECTED] on the first line, then the full "
        "corrected PR text (preserving all formatting), then a blank line, then "
        "CHANGES: followed by a numbered list of what you changed and why\n\n"
        "Press release to review:\n"
        "---\n"
        "%s\n"
        "---"
    ) % (company_name, url, topic, keyword, pr_text)


def apply_fact_check(
    raw_output: str,
    original_text: str,
    *,
    max_word_delta: float = 0.15,
) -> tuple[str, str, str]:
    """Parse fact-checker output and decide whether to accept it.

    Args:
        raw_output: Raw reply from the fact-checker (may be empty or None).
        original_text: The press release that was sent for review.
        max_word_delta: Largest relative word-count change (0.15 == 15%)
            accepted before a "correction" is treated as a rewrite.

    Returns:
        ``(text, status, changes)`` where ``status`` is one of:

        * ``"clean"``     -- reply started with ``[NO_ERRORS]``; ``text`` is
          the original.
        * ``"corrected"`` -- reply started with ``[CORRECTED]`` and passed
          every safety check; ``text`` is the corrected release and
          ``changes`` is whatever followed the ``CHANGES:`` marker.
        * ``"skipped"``   -- anything else (empty or unparseable reply, or a
          rejected rewrite); ``text`` is the original and ``changes`` holds
          the rejection reason when there is one.
    """
    if not raw_output or not raw_output.strip():
        return original_text, "skipped", ""

    stripped = raw_output.strip()

    if stripped.startswith("[NO_ERRORS]"):
        return original_text, "clean", ""

    if not stripped.startswith("[CORRECTED]"):
        # Neither marker present: unparseable output.
        return original_text, "skipped", ""

    body = stripped[len("[CORRECTED]"):].strip()

    # A reply that goes straight from the marker to the change log carries no
    # corrected text; never let the change log stand in for the release.
    if body.startswith("CHANGES:"):
        return original_text, "skipped", ""

    # Everything before the first "CHANGES:" line is the corrected release,
    # everything after it is the change log.
    text_part, marker, change_log = body.partition("\nCHANGES:")
    corrected = text_part.strip()
    changes = change_log.strip() if marker else ""

    if not corrected:
        return original_text, "skipped", ""

    orig_wc = len(original_text.split())
    if orig_wc == 0:
        # Without a baseline the size guard below cannot run, so refuse the
        # rewrite instead of accepting arbitrary text.
        log.warning("Fact-check rejected: original text has no words to compare against")
        return original_text, "skipped", "rejected -- original text is empty"

    if corrected == original_text.strip():
        # The model claimed corrections but returned the same text; do not
        # report (or re-write files for) corrections that were never made.
        log.warning("Fact-check rejected: [CORRECTED] output is identical to the original")
        return original_text, "skipped", "rejected -- no textual change"

    # Safety: a fact fix should barely move the word count; a large swing
    # means the model rewrote the release.
    new_wc = len(corrected.split())
    delta = abs(new_wc - orig_wc) / orig_wc
    if delta > max_word_delta:
        log.warning(
            "Fact-check rejected: word count changed too much "
            "(%d -> %d, %.0f%% delta)",
            orig_wc, new_wc, delta * 100,
        )
        return original_text, "skipped", "rejected -- word count delta too large"

    return corrected, "corrected", changes


def _fact_check_file(
    claude_bin: str,
    txt_file: Path,
    label: str,
    company_name: str,
    url: str,
    topic: str,
    keyword: str,
    timeout: int,
) -> tuple[str | None, bool]:
    """Fact-check one PR file; return ``(status_line_or_None, failed)``.

    Rewrites ``txt_file`` in place when corrections are accepted. Lets
    ``subprocess.TimeoutExpired`` and I/O errors propagate to the caller.
    """
    original = txt_file.read_text(encoding="utf-8")
    if not original.strip():
        # Nothing to review; produces no status line and is not a failure.
        log.info("Fact-check %s: %s is empty, nothing to check", label, txt_file.name)
        return None, False

    prompt = build_fact_check_prompt(original, company_name, url, topic, keyword)

    # NOTE(review): "bypassPermissions" presumably disables permission
    # prompts for every tool, in which case --allowedTools may not actually
    # restrict this run to WebSearch/WebFetch -- confirm against the Claude
    # Code CLI documentation before relying on it as a sandbox.
    cmd = [
        claude_bin,
        "-p", prompt,
        "--output-format", "text",
        "--permission-mode", "bypassPermissions",
        "--allowedTools", "WebSearch,WebFetch",
        "--max-turns", "10",
        "--model", FACT_CHECK_MODEL,
    ]

    log.info("Fact-checking %s: %s", label, txt_file.name)
    result = subprocess.run(
        cmd,
        capture_output=True,
        text=True,
        timeout=timeout,
        cwd=str(txt_file.parent),
    )

    manual_review = "Fact-check %s: could not run -- manual review recommended" % label

    if result.returncode != 0:
        log.warning(
            "Fact-check %s failed (exit %d): %s",
            label, result.returncode, (result.stderr or "")[:500],
        )
        return manual_review, True

    corrected, status, changes = apply_fact_check(result.stdout, original)

    if status == "corrected":
        # NOTE(review): only the .txt is rewritten here; if the output list
        # also holds a rendered copy of the same PR (e.g. a .docx), that copy
        # would still contain the uncorrected text -- confirm with the caller.
        txt_file.write_text(corrected, encoding="utf-8")
        log.info("Fact-check %s: corrections applied", label)
        line = "Fact-check %s: corrections applied" % label
        if changes:
            line += "\n %s" % changes
        return line, False

    if status == "clean":
        log.info("Fact-check %s: no errors found", label)
        return "Fact-check %s: no errors found" % label, False

    # "skipped": apply_fact_check puts the rejection reason (if any) in
    # `changes`; an empty reason means the reply could not be parsed.
    log.warning("Fact-check %s: skipped (%s)", label, changes or "unparseable output")
    return manual_review, True


def fact_check_pr_files(
    output_files: list[Path],
    company_name: str,
    url: str,
    topic: str,
    keyword: str,
    timeout: int = FACT_CHECK_TIMEOUT,
) -> tuple[list[str], bool]:
    """Run fact-check on .txt PR files in the output list.

    Only files whose suffix is ``.txt`` are treated as press releases;
    everything else (e.g. "Headlines Evaluation.md") is ignored. Accepted
    corrections are written back to the file in place. Never raises for a
    per-file problem: timeouts and other errors become a status line.

    Args:
        output_files: Paths produced by the preceding stage.
        company_name: Client company name (ground truth for the prompt).
        url: Target URL (ground truth for the prompt).
        topic: Press-release topic (ground truth for the prompt).
        keyword: Target keyword (ground truth for the prompt).
        timeout: Seconds allowed for each ``claude`` invocation.

    Returns:
        (status_lines, any_failed) where status_lines is a list of
        human-readable results per PR, and any_failed is True if the
        fact-checker could not run on at least one PR.
    """
    txt_files = [f for f in output_files if f.suffix == ".txt"]
    # Decide whether there is anything to check BEFORE looking for the CLI,
    # so a task with no PR text is not reported as a fact-check failure.
    if not txt_files:
        return [], False

    claude_bin = shutil.which("claude")
    if not claude_bin:
        log.warning("Fact-check: claude CLI not found, skipping")
        return ["Fact-check: claude CLI not found, skipped"], True

    status_lines: list[str] = []
    any_failed = False

    for i, txt_file in enumerate(txt_files):
        label = "PR %s" % chr(65 + i)  # PR A, PR B, etc.
        try:
            line, failed = _fact_check_file(
                claude_bin, txt_file, label,
                company_name, url, topic, keyword, timeout,
            )
        except subprocess.TimeoutExpired:
            log.warning("Fact-check %s timed out after %ds", label, timeout)
            line = "Fact-check %s: timed out -- manual review recommended" % label
            failed = True
        except Exception as e:
            # Deliberate catch-all: the fact-check is best-effort and must
            # never take down the surrounding pipeline.
            log.warning("Fact-check %s error: %s", label, e)
            line = "Fact-check %s: could not run -- manual review recommended" % label
            failed = True

        if line is not None:
            status_lines.append(line)
        any_failed = any_failed or failed

    return status_lines, any_failed
+ ) + text, status, changes = _apply_fact_check(raw, self.ORIGINAL) + assert status == "corrected" + assert "300 widget" in text + assert "500" not in text + assert "300 widget variants" in changes + + def test_corrected_without_changes_section(self): + corrected_pr = self.ORIGINAL.replace("500", "300") + raw = f"[CORRECTED]\n{corrected_pr}" + text, status, changes = _apply_fact_check(raw, self.ORIGINAL) + assert status == "corrected" + assert "300" in text + assert changes == "" + + def test_empty_output_returns_skipped(self): + text, status, changes = _apply_fact_check("", self.ORIGINAL) + assert status == "skipped" + assert text == self.ORIGINAL + + def test_none_like_output_returns_skipped(self): + text, status, changes = _apply_fact_check(" \n ", self.ORIGINAL) + assert status == "skipped" + assert text == self.ORIGINAL + + def test_garbage_output_returns_skipped(self): + text, status, changes = _apply_fact_check( + "I reviewed the press release and it looks good overall.", self.ORIGINAL + ) + assert status == "skipped" + assert text == self.ORIGINAL + + def test_rejects_oversized_rewrite(self): + """If fact-checker rewrites too much (>15% word count delta), reject.""" + # Double the content -- way more than 15% + bloated = self.ORIGINAL + "\n\n" + self.ORIGINAL + "\n\nExtra content here." + raw = f"[CORRECTED]\n{bloated}\n\nCHANGES:\n1. Added more detail." + text, status, changes = _apply_fact_check(raw, self.ORIGINAL) + assert status == "skipped" + assert text == self.ORIGINAL + assert "word count delta" in changes + + def test_accepts_minor_word_count_change(self): + """Small changes (within 15%) should be accepted.""" + # Change one word -- well within 15% + minor_edit = self.ORIGINAL.replace("500 widget variants", "480 widget variants") + raw = ( + f"[CORRECTED]\n{minor_edit}\n\n" + "CHANGES:\n1. Corrected variant count from 500 to 480." 
+ ) + text, status, changes = _apply_fact_check(raw, self.ORIGINAL) + assert status == "corrected" + assert "480" in text + + def test_corrected_but_empty_body_returns_skipped(self): + text, status, changes = _apply_fact_check("[CORRECTED]\n", self.ORIGINAL) + assert status == "skipped" + assert text == self.ORIGINAL + + +class TestBuildFactCheckPrompt: + """Tests for _build_fact_check_prompt structure.""" + + def test_includes_ground_truth_data(self): + prompt = _build_fact_check_prompt( + "Some PR text here.", + company_name="Acme Corp", + url="https://acme.com", + topic="widgets", + keyword="industrial widgets", + ) + assert "Acme Corp" in prompt + assert "https://acme.com" in prompt + assert "widgets" in prompt + assert "industrial widgets" in prompt + assert "ground truth" in prompt.lower() or "GROUND TRUTH" in prompt + + def test_includes_pr_text(self): + prompt = _build_fact_check_prompt( + "The quick brown fox.", + company_name="Test", + url="https://test.com", + topic="foxes", + keyword="brown fox", + ) + assert "The quick brown fox." in prompt + + def test_output_format_instructions(self): + prompt = _build_fact_check_prompt( + "Text.", company_name="X", url="u", topic="t", keyword="k" + ) + assert "[NO_ERRORS]" in prompt + assert "[CORRECTED]" in prompt + assert "CHANGES:" in prompt diff --git a/tests/test_fact_check_runner.py b/tests/test_fact_check_runner.py new file mode 100644 index 0000000..c12b112 --- /dev/null +++ b/tests/test_fact_check_runner.py @@ -0,0 +1,122 @@ +"""Tests for clickup_runner.fact_check module.""" + +from clickup_runner.fact_check import apply_fact_check, build_fact_check_prompt + + +class TestApplyFactCheck: + """Tests for apply_fact_check output parsing.""" + + ORIGINAL = ( + "Acme Corp Delivers Advanced Widget Solutions\n\n" + "Acme Corp, a leading manufacturer of widgets, today highlighted " + "its expanded product line. 
The company, based in Milwaukee, Wisconsin, " + "produces over 500 widget variants for industrial applications." + ) + + def test_no_errors_returns_original(self): + text, status, changes = apply_fact_check("[NO_ERRORS]", self.ORIGINAL) + assert status == "clean" + assert text == self.ORIGINAL + assert changes == "" + + def test_no_errors_with_trailing_whitespace(self): + text, status, changes = apply_fact_check("[NO_ERRORS] \n", self.ORIGINAL) + assert status == "clean" + assert text == self.ORIGINAL + + def test_corrected_with_changes(self): + corrected_pr = self.ORIGINAL.replace("500 widget", "300 widget") + raw = ( + "[CORRECTED]\n%s\n\n" + "CHANGES:\n1. Changed '500 widget variants' to '300 widget variants' " + "-- company website lists 300." % corrected_pr + ) + text, status, changes = apply_fact_check(raw, self.ORIGINAL) + assert status == "corrected" + assert "300 widget" in text + assert "500" not in text + assert "300 widget variants" in changes + + def test_corrected_without_changes_section(self): + corrected_pr = self.ORIGINAL.replace("500", "300") + raw = "[CORRECTED]\n%s" % corrected_pr + text, status, changes = apply_fact_check(raw, self.ORIGINAL) + assert status == "corrected" + assert "300" in text + assert changes == "" + + def test_empty_output_returns_skipped(self): + text, status, changes = apply_fact_check("", self.ORIGINAL) + assert status == "skipped" + assert text == self.ORIGINAL + + def test_whitespace_only_returns_skipped(self): + text, status, changes = apply_fact_check(" \n ", self.ORIGINAL) + assert status == "skipped" + assert text == self.ORIGINAL + + def test_garbage_output_returns_skipped(self): + text, status, changes = apply_fact_check( + "I reviewed the press release and it looks good overall.", self.ORIGINAL + ) + assert status == "skipped" + assert text == self.ORIGINAL + + def test_rejects_oversized_rewrite(self): + bloated = self.ORIGINAL + "\n\n" + self.ORIGINAL + "\n\nExtra content." 
+ raw = "[CORRECTED]\n%s\n\nCHANGES:\n1. Added more detail." % bloated + text, status, changes = apply_fact_check(raw, self.ORIGINAL) + assert status == "skipped" + assert text == self.ORIGINAL + assert "word count delta" in changes + + def test_accepts_minor_word_count_change(self): + minor_edit = self.ORIGINAL.replace("500 widget variants", "480 widget variants") + raw = ( + "[CORRECTED]\n%s\n\n" + "CHANGES:\n1. Corrected variant count from 500 to 480." % minor_edit + ) + text, status, changes = apply_fact_check(raw, self.ORIGINAL) + assert status == "corrected" + assert "480" in text + + def test_corrected_empty_body_returns_skipped(self): + text, status, changes = apply_fact_check("[CORRECTED]\n", self.ORIGINAL) + assert status == "skipped" + assert text == self.ORIGINAL + + +class TestBuildFactCheckPrompt: + """Tests for build_fact_check_prompt structure.""" + + def test_includes_ground_truth_data(self): + prompt = build_fact_check_prompt( + "Some PR text.", + company_name="Acme Corp", + url="https://acme.com", + topic="widgets", + keyword="industrial widgets", + ) + assert "Acme Corp" in prompt + assert "https://acme.com" in prompt + assert "widgets" in prompt + assert "industrial widgets" in prompt + assert "GROUND TRUTH" in prompt + + def test_includes_pr_text(self): + prompt = build_fact_check_prompt( + "The quick brown fox.", + company_name="Test", + url="https://test.com", + topic="foxes", + keyword="brown fox", + ) + assert "The quick brown fox." in prompt + + def test_output_format_instructions(self): + prompt = build_fact_check_prompt( + "Text.", company_name="X", url="u", topic="t", keyword="k" + ) + assert "[NO_ERRORS]" in prompt + assert "[CORRECTED]" in prompt + assert "CHANGES:" in prompt