"""Press-release pipeline tool. Autonomous workflow: 1. Generate 7 compliant headlines (chat brain) 2. AI judge picks the 2 best (chat brain) 3. Write 2 full press releases (execution brain x 2) 4. Generate 2 JSON-LD schemas (execution brain x 2, Sonnet + WebSearch) 5. Save 4 files, return cost summary """ from __future__ import annotations import json import logging import re import time from datetime import datetime from pathlib import Path from ..docx_export import text_to_docx from ..press_advantage import PressAdvantageClient from . import tool log = logging.getLogger(__name__) # --------------------------------------------------------------------------- # Paths # --------------------------------------------------------------------------- _ROOT_DIR = Path(__file__).resolve().parent.parent.parent _SKILLS_DIR = _ROOT_DIR / "skills" _DATA_DIR = _ROOT_DIR / "data" _OUTPUT_DIR = _DATA_DIR / "generated" / "press_releases" _COMPANIES_FILE = _SKILLS_DIR / "companies.md" _HEADLINES_FILE = _SKILLS_DIR / "headlines.md" SONNET_CLI_MODEL = "sonnet" def _set_status(ctx: dict | None, message: str) -> None: """Log pipeline progress. Previously wrote to KV; now just logs.""" if message: log.info("[PR Pipeline] %s", message) def _fuzzy_company_match(name: str, candidate: str) -> bool: """Check if company_name fuzzy-matches a candidate string. Tries exact match, then substring containment in both directions. """ if not name or not candidate: return False a, b = name.lower().strip(), candidate.lower().strip() return a == b or a in b or b in a def _find_clickup_task(ctx: dict, company_name: str) -> str: """Query ClickUp API for a matching press-release task. Looks for "to do" tasks where Work Category == "Press Release" and the Client custom field fuzzy-matches company_name. If found: creates kv_store "executing" entry, moves to "in progress" on ClickUp, and returns the task ID. If not found: returns "" (tool runs without ClickUp sync). 
""" cu_client = _get_clickup_client(ctx) if not cu_client: return "" config = ctx.get("config") if not config or not config.clickup.space_id: return "" try: tasks = cu_client.get_tasks_from_space( config.clickup.space_id, statuses=["to do"], ) except Exception as e: log.warning("ClickUp API query failed in _find_clickup_task: %s", e) return "" finally: cu_client.close() # Find a task with Work Category == "Press Release" and Client matching company_name for task in tasks: if task.task_type != "Press Release": continue client_field = task.custom_fields.get("Client", "") if not ( _fuzzy_company_match(company_name, task.name) or _fuzzy_company_match(company_name, client_field) ): continue # Found a match — move to "automation underway" on ClickUp task_id = task.id # Move to "automation underway" on ClickUp cu_client2 = _get_clickup_client(ctx) if cu_client2: try: cu_client2.update_task_status(task_id, config.clickup.automation_status) except Exception as e: log.warning("Failed to update ClickUp status for %s: %s", task_id, e) finally: cu_client2.close() log.info("Auto-matched ClickUp task %s for company '%s'", task_id, company_name) return task_id return "" def _get_clickup_client(ctx: dict | None): """Create a ClickUpClient from tool context, or None if unavailable.""" if not ctx or not ctx.get("config") or not ctx["config"].clickup.enabled: return None try: from ..clickup import ClickUpClient config = ctx["config"] return ClickUpClient( api_token=config.clickup.api_token, workspace_id=config.clickup.workspace_id, task_type_field_name=config.clickup.task_type_field_name, ) except Exception as e: log.warning("Could not create ClickUp client: %s", e) return None # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def _load_skill(filename: str) -> str: """Read a markdown skill file from the skills/ directory, stripping frontmatter.""" path = _SKILLS_DIR / 
filename if not path.exists(): raise FileNotFoundError(f"Skill file not found: {path}") text = path.read_text(encoding="utf-8") # Strip YAML frontmatter (--- ... ---) if present if text.startswith("---"): end = text.find("---", 3) if end != -1: text = text[end + 3 :].strip() return text def _load_file_if_exists(path: Path) -> str: """Read a file if it exists, return empty string otherwise.""" if path.exists(): return path.read_text(encoding="utf-8") return "" def _slugify(text: str) -> str: """Turn a headline into a filesystem-safe slug.""" text = text.lower().strip() text = re.sub(r"[^\w\s-]", "", text) text = re.sub(r"[\s_]+", "-", text) return text[:60].strip("-") def _word_count(text: str) -> int: return len(text.split()) def _chat_call(agent, messages: list[dict]) -> str: """Make a non-streaming chat-brain call and return the full text.""" parts: list[str] = [] for chunk in agent.llm.chat(messages, tools=None, stream=False): if chunk["type"] == "text": parts.append(chunk["content"]) return "".join(parts) def _clean_pr_output(raw: str, headline: str) -> str: """Clean execution brain output to just the press release text. Strategy: find the headline we asked for in the output, take everything from that point forward. Strip any markdown formatting artifacts. 
""" # Normalize the headline for matching headline_lower = headline.strip().lower() lines = raw.strip().splitlines() # Try to find the exact headline in the output pr_start = None for i, line in enumerate(lines): clean_line = re.sub(r"\*\*", "", line).strip().lower() if clean_line == headline_lower: pr_start = i break # Fallback: find a line that contains most of the headline words if pr_start is None: headline_words = set(headline_lower.split()) for i, line in enumerate(lines): clean_line = re.sub(r"\*\*", "", line).strip().lower() line_words = set(clean_line.split()) # If >70% of headline words are in this line, it's probably the headline if len(headline_words & line_words) >= len(headline_words) * 0.7: pr_start = i break # If we still can't find it, just take the whole output if pr_start is None: pr_start = 0 # Rebuild from the headline forward result_lines = [] for line in lines[pr_start:]: # Strip markdown formatting line = re.sub(r"\*\*", "", line) line = re.sub(r"^#{1,6}\s+", "", line) result_lines.append(line) result = "\n".join(result_lines).strip() # Remove trailing horizontal rules result = re.sub(r"\n---\s*$", "", result).strip() return result # --------------------------------------------------------------------------- # Prompt builders # --------------------------------------------------------------------------- def _is_actual_news(topic: str) -> bool: """Detect whether the topic signals genuinely new news. Returns True if the topic contains explicit markers like 'actual news', 'new product', 'launch', 'acquisition', 'partnership', 'certification', or 'award'. The user is expected to signal this in the PR Topic field. 
""" signals = [ "actual news", "new product", "launch", "launches", "acquisition", "partnership", "certification", "award", "unveil", "unveils", "introduce", "introduces", ] topic_lower = topic.lower() return any(s in topic_lower for s in signals) def _build_headline_prompt( topic: str, company_name: str, url: str, lsi_terms: str, headlines_ref: str ) -> str: """Build the prompt for Step 1: generate 7 headlines.""" is_news = _is_actual_news(topic) prompt = ( f"Generate exactly 7 unique press release headline options for the following.\n\n" f"Topic: {topic}\n" f"Company: {company_name}\n" ) if url: prompt += f"Reference URL: {url}\n" if lsi_terms: prompt += f"LSI terms to consider: {lsi_terms}\n" prompt += ( "\nRules for EVERY headline:\n" "- Maximum 70 characters (including spaces)\n" "- Title case\n" "- NO location/geographic keywords\n" "- NO superlatives (best, top, leading, #1)\n" "- NO questions\n" "- NO colons — colons are considered lower quality\n" ) if is_news: prompt += ( "\nThis topic is ACTUAL NEWS — a real new event, product, partnership, " "or achievement. You may use announcement verbs like 'Announces', " "'Launches', 'Introduces', 'Unveils'.\n" ) else: prompt += ( "\nIMPORTANT — AWARENESS FRAMING:\n" "The company ALREADY offers this product/service/capability. Nothing is " "new, nothing was just launched, expanded, or achieved. 
You are writing " "an awareness piece about existing capabilities framed in news-wire style.\n\n" "REQUIRED verbs — use these: 'Highlights', 'Reinforces', 'Delivers', " "'Strengthens', 'Showcases', 'Details', 'Offers', 'Provides'\n\n" "BANNED — do NOT use any of these:\n" "- 'Announces', 'Launches', 'Introduces', 'Unveils', 'Expands', " "'Reveals', 'Announces New'\n" "- 'Significant expansion', 'major milestone', 'growing demand', " "'new capabilities', 'celebrates X years'\n" "- Any language that implies something CHANGED or is NEW when it is not\n" ) if headlines_ref: prompt += ( "\nHere are examples of high-quality headlines to use as reference " "for tone, structure, and length:\n\n" f"{headlines_ref}\n" ) prompt += ( "\nReturn ONLY a numbered list (1-7), one headline per line. " "No commentary, no character counts, just the headlines." ) return prompt def _build_judge_prompt(headlines: str, headlines_ref: str, topic: str = "") -> str: """Build the prompt for Step 2: pick the 2 best headlines.""" is_news = _is_actual_news(topic) prompt = ( "You are judging press release headlines for Press Advantage distribution. " "Pick the 2 best headlines from the candidates below.\n\n" "DISQUALIFY any headline that:\n" "- Contains a colon\n" "- Contains location/geographic keywords\n" "- Contains superlatives (best, top, leading, #1)\n" "- Is a question\n" "- Exceeds 70 characters\n" ) if is_news: prompt += ( "- (This topic IS actual news — announcement verbs are acceptable)\n\n" ) else: prompt += ( "- Uses 'Announces', 'Launches', 'Introduces', 'Unveils', 'Expands', " "'Reveals', or 'Announces New' (this is NOT actual news)\n" "- Implies something CHANGED, is NEW, or was just achieved when it was not " "(e.g. 
'significant expansion', 'major milestone', 'growing demand')\n\n" ) prompt += ( "PREFER headlines that:\n" "- Match the tone and structure of the reference examples below\n" "- Use awareness verbs like 'Highlights', 'Strengthens', " "'Reinforces', 'Delivers', 'Showcases', 'Details'\n" "- Describe what the company DOES or OFFERS, not what it just invented\n" "- Read like a real news wire headline, not a product announcement\n\n" f"Candidates:\n{headlines}\n\n" ) if headlines_ref: prompt += ( "Reference headlines (these scored 77+ on quality — match their style):\n" f"{headlines_ref}\n\n" ) prompt += ( "Return ONLY the 2 best headlines, one per line, exactly as written in the candidates. " "No numbering, no commentary." ) return prompt def _derive_anchor_phrase(company_name: str, keyword: str) -> str: """Derive a 'brand + keyword' anchor phrase from company name and keyword. Examples: ("Advanced Industrial", "PEEK machining") -> "Advanced Industrial PEEK machining" ("Metal Craft", "custom metal fabrication") -> "Metal Craft custom metal fabrication" """ return f"{company_name} {keyword.strip()}" def _find_anchor_in_text(text: str, anchor: str) -> bool: """Check if the anchor phrase exists in the text (case-insensitive).""" return anchor.lower() in text.lower() def _fuzzy_find_anchor(text: str, company_name: str, topic: str) -> str | None: """Try to find a close match for the brand+keyword anchor in the text. Looks for the company name followed by topic-related words within a reasonable proximity (same sentence). 
""" text_lower = text.lower() company_lower = company_name.lower() # Extract key words from topic (skip short/common words) stop_words = {"a", "an", "the", "and", "or", "for", "in", "on", "of", "to", "with", "is", "are"} topic_words = [w for w in topic.lower().split() if w not in stop_words and len(w) > 2] if not topic_words: return None # Find all positions of company name in text start = 0 while True: pos = text_lower.find(company_lower, start) if pos == -1: break # Look at the surrounding context (next 80 chars after company name) context_start = pos context_end = min(pos + len(company_name) + 80, len(text)) context = text[context_start:context_end] # Check if any topic keyword appears near the company name context_lower = context.lower() for word in topic_words: if word in context_lower: # Extract the phrase from company name to end of the keyword match word_pos = context_lower.find(word) phrase_end = word_pos + len(word) candidate = context[:phrase_end].strip() # Clean: stop at sentence boundaries for sep in (".", ",", ";", "\n"): if sep in candidate[len(company_name) :]: break else: return candidate start = pos + 1 return None def _build_pr_prompt( headline: str, topic: str, company_name: str, url: str, lsi_terms: str, required_phrase: str, skill_text: str, companies_file: str, anchor_phrase: str = "", ) -> str: """Build the prompt for Step 3: write one full press release.""" is_news = _is_actual_news(topic) prompt = ( f"{skill_text}\n\n" "---\n\n" f"Write a press release using the headline below. " f"Follow every rule in the skill instructions above.\n\n" f"Headline: {headline}\n" f"Topic: {topic}\n" f"Company: {company_name}\n" ) if is_news: prompt += ( "\nThis is ACTUAL NEWS — a real new event, product, or achievement. " "You may use announcement language (announced, launched, introduced).\n" ) else: prompt += ( "\nAWARENESS FRAMING — CRITICAL:\n" "The company ALREADY offers this product/service/capability. Nothing new " "happened. 
Do NOT write that the company 'announced', 'expanded', 'launched', " "'achieved a milestone', or 'saw growing demand'. These are LIES if nothing " "actually changed.\n" "Instead write about what the company DOES, what it OFFERS, what it PROVIDES. " "Frame it as drawing attention to existing capabilities — highlighting, " "reinforcing, detailing, showcasing.\n" "The first paragraph should describe what the company offers, NOT announce " "a fictional event.\n" ) if url: prompt += f"Reference URL (fetch for context): {url}\n" if lsi_terms: prompt += f"LSI terms to integrate: {lsi_terms}\n" if required_phrase: prompt += f'Required phrase (use exactly once): "{required_phrase}"\n' if anchor_phrase: prompt += ( f"\nANCHOR TEXT REQUIREMENT: You MUST include the exact phrase " f'"{anchor_phrase}" somewhere naturally in the body of the press ' f"release. This phrase will be used as anchor text for an SEO link. " f"Work it into a sentence where it reads naturally — for example: " f'"As a {anchor_phrase.split(company_name, 1)[-1].strip()} provider, ' f'{company_name}..." or "{anchor_phrase} continues to...".\n' ) if companies_file: prompt += ( f"\nCompany directory — look up the executive name and title for {company_name}. " f"If the company is NOT listed below, use 'a company spokesperson' for quotes " f"instead of making up a name:\n" f"{companies_file}\n" ) prompt += ( "\nTarget 600-750 words. 
@tool(
    "write_press_releases",
    description=(
        "Full autonomous press-release pipeline. Generates 7 headlines, "
        "AI-picks the best 2, writes 2 complete press releases (600-750 words each), "
        "generates JSON-LD schema for each, and saves all files. "
        "Returns both press releases, both schemas, file paths, and a cost summary. "
        "Use when the user asks to write, create, or draft a press release."
    ),
    category="content",
)
def write_press_releases(
    topic: str,
    company_name: str,
    url: str = "",
    keyword: str = "",
    lsi_terms: str = "",
    required_phrase: str = "",
    ctx: dict | None = None,
) -> str:
    """Run the full press-release pipeline and return results + cost summary.

    Resolves the ClickUp task (from ``ctx`` or by lookup), posts the start
    comment, then delegates the four pipeline steps to
    ``_run_press_release_pipeline``. The ClickUp client created here is closed
    on every exit path, including early error returns and exceptions.

    Args:
        topic: Press release topic.
        company_name: Company the release is about.
        url: Optional reference URL; also forced into the schema's
            ``mainEntityOfPage`` when set.
        keyword: Optional keyword used to derive the brand+keyword anchor.
        lsi_terms: Optional LSI terms passed to the prompts.
        required_phrase: Optional phrase the release must contain.
        ctx: Tool context; must contain ``"agent"``.

    Returns:
        Markdown report with both releases, both schemas, file paths and a
        timing table, or a string starting with ``"Error:"`` on failure.
    """
    if not ctx or "agent" not in ctx:
        return "Error: press release tool requires agent context."

    # clickup_task_id is injected via ctx by the ToolRegistry (never from LLM)
    clickup_task_id = ctx.get("clickup_task_id", "")

    # Fallback: auto-lookup from ClickUp API when invoked from chat (no task ID in ctx)
    if not clickup_task_id and ctx.get("config"):
        clickup_task_id = _find_clickup_task(ctx, company_name)
        if clickup_task_id:
            log.info("Chat-invoked PR: auto-linked to ClickUp task %s", clickup_task_id)

    # ── ClickUp: set automation status and post starting comment ────────
    cu_client = None
    if clickup_task_id:
        cu_client = _get_clickup_client(ctx)
        if cu_client:
            try:
                config = ctx["config"]
                cu_client.update_task_status(clickup_task_id, config.clickup.automation_status)
                cu_client.add_comment(
                    clickup_task_id,
                    f"[STARTED]CheddahBot starting press release creation.\n\n"
                    f"Topic: {topic}\nCompany: {company_name}",
                )
                log.info("ClickUp task %s set to automation-underway", clickup_task_id)
            except Exception as e:
                log.warning("ClickUp start-sync failed for %s: %s", clickup_task_id, e)

    try:
        return _run_press_release_pipeline(
            topic,
            company_name,
            url,
            keyword,
            lsi_terms,
            required_phrase,
            ctx,
            clickup_task_id,
            cu_client,
        )
    finally:
        # Single owner of the client's lifetime: previously it was closed only
        # after the final sync, so error returns and exceptions leaked it.
        if cu_client:
            cu_client.close()


def _run_press_release_pipeline(
    topic: str,
    company_name: str,
    url: str,
    keyword: str,
    lsi_terms: str,
    required_phrase: str,
    ctx: dict,
    clickup_task_id: str,
    cu_client,
) -> str:
    """Execute steps 1-4 of the pipeline; the caller owns and closes *cu_client*.

    Generates and judges headlines, writes and saves the two releases (txt and
    docx), uploads the docx files to ClickUp when a task is linked, generates
    and saves the JSON-LD schemas, posts the completion comment, and attempts
    client delivery. Returns the markdown report, or an ``"Error: ..."``
    string when a skill file is missing or headline generation is empty.
    """
    agent = ctx["agent"]

    # Load skill prompts
    try:
        pr_skill = _load_skill("press_release_prompt.md")
        schema_skill = _load_skill("press-release-schema.md")
    except FileNotFoundError as e:
        return f"Error: {e}"

    # Load reference files
    companies_file = _load_file_if_exists(_COMPANIES_FILE)
    headlines_ref = _load_file_if_exists(_HEADLINES_FILE)

    # Ensure output directory (company subfolder)
    company_slug = _slugify(company_name)
    output_dir = _OUTPUT_DIR / company_slug
    output_dir.mkdir(parents=True, exist_ok=True)

    today = datetime.now().strftime("%Y-%m-%d")
    cost_log: list[dict] = []

    # ── Step 1: Generate 7 headlines (chat brain) ─────────────────────────
    log.info("[PR Pipeline] Step 1/4: Generating 7 headlines for %s...", company_name)
    _set_status(ctx, f"Step 1/4: Generating 7 headlines for {company_name}...")
    step_start = time.time()
    headline_prompt = _build_headline_prompt(topic, company_name, url, lsi_terms, headlines_ref)
    messages = [
        {"role": "system", "content": "You are a senior press-release headline writer."},
        {"role": "user", "content": headline_prompt},
    ]
    headlines_raw = _chat_call(agent, messages)
    cost_log.append(
        {
            "step": "1. Generate 7 headlines",
            "model": agent.llm.current_model,
            "elapsed_s": round(time.time() - step_start, 1),
        }
    )

    if not headlines_raw.strip():
        return "Error: headline generation returned empty result."

    # Save all 7 headline candidates to file
    slug_base = _slugify(f"{company_name}-{topic}")
    headlines_file = output_dir / f"{slug_base}_{today}_headlines.txt"
    headlines_file.write_text(headlines_raw.strip(), encoding="utf-8")

    # ── Step 2: AI judge picks best 2 (chat brain) ───────────────────────
    log.info("[PR Pipeline] Step 2/4: AI judge selecting best 2 headlines...")
    _set_status(ctx, "Step 2/4: AI judge selecting best 2 headlines...")
    step_start = time.time()
    judge_prompt = _build_judge_prompt(headlines_raw, headlines_ref, topic)
    messages = [
        {"role": "system", "content": "You are a senior PR editor."},
        {"role": "user", "content": judge_prompt},
    ]
    judge_result = _chat_call(agent, messages)
    cost_log.append(
        {
            "step": "2. Judge picks best 2",
            "model": agent.llm.current_model,
            "elapsed_s": round(time.time() - step_start, 1),
        }
    )

    # Parse the two winning headlines (strip any list numbering the judge added)
    winners = [
        line.strip().lstrip("0123456789.-) ")
        for line in judge_result.strip().splitlines()
        if line.strip()
    ]
    if len(winners) < 2:
        # Judge returned too little: fall back to the first candidates.
        all_headlines = [
            line.strip().lstrip("0123456789.-) ")
            for line in headlines_raw.strip().splitlines()
            if line.strip()
        ]
        winners = (
            all_headlines[:2]
            if len(all_headlines) >= 2
            else [all_headlines[0], all_headlines[0]]
            if all_headlines
            else ["Headline A", "Headline B"]
        )
    winners = winners[:2]

    # ── Step 3: Write 2 press releases (execution brain x 2) ─────────────
    log.info("[PR Pipeline] Step 3/4: Writing 2 press releases...")
    anchor_phrase = _derive_anchor_phrase(company_name, keyword) if keyword else ""
    pr_texts: list[str] = []
    pr_files: list[str] = []
    docx_files: list[str] = []
    anchor_warnings: list[str] = []
    for i, headline in enumerate(winners):
        log.info("[PR Pipeline] Writing PR %d/2: %s", i + 1, headline[:60])
        _set_status(ctx, f"Step 3/4: Writing press release {i + 1}/2 — {headline[:60]}...")
        step_start = time.time()
        pr_prompt = _build_pr_prompt(
            headline,
            topic,
            company_name,
            url,
            lsi_terms,
            required_phrase,
            pr_skill,
            companies_file,
            anchor_phrase=anchor_phrase,
        )
        exec_tools = "Bash,Read,Edit,Write,Glob,Grep,WebFetch"
        raw_result = agent.execute_task(pr_prompt, tools=exec_tools)
        elapsed = round(time.time() - step_start, 1)
        cost_log.append(
            {
                "step": f"3{chr(97 + i)}. Write PR '{headline[:40]}...'",
                "model": "execution-brain (default)",
                "elapsed_s": elapsed,
            }
        )

        # Clean output: find the headline, strip preamble and markdown
        clean_result = _clean_pr_output(raw_result, headline)
        pr_texts.append(clean_result)

        # Validate word count (warning only; the text is still saved)
        wc = _word_count(clean_result)
        if wc < 575 or wc > 800:
            log.warning("PR %d word count %d outside 575-800 range", i + 1, wc)

        # Validate anchor phrase (only when keyword provided)
        if anchor_phrase and _find_anchor_in_text(clean_result, anchor_phrase):
            log.info("PR %d contains anchor phrase '%s'", i + 1, anchor_phrase)
        elif anchor_phrase:
            fuzzy = _fuzzy_find_anchor(clean_result, company_name, keyword)
            if fuzzy:
                log.info("PR %d: exact anchor not found, fuzzy match: '%s'", i + 1, fuzzy)
                anchor_warnings.append(
                    f'PR {chr(65 + i)}: Exact anchor phrase "{anchor_phrase}" not found. '
                    f'Closest match: "{fuzzy}" — you may want to adjust before submitting.'
                )
            else:
                log.warning("PR %d: anchor phrase '%s' NOT found", i + 1, anchor_phrase)
                anchor_warnings.append(
                    f'PR {chr(65 + i)}: Anchor phrase "{anchor_phrase}" NOT found in the text. '
                    f"You'll need to manually add it before submitting to PA."
                )

        # Save PR to file
        slug = _slugify(headline)
        filename = f"{slug}_{today}.txt"
        filepath = output_dir / filename
        filepath.write_text(clean_result, encoding="utf-8")
        pr_files.append(str(filepath))

        # Also save as .docx for Google Docs import
        docx_path = output_dir / f"{slug}_{today}.docx"
        text_to_docx(clean_result, docx_path)
        docx_files.append(str(docx_path))

    # ── ClickUp: upload docx attachments + comment ─────────────────────
    uploaded_count = 0
    failed_uploads: list[str] = []
    if clickup_task_id and cu_client:
        try:
            for path in docx_files:
                if cu_client.upload_attachment(clickup_task_id, path):
                    uploaded_count += 1
                else:
                    failed_uploads.append(path)
                    log.warning("ClickUp: failed to upload %s for task %s", path, clickup_task_id)
            upload_warning = ""
            if failed_uploads:
                paths_list = "\n".join(f" - {p}" for p in failed_uploads)
                upload_warning = (
                    f"\n[WARNING]Warning: {len(failed_uploads)} attachment(s) failed to upload. "
                    f"Files saved locally at:\n{paths_list}"
                )
            cu_client.add_comment(
                clickup_task_id,
                f"📎 Saved {len(docx_files)} press release(s). "
                f"{uploaded_count} file(s) attached.\n"
                f"Generating JSON-LD schemas next...{upload_warning}",
            )
            log.info(
                "ClickUp: uploaded %d attachments for task %s", uploaded_count, clickup_task_id
            )
        except Exception as e:
            log.warning("ClickUp attachment upload failed for %s: %s", clickup_task_id, e)

    # ── Step 4: Generate 2 JSON-LD schemas (Sonnet + WebSearch) ───────────
    log.info("[PR Pipeline] Step 4/4: Generating 2 JSON-LD schemas...")
    schema_texts: list[str] = []
    schema_files: list[str] = []
    for i, pr_text in enumerate(pr_texts):
        log.info("[PR Pipeline] Schema %d/2 for: %s", i + 1, winners[i][:60])
        _set_status(ctx, f"Step 4/4: Generating schema {i + 1}/2...")
        step_start = time.time()
        schema_prompt = _build_schema_prompt(pr_text, company_name, url, schema_skill)
        exec_tools = "WebSearch,WebFetch"
        result = agent.execute_task(
            schema_prompt,
            tools=exec_tools,
            model=SONNET_CLI_MODEL,
        )
        elapsed = round(time.time() - step_start, 1)
        cost_log.append(
            {
                "step": f"4{chr(97 + i)}. Schema for PR {i + 1}",
                "model": SONNET_CLI_MODEL,
                "elapsed_s": elapsed,
            }
        )

        # Extract clean JSON and force correct mainEntityOfPage
        schema_json = _extract_json(result)
        if schema_json:
            try:
                schema_obj = json.loads(schema_json)
                if url:
                    schema_obj["mainEntityOfPage"] = url
                schema_json = json.dumps(schema_obj, indent=2)
            except json.JSONDecodeError:
                log.warning("Schema %d is not valid JSON", i + 1)
        # When no JSON could be extracted, keep the raw output for manual repair.
        schema_texts.append(schema_json or result)

        # Save schema to file
        slug = _slugify(winners[i])
        filename = f"{slug}_{today}_schema.json"
        filepath = output_dir / filename
        filepath.write_text(schema_json or result, encoding="utf-8")
        schema_files.append(str(filepath))

    # ── Build final output ────────────────────────────────────────────────
    _set_status(ctx, "")  # Clear status — pipeline complete
    total_elapsed = sum(c["elapsed_s"] for c in cost_log)
    log.info("[PR Pipeline] Complete for %s — %.0fs total", company_name, total_elapsed)
    output_parts = []
    for i in range(2):
        label = chr(65 + i)  # A, B
        wc = _word_count(pr_texts[i])
        output_parts.append(f"## Press Release {label}: {winners[i]}")
        output_parts.append(f"**Word count:** {wc}")
        output_parts.append(f"**File:** `{pr_files[i]}`")
        output_parts.append(f"**Docx:** `{docx_files[i]}`\n")
        output_parts.append(pr_texts[i])
        output_parts.append("\n---\n")
        output_parts.append(f"### Schema {label}")
        output_parts.append(f"**File:** `{schema_files[i]}`\n")
        output_parts.append(f"```json\n{schema_texts[i]}\n```")
        output_parts.append("\n---\n")

    # Anchor text warnings
    if anchor_warnings:
        output_parts.append("## Anchor Text Warnings\n")
        output_parts.append(f'Required anchor phrase: **"{anchor_phrase}"**\n')
        for warning in anchor_warnings:
            output_parts.append(f"- {warning}")
        output_parts.append("")

    # Cost summary table
    output_parts.append("## Cost Summary\n")
    output_parts.append("| Step | Model | Time (s) |")
    output_parts.append("|------|-------|----------|")
    for c in cost_log:
        output_parts.append(f"| {c['step']} | {c['model']} | {c['elapsed_s']} |")
    output_parts.append(f"| **Total** | | **{round(total_elapsed, 1)}** |")

    # ── ClickUp: completion — status to review + final comment ──────────
    if clickup_task_id and cu_client:
        try:
            config = ctx["config"]
            # Post completion comment (report truncated to 3000 characters)
            attach_note = f"\n📎 {uploaded_count} file(s) attached." if uploaded_count else ""
            result_text = "\n".join(output_parts)[:3000]
            comment = (
                f"[DONE]CheddahBot completed this task.\n\n"
                f"Skill: write_press_releases\n"
                f"Result:\n{result_text}{attach_note}"
            )
            cu_client.add_comment(clickup_task_id, comment)
            # Set status to pr needs review
            cu_client.update_task_status(clickup_task_id, config.clickup.pr_review_status)
            output_parts.append("\n## ClickUp Sync\n")
            output_parts.append(f"- Task `{clickup_task_id}` updated")
            output_parts.append(f"- {uploaded_count} file(s) uploaded")
            output_parts.append(f"- Status set to '{config.clickup.pr_review_status}'")
            log.info("ClickUp sync complete for task %s", clickup_task_id)
        except Exception as e:
            log.error("ClickUp sync failed for task %s: %s", clickup_task_id, e)
            output_parts.append("\n## ClickUp Sync\n")
            output_parts.append(f"- **Sync failed:** {e}")
            output_parts.append("- Press release results are still valid above")

    # ── Client delivery: Drive upload + Gmail draft ──────────────────
    if clickup_task_id and docx_files:
        try:
            from ..delivery import deliver_to_client

            delivery_result = deliver_to_client(
                files=[Path(f) for f in docx_files],
                company_name=company_name,
                task_id=clickup_task_id,
                task_type="Press Release",
                ctx=ctx,
            )
            output_parts.append("\n## Client Delivery\n")
            if delivery_result.doc_links:
                output_parts.append(
                    "- Google Docs: " + ", ".join(delivery_result.doc_links)
                )
            if delivery_result.draft_id:
                output_parts.append(
                    "- Gmail draft created (ID: %s)" % delivery_result.draft_id
                )
            if delivery_result.errors:
                for err in delivery_result.errors:
                    output_parts.append("- Warning: %s" % err)
        except Exception as e:
            # Best-effort: a delivery failure must not discard the report.
            log.warning("Client delivery failed: %s", e)
            output_parts.append("\n## Client Delivery\n- Failed: %s" % e)

    return "\n".join(output_parts)
companies_text.splitlines(): line = line.strip() if line.startswith("## "): current_company = line[3:].strip() elif line.startswith("- **PA Org ID:**") and current_company: try: org_id = int(line.split(":**")[1].strip()) mapping[current_company.lower()] = org_id except (ValueError, IndexError): pass return mapping def _parse_company_data(companies_text: str) -> dict[str, dict]: """Parse companies.md and return full company data keyed by lowercase name. Returns dict like: {"advanced industrial": {"org_id": 19634, "website": "...", "gbp": "..."}} """ companies: dict[str, dict] = {} current_company = "" current_data: dict = {} for line in companies_text.splitlines(): line = line.strip() if line.startswith("## "): if current_company and current_data: companies[current_company.lower()] = current_data current_company = line[3:].strip() current_data = {"name": current_company} elif current_company: if line.startswith("- **PA Org ID:**"): try: # noqa: SIM105 current_data["org_id"] = int(line.split(":**")[1].strip()) except (ValueError, IndexError): pass elif line.startswith("- **Website:**"): current_data["website"] = line.split(":**")[1].strip() elif line.startswith("- **GBP:**"): current_data["gbp"] = line.split(":**")[1].strip() # Don't forget the last company if current_company and current_data: companies[current_company.lower()] = current_data return companies def _fuzzy_match_company(name: str, candidates: dict[str, int]) -> int | None: """Try to match a company name against the org ID mapping. Tries exact match first, then substring containment in both directions. 
""" name_lower = name.lower().strip() # Exact match if name_lower in candidates: return candidates[name_lower] # Substring: input contains a known company name, or vice versa for key, org_id in candidates.items(): if key in name_lower or name_lower in key: return org_id return None def _fuzzy_match_company_data(name: str, candidates: dict[str, dict]) -> dict | None: """Try to match a company name against company data. Same fuzzy logic as _fuzzy_match_company but returns the full data dict. """ name_lower = name.lower().strip() # Exact match if name_lower in candidates: return candidates[name_lower] # Substring: input contains a known company name, or vice versa for key, data in candidates.items(): if key in name_lower or name_lower in key: return data return None def _text_to_html(text: str, links: list[dict] | None = None) -> str: """Convert plain text to HTML with link injection. Args: text: Plain text press release body. links: List of dicts with 'url' and 'anchor' keys. Each anchor's first occurrence in the text is wrapped in an tag. Returns: HTML string with

tags and injected links. """ # Inject anchor text links before paragraph splitting if links: for link in links: anchor = link.get("anchor", "") url = link.get("url", "") if anchor and url: # Replace first occurrence only html_link = f'{anchor}' text = text.replace(anchor, html_link, 1) # Split into paragraphs on double newlines paragraphs = re.split(r"\n\s*\n", text.strip()) html_parts = [] for para in paragraphs: # Collapse internal newlines to spaces within a paragraph para = re.sub(r"\s*\n\s*", " ", para).strip() if not para: continue # Convert bare URLs to links (skip already-linked ones) para = re.sub( r'(?)(https?://\S+)', r'\1', para, ) html_parts.append(f"

{para}

") return "\n".join(html_parts) def _extract_json(text: str) -> str | None: """Try to pull a JSON object out of LLM output (strip fences, prose, etc).""" stripped = text.strip() if stripped.startswith("{"): try: json.loads(stripped) return stripped except json.JSONDecodeError: pass # Strip markdown fences fence_match = re.search(r"```(?:json)?\s*\n?([\s\S]*?)\n?```", text) if fence_match: candidate = fence_match.group(1).strip() try: json.loads(candidate) return candidate except json.JSONDecodeError: pass # Last resort: find first { to last } start = text.find("{") end = text.rfind("}") if start != -1 and end != -1 and end > start: candidate = text[start : end + 1] try: json.loads(candidate) return candidate except json.JSONDecodeError: pass return None # --------------------------------------------------------------------------- # Submit tool # --------------------------------------------------------------------------- def _resolve_branded_url(branded_url: str, company_data: dict | None) -> str: """Resolve the branded link URL. - "GBP" (case-insensitive) → look up GBP from company data - A real URL → use as-is - Empty → fall back to company website """ if branded_url.strip().upper() == "GBP": if company_data and company_data.get("gbp"): return company_data["gbp"] log.warning("GBP shortcut used but no GBP URL in companies.md") return "" if branded_url.strip(): return branded_url.strip() # Fallback to homepage if company_data and company_data.get("website"): return company_data["website"] return "" def _build_links( pr_text: str, company_name: str, keyword: str, target_url: str, branded_url_resolved: str, ) -> tuple[list[dict], list[str]]: """Build the link list for HTML injection and return (links, warnings). 
Link 1: brand+keyword anchor → target_url (IMSURL) Link 2: company name anchor → branded_url (SocialURL / homepage / GBP) """ links: list[dict] = [] warnings: list[str] = [] # Link 1: brand+keyword → target_url if target_url and keyword: anchor_phrase = _derive_anchor_phrase(company_name, keyword) if _find_anchor_in_text(pr_text, anchor_phrase): links.append({"url": target_url, "anchor": anchor_phrase}) else: # Try fuzzy match fuzzy = _fuzzy_find_anchor(pr_text, company_name, keyword) if fuzzy: links.append({"url": target_url, "anchor": fuzzy}) warnings.append( f'Brand+keyword link: exact phrase "{anchor_phrase}" not found. ' f'Used fuzzy match: "{fuzzy}"' ) else: warnings.append( f'Brand+keyword link: anchor phrase "{anchor_phrase}" NOT found in PR text. ' f"Link to {target_url} could not be injected — add it manually in PA." ) # Link 2: branded → social/homepage/GBP if branded_url_resolved: # Use company name as anchor — it will always be in the PR if _find_anchor_in_text(pr_text, company_name): links.append({"url": branded_url_resolved, "anchor": company_name}) else: warnings.append( f'Branded link: company name "{company_name}" not found in PR text. ' f"Link to {branded_url_resolved} could not be injected." ) return links, warnings @tool( "submit_press_release", description=( "Submit a press release to Press Advantage as a draft. Takes the PR text " "(or file path), headline, company name, target URL (IMSURL), and branded " "URL (SocialURL). Auto-constructs SEO links: brand+keyword anchor → target " "URL, company name → branded URL. If branded_url is 'GBP', uses the Google " "Business Profile URL from companies.md. Converts to HTML, resolves the PA " "organization ID, and creates a draft for review. Will NOT auto-publish." 
), category="content", ) def submit_press_release( headline: str, company_name: str, target_url: str = "", branded_url: str = "", keyword: str = "", topic: str = "", pr_text: str = "", file_path: str = "", description: str = "", ctx: dict | None = None, ) -> str: """Submit a finished press release to Press Advantage as a draft.""" # --- Get config --- if not ctx or "config" not in ctx: return "Error: submit_press_release requires agent context." config = ctx["config"] api_key = config.press_advantage.api_key if not api_key: return ( "Error: PRESS_ADVANTAGE_API key not configured. " "Set the PRESS_ADVANTAGE_API environment variable in .env." ) # --- Get PR text --- if not pr_text and file_path: path = Path(file_path) if not path.exists(): return f"Error: file not found: {file_path}" pr_text = path.read_text(encoding="utf-8") if not pr_text: return "Error: provide either pr_text or file_path with the press release content." # --- Validate word count --- wc = _word_count(pr_text) if wc < 550: return ( f"Error: press release is only {wc} words. " f"Press Advantage requires at least 550 words. Please expand the content." 
) # --- Load company data --- companies_text = _load_file_if_exists(_COMPANIES_FILE) company_all = _parse_company_data(companies_text) company_data = _fuzzy_match_company_data(company_name, company_all) # --- Look up PA org ID --- org_id = company_data.get("org_id") if company_data else None # Fallback: try live API lookup if org_id is None: log.info("Org ID not found in companies.md for '%s', trying live API...", company_name) org_mapping = _parse_company_org_ids(companies_text) org_id = _fuzzy_match_company(company_name, org_mapping) if org_id is None: try: client = PressAdvantageClient(api_key) try: orgs = client.get_organizations() api_mapping: dict[str, int] = {} for org in orgs: org_name = org.get("name", "") oid = org.get("id") if org_name and oid: api_mapping[org_name.lower()] = int(oid) org_id = _fuzzy_match_company(company_name, api_mapping) finally: client.close() except Exception as e: log.warning("Failed to fetch orgs from PA API: %s", e) if org_id is None: return ( f"Error: could not find Press Advantage organization for '{company_name}'. " f"Add a 'PA Org ID' entry to skills/companies.md or check the company name." 
) # --- Build links --- branded_url_resolved = _resolve_branded_url(branded_url, company_data) link_list, link_warnings = _build_links( pr_text, company_name, keyword, target_url, branded_url_resolved, ) # --- Convert to HTML --- html_body = _text_to_html(pr_text, link_list) # --- Auto-generate description if not provided --- if not description: keyword = headline for part in [company_name, "Inc.", "LLC", "Corp.", "Ltd.", "Limited", "Inc"]: keyword = keyword.replace(part, "").strip() keyword = re.sub(r"\s+", " ", keyword).strip(" -\u2013\u2014,") description = f"{company_name} - {keyword}" if keyword else company_name # --- Submit to PA --- log.info("Submitting PR to Press Advantage: org=%d, title='%s'", org_id, headline[:60]) client = PressAdvantageClient(api_key) try: result = client.create_release( org_id=org_id, title=headline, body=html_body, description=description, distribution="standard", schedule_distribution="false", ) except Exception as e: return f"Error submitting to Press Advantage: {e}" finally: client.close() # --- Format response --- release_id = result.get("id", "unknown") status = result.get("state", result.get("status", "draft")) output_parts = [ "Press release submitted to Press Advantage as a DRAFT.\n", f"- **Release ID:** {release_id}", f"- **Status:** {status}", f"- **Organization:** {company_name} (ID: {org_id})", f"- **Title:** {headline}", f"- **Word count:** {wc}", f"- **Links injected:** {len(link_list)}", ] if link_list: output_parts.append("\n**Links:**") for link in link_list: output_parts.append(f' - "{link["anchor"]}" -> {link["url"]}') if link_warnings: output_parts.append("\n**Link warnings:**") for warning in link_warnings: output_parts.append(f" - {warning}") output_parts.append( "\n**Next step:** Review and approve in the Press Advantage dashboard before publishing." ) return "\n".join(output_parts)