From 5e9cef5e3976fc1dd1cdde46127a9a62184e2a49 Mon Sep 17 00:00:00 2001 From: PeninsulaInd Date: Mon, 16 Feb 2026 17:37:41 -0600 Subject: [PATCH] Add submit_press_release tool with Press Advantage API integration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds PressAdvantageClient API wrapper and submit_press_release tool that posts finished press releases to PA as drafts. Auto-constructs SEO links (brand+keyword → IMSURL, company name → SocialURL/GBP/homepage) with fuzzy anchor matching and warnings when phrases can't be found. The PR writing prompt now requests anchor text phrases and validates them after generation. Co-Authored-By: Claude Opus 4.6 --- cheddahbot/press_advantage.py | 81 +++ cheddahbot/tools/press_release.py | 316 ++++++++++- config.yaml | 2 + plans/press_advantage_api_integration.md | 31 +- skills/companies.md | 71 ++- tests/test_press_advantage.py | 679 +++++++++++++++++++++++ 6 files changed, 1134 insertions(+), 46 deletions(-) create mode 100644 cheddahbot/press_advantage.py create mode 100644 tests/test_press_advantage.py diff --git a/cheddahbot/press_advantage.py b/cheddahbot/press_advantage.py new file mode 100644 index 0000000..c1bf9e4 --- /dev/null +++ b/cheddahbot/press_advantage.py @@ -0,0 +1,81 @@ +"""Press Advantage REST API client.""" + +from __future__ import annotations + +import logging + +import httpx + +log = logging.getLogger(__name__) + + +class PressAdvantageClient: + """Thin wrapper around the Press Advantage API.""" + + BASE_URL = "https://app.pressadvantage.com" + + def __init__(self, api_key: str): + self._api_key = api_key + self._client = httpx.Client(base_url=self.BASE_URL, timeout=30.0) + + def _params(self, **extra) -> dict: + """Build query params with api_key auth.""" + return {"api_key": self._api_key, **extra} + + def get_organizations(self) -> list[dict]: + """GET /api/customers/organizations.json — list all orgs.""" + resp = self._client.get( + "/api/customers/organizations.json", + params=self._params(), + ) + resp.raise_for_status() + return resp.json() + + def create_release( + self, + org_id: int, + title: str, + body: str, + description: str, + distribution: str = "standard", + schedule_distribution: str = "false", + ) -> dict: + """POST /api/customers/releases/with_content.json — create a draft release. + + Uses form data with release[field] nested params (not JSON). + """ + resp = self._client.post( + "/api/customers/releases/with_content.json", + params=self._params(), + data={ + "release[organization_id]": org_id, + "release[title]": title, + "release[body]": body, + "release[description]": description, + "release[distribution]": distribution, + "release[schedule_distribution]": schedule_distribution, + }, + ) + resp.raise_for_status() + return resp.json() + + def get_release(self, release_id: int) -> dict: + """GET /api/customers/releases/{id}.json — get release details.""" + resp = self._client.get( + f"/api/customers/releases/{release_id}.json", + params=self._params(), + ) + resp.raise_for_status() + return resp.json() + + def get_built_urls(self, release_id: int) -> list[dict]: + """GET /api/customers/releases/{id}/built_urls.json — get published URLs.""" + resp = self._client.get( + f"/api/customers/releases/{release_id}/built_urls.json", + params=self._params(), + ) + resp.raise_for_status() + return resp.json() + + def close(self): + self._client.close() diff --git a/cheddahbot/tools/press_release.py b/cheddahbot/tools/press_release.py index 2c66dbe..c48d40f 100644 --- a/cheddahbot/tools/press_release.py +++ b/cheddahbot/tools/press_release.py @@ -211,9 +211,75 @@ def _build_judge_prompt(headlines: str, headlines_ref: str) -> str: return prompt +def _derive_anchor_phrase(company_name: str, topic: str) -> str: + """Derive a 'brand + keyword' anchor phrase from company name and topic. + + Examples: + ("Advanced Industrial", "PEEK machining") -> "Advanced Industrial PEEK machining" + ("Metal Craft", "custom metal fabrication") -> "Metal Craft custom metal fabrication" + """ + # Clean up topic: strip leading articles, lowercase + keyword = topic.strip() + return f"{company_name} {keyword}" + + +def _find_anchor_in_text(text: str, anchor: str) -> bool: + """Check if the anchor phrase exists in the text (case-insensitive).""" + return anchor.lower() in text.lower() + + +def _fuzzy_find_anchor(text: str, company_name: str, topic: str) -> str | None: + """Try to find a close match for the brand+keyword anchor in the text. + + Looks for the company name followed by topic-related words within + a reasonable proximity (same sentence). + """ + text_lower = text.lower() + company_lower = company_name.lower() + + # Extract key words from topic (skip short/common words) + stop_words = {"a", "an", "the", "and", "or", "for", "in", "on", "of", "to", "with", "is", "are"} + topic_words = [w for w in topic.lower().split() if w not in stop_words and len(w) > 2] + + if not topic_words: + return None + + # Find all positions of company name in text + start = 0 + while True: + pos = text_lower.find(company_lower, start) + if pos == -1: + break + + # Look at the surrounding context (next 80 chars after company name) + context_start = pos + context_end = min(pos + len(company_name) + 80, len(text)) + context = text[context_start:context_end] + + # Check if any topic keyword appears near the company name + context_lower = context.lower() + for word in topic_words: + if word in context_lower: + # Extract the phrase from company name to end of the keyword match + word_pos = context_lower.find(word) + phrase_end = word_pos + len(word) + candidate = context[:phrase_end].strip() + # Clean: stop at sentence boundaries + for sep in (".", ",", ";", "\n"): + if sep in candidate[len(company_name):]: + break + else: + return candidate + + start = pos + 1 + + return None + + def _build_pr_prompt(headline: str, topic: str, company_name: str, url: str, lsi_terms: str, required_phrase: str, - skill_text: str, companies_file: str) -> str: + skill_text: str, companies_file: str, + anchor_phrase: str = "") -> str: """Build the prompt for Step 3: write one full press release.""" prompt = ( f"{skill_text}\n\n" @@ -231,6 +297,16 @@ def _build_pr_prompt(headline: str, topic: str, company_name: str, if required_phrase: prompt += f'Required phrase (use exactly once): "{required_phrase}"\n' + if anchor_phrase: + prompt += ( + f'\nANCHOR TEXT REQUIREMENT: You MUST include the exact phrase ' + f'"{anchor_phrase}" somewhere naturally in the body of the press ' + f'release. This phrase will be used as anchor text for an SEO link. ' + f'Work it into a sentence where it reads naturally — for example: ' + f'"As a {anchor_phrase.split(company_name, 1)[-1].strip()} provider, ' + f'{company_name}..." or "{anchor_phrase} continues to...".\n' + ) + if companies_file: prompt += ( f"\nCompany directory — look up the executive name and title for {company_name}. " @@ -370,9 +446,11 @@ def write_press_releases( # ── Step 3: Write 2 press releases (execution brain × 2) ───────────── log.info("[PR Pipeline] Step 3/4: Writing 2 press releases...") + anchor_phrase = _derive_anchor_phrase(company_name, topic) pr_texts: list[str] = [] pr_files: list[str] = [] docx_files: list[str] = [] + anchor_warnings: list[str] = [] for i, headline in enumerate(winners): log.info("[PR Pipeline] Writing PR %d/2: %s", i + 1, headline[:60]) _set_status(ctx, f"Step 3/4: Writing press release {i+1}/2 — {headline[:60]}...") @@ -380,6 +458,7 @@ def write_press_releases( pr_prompt = _build_pr_prompt( headline, topic, company_name, url, lsi_terms, required_phrase, pr_skill, companies_file, + anchor_phrase=anchor_phrase, ) exec_tools = "Bash,Read,Edit,Write,Glob,Grep,WebFetch" raw_result = agent.execute_task(pr_prompt, tools=exec_tools) @@ -399,6 +478,24 @@ def write_press_releases( if wc < 575 or wc > 800: log.warning("PR %d word count %d outside 575-800 range", i + 1, wc) + # Validate anchor phrase + if _find_anchor_in_text(clean_result, anchor_phrase): + log.info("PR %d contains anchor phrase '%s'", i + 1, anchor_phrase) + else: + fuzzy = _fuzzy_find_anchor(clean_result, company_name, topic) + if fuzzy: + log.info("PR %d: exact anchor not found, fuzzy match: '%s'", i + 1, fuzzy) + anchor_warnings.append( + f"PR {chr(65+i)}: Exact anchor phrase \"{anchor_phrase}\" not found. " + f"Closest match: \"{fuzzy}\" — you may want to adjust before submitting." + ) + else: + log.warning("PR %d: anchor phrase '%s' NOT found", i + 1, anchor_phrase) + anchor_warnings.append( + f"PR {chr(65+i)}: Anchor phrase \"{anchor_phrase}\" NOT found in the text. " + f"You'll need to manually add it before submitting to PA." + ) + # Save PR to file slug = _slugify(headline) filename = f"{slug}_{today}.txt" @@ -472,6 +569,14 @@ def write_press_releases( output_parts.append(f"```json\n{schema_texts[i]}\n```") output_parts.append("\n---\n") + # Anchor text warnings + if anchor_warnings: + output_parts.append("## Anchor Text Warnings\n") + output_parts.append(f"Required anchor phrase: **\"{anchor_phrase}\"**\n") + for warning in anchor_warnings: + output_parts.append(f"- {warning}") + output_parts.append("") + # Cost summary table output_parts.append("## Cost Summary\n") output_parts.append("| Step | Model | Time (s) |") @@ -500,6 +605,39 @@ def _parse_company_org_ids(companies_text: str) -> dict[str, int]: return mapping +def _parse_company_data(companies_text: str) -> dict[str, dict]: + """Parse companies.md and return full company data keyed by lowercase name. + + Returns dict like: {"advanced industrial": {"org_id": 19634, "website": "...", "gbp": "..."}} + """ + companies: dict[str, dict] = {} + current_company = "" + current_data: dict = {} + for line in companies_text.splitlines(): + line = line.strip() + if line.startswith("## "): + if current_company and current_data: + companies[current_company.lower()] = current_data + current_company = line[3:].strip() + current_data = {"name": current_company} + elif current_company: + if line.startswith("- **PA Org ID:**"): + try: + current_data["org_id"] = int(line.split(":**")[1].strip()) + except (ValueError, IndexError): + pass + elif line.startswith("- **Website:**"): + current_data["website"] = line.split(":**")[1].strip() + elif line.startswith("- **GBP:**"): + current_data["gbp"] = line.split(":**")[1].strip() + + # Don't forget the last company + if current_company and current_data: + companies[current_company.lower()] = current_data + + return companies + + def _fuzzy_match_company(name: str, candidates: dict[str, int]) -> int | None: """Try to match a company name against the org ID mapping. @@ -519,6 +657,25 @@ def _fuzzy_match_company(name: str, candidates: dict[str, int]) -> int | None: return None +def _fuzzy_match_company_data(name: str, candidates: dict[str, dict]) -> dict | None: + """Try to match a company name against company data. + + Same fuzzy logic as _fuzzy_match_company but returns the full data dict. + """ + name_lower = name.lower().strip() + + # Exact match + if name_lower in candidates: + return candidates[name_lower] + + # Substring: input contains a known company name, or vice versa + for key, data in candidates.items(): + if key in name_lower or name_lower in key: + return data + + return None + + def _text_to_html(text: str, links: list[dict] | None = None) -> str: """Convert plain text to HTML with link injection. @@ -600,21 +757,96 @@ def _extract_json(text: str) -> str | None: # Submit tool # --------------------------------------------------------------------------- +def _resolve_branded_url(branded_url: str, company_data: dict | None) -> str: + """Resolve the branded link URL. + + - "GBP" (case-insensitive) → look up GBP from company data + - A real URL → use as-is + - Empty → fall back to company website + """ + if branded_url.strip().upper() == "GBP": + if company_data and company_data.get("gbp"): + return company_data["gbp"] + log.warning("GBP shortcut used but no GBP URL in companies.md") + return "" + + if branded_url.strip(): + return branded_url.strip() + + # Fallback to homepage + if company_data and company_data.get("website"): + return company_data["website"] + + return "" + + +def _build_links( + pr_text: str, + company_name: str, + topic: str, + target_url: str, + branded_url_resolved: str, +) -> tuple[list[dict], list[str]]: + """Build the link list for HTML injection and return (links, warnings). + + Link 1: brand+keyword anchor → target_url (IMSURL) + Link 2: company name anchor → branded_url (SocialURL / homepage / GBP) + """ + links: list[dict] = [] + warnings: list[str] = [] + + # Link 1: brand+keyword → target_url + if target_url: + anchor_phrase = _derive_anchor_phrase(company_name, topic) + if _find_anchor_in_text(pr_text, anchor_phrase): + links.append({"url": target_url, "anchor": anchor_phrase}) + else: + # Try fuzzy match + fuzzy = _fuzzy_find_anchor(pr_text, company_name, topic) + if fuzzy: + links.append({"url": target_url, "anchor": fuzzy}) + warnings.append( + f"Brand+keyword link: exact phrase \"{anchor_phrase}\" not found. " + f"Used fuzzy match: \"{fuzzy}\"" + ) + else: + warnings.append( + f"Brand+keyword link: anchor phrase \"{anchor_phrase}\" NOT found in PR text. " + f"Link to {target_url} could not be injected — add it manually in PA." + ) + + # Link 2: branded → social/homepage/GBP + if branded_url_resolved: + # Use company name as anchor — it will always be in the PR + if _find_anchor_in_text(pr_text, company_name): + links.append({"url": branded_url_resolved, "anchor": company_name}) + else: + warnings.append( + f"Branded link: company name \"{company_name}\" not found in PR text. " + f"Link to {branded_url_resolved} could not be injected." + ) + + return links, warnings + + @tool( "submit_press_release", description=( "Submit a press release to Press Advantage as a draft. Takes the PR text " - "(or file path), headline, company name, and links to inject. Converts to " - "HTML, resolves the PA organization ID, and creates a draft release for " - "review. The release will NOT auto-publish — Bryan must review and approve " - "it in the PA dashboard." + "(or file path), headline, company name, target URL (IMSURL), and branded " + "URL (SocialURL). Auto-constructs SEO links: brand+keyword anchor → target " + "URL, company name → branded URL. If branded_url is 'GBP', uses the Google " + "Business Profile URL from companies.md. Converts to HTML, resolves the PA " + "organization ID, and creates a draft for review. Will NOT auto-publish." ), category="content", ) def submit_press_release( headline: str, company_name: str, - links: str = "", + target_url: str = "", + branded_url: str = "", + topic: str = "", pr_text: str = "", file_path: str = "", description: str = "", @@ -651,30 +883,32 @@ def submit_press_release( f"Press Advantage requires at least 550 words. Please expand the content." ) - # --- Parse links --- - link_list: list[dict] = [] - if links: - try: - link_list = json.loads(links) - except json.JSONDecodeError: - return "Error: 'links' must be a valid JSON array, e.g. '[{\"url\": \"...\", \"anchor\": \"...\"}]'" + # --- Derive topic from headline if not provided --- + if not topic: + topic = headline + for part in [company_name, "Inc.", "LLC", "Corp.", "Ltd.", "Limited", "Inc"]: + topic = topic.replace(part, "").strip() + topic = re.sub(r"\s+", " ", topic).strip(" -\u2013\u2014,") - # --- Convert to HTML --- - html_body = _text_to_html(pr_text, link_list) + # --- Load company data --- + companies_text = _load_file_if_exists(_COMPANIES_FILE) + company_all = _parse_company_data(companies_text) + company_data = _fuzzy_match_company_data(company_name, company_all) # --- Look up PA org ID --- - companies_text = _load_file_if_exists(_COMPANIES_FILE) - org_mapping = _parse_company_org_ids(companies_text) - org_id = _fuzzy_match_company(company_name, org_mapping) + org_id = company_data.get("org_id") if company_data else None # Fallback: try live API lookup if org_id is None: log.info("Org ID not found in companies.md for '%s', trying live API...", company_name) + org_mapping = _parse_company_org_ids(companies_text) + org_id = _fuzzy_match_company(company_name, org_mapping) + + if org_id is None: try: client = PressAdvantageClient(api_key) try: orgs = client.get_organizations() - # Build a mapping from API results and try fuzzy match api_mapping: dict[str, int] = {} for org in orgs: org_name = org.get("name", "") @@ -693,13 +927,20 @@ def submit_press_release( f"Add a 'PA Org ID' entry to skills/companies.md or check the company name." ) + # --- Build links --- + branded_url_resolved = _resolve_branded_url(branded_url, company_data) + link_list, link_warnings = _build_links( + pr_text, company_name, topic, target_url, branded_url_resolved, + ) + + # --- Convert to HTML --- + html_body = _text_to_html(pr_text, link_list) + # --- Auto-generate description if not provided --- if not description: - # Extract a keyword from the headline (drop the company name, take remaining key phrase) keyword = headline for part in [company_name, "Inc.", "LLC", "Corp.", "Ltd.", "Limited", "Inc"]: keyword = keyword.replace(part, "").strip() - # Clean up and take first meaningful chunk keyword = re.sub(r"\s+", " ", keyword).strip(" -\u2013\u2014,") description = f"{company_name} - {keyword}" if keyword else company_name @@ -723,13 +964,28 @@ def submit_press_release( # --- Format response --- release_id = result.get("id", "unknown") status = result.get("state", result.get("status", "draft")) - return ( - f"Press release submitted to Press Advantage as a DRAFT.\n\n" - f"- **Release ID:** {release_id}\n" - f"- **Status:** {status}\n" - f"- **Organization:** {company_name} (ID: {org_id})\n" - f"- **Title:** {headline}\n" - f"- **Word count:** {wc}\n" - f"- **Links injected:** {len(link_list)}\n\n" - f"**Next step:** Review and approve in the Press Advantage dashboard before publishing." + + output_parts = [ + "Press release submitted to Press Advantage as a DRAFT.\n", + f"- **Release ID:** {release_id}", + f"- **Status:** {status}", + f"- **Organization:** {company_name} (ID: {org_id})", + f"- **Title:** {headline}", + f"- **Word count:** {wc}", + f"- **Links injected:** {len(link_list)}", + ] + + if link_list: + output_parts.append("\n**Links:**") + for link in link_list: + output_parts.append(f" - \"{link['anchor']}\" → {link['url']}") + + if link_warnings: + output_parts.append("\n**Link warnings:**") + for warning in link_warnings: + output_parts.append(f" - {warning}") + + output_parts.append( + "\n**Next step:** Review and approve in the Press Advantage dashboard before publishing." ) + return "\n".join(output_parts) diff --git a/config.yaml b/config.yaml index ba2dd99..b818620 100644 --- a/config.yaml +++ b/config.yaml @@ -54,3 +54,5 @@ clickup: field_mapping: topic: "task_name" company_name: "Client" + target_url: "IMSURL" + branded_url: "SocialURL" diff --git a/plans/press_advantage_api_integration.md b/plans/press_advantage_api_integration.md index 60672a2..6d56ae2 100644 --- a/plans/press_advantage_api_integration.md +++ b/plans/press_advantage_api_integration.md @@ -1,8 +1,8 @@ # Press Advantage API Integration Plan -## Status: Blocked — waiting on PA support to fix API access +## Status: submit_press_release tool IMPLEMENTED -API key is in `.env` as `PRESS_ADVANTAGE_API`. Auth works (`api_token` query param) but returns "account is cancelled or past due" on all endpoints. Emailed PA support. +API key is in `.env` as `PRESS_ADVANTAGE_API`. Auth via `api_key` query param on `app.pressadvantage.com`. Test release: #81505 (draft) @@ -26,18 +26,20 @@ Test release: #81505 (draft) ## What To Build -### 1. `submit_press_release` tool -- New `@tool` in `cheddahbot/tools/` -- Takes: PR text (or file path), headline, organization_id, distribution type -- Calls `POST /api/customers/releases/with_content.json` -- Params: `release[organization_id]`, `release[title]`, `release[body]`, `release[distribution]`, `release[schedule_distribution]` -- Returns: release ID, status -- Need to figure out org ID mapping (company name → PA org ID) +### 1. `submit_press_release` tool — DONE +- `@tool` in `cheddahbot/tools/press_release.py` +- Takes: headline, company_name, links (JSON), pr_text or file_path, description +- Converts plain text to HTML with link injection (`_text_to_html`) +- Resolves company → PA org ID from `skills/companies.md` with API fallback +- Validates word count >= 550 +- Calls `POST /api/customers/releases/with_content.json` via `PressAdvantageClient` +- Creates as draft (never auto-publishes) +- 31 tests in `tests/test_press_advantage.py` -### 2. Org ID mapping -- `GET /api/customers/organizations.json` lists all orgs with IDs -- Could cache this or add a lookup tool -- Or add PA org IDs to `skills/companies.md` +### 2. Org ID mapping — DONE +- PA org IDs added to `skills/companies.md` for all 22 companies +- Fuzzy name matching with substring fallback +- Live API fallback via `GET /api/customers/organizations.json` if not in companies.md ### 3. Weekly nag emails (Track 2) - Time-driven, not chat-driven @@ -56,5 +58,6 @@ Test release: #81505 (draft) | `/api/customers/organizations.json` | GET | List orgs (get org IDs) | ## Auth -- Query param: `?api_token=` +- Base URL: `https://app.pressadvantage.com` +- Query param: `?api_key=` - Key stored in `.env` as `PRESS_ADVANTAGE_API` diff --git a/skills/companies.md b/skills/companies.md index caa978a..72aa2f4 100644 --- a/skills/companies.md +++ b/skills/companies.md @@ -2,70 +2,137 @@ ## McCormick Industries - **Executive:** Gary Hermsen, CEO +- **PA Org ID:** 19413 +- **Website:** +- **GBP:** ## MCM Composites - **Executive:** Michael Fredrich, CEO +- **PA Org ID:** 20563 +- **Website:** +- **GBP:** ## AGI Fabricators - **Executive:** Brad Landry, General Manager +- **PA Org ID:** 19412 +- **Website:** +- **GBP:** ## Dietz Electric - **Executive:** Mark Henson, Owner +- **PA Org ID:** 19545 +- **Website:** +- **GBP:** ## Metal Craft - **Executive:** Kyle, Vice President +- **PA Org ID:** 19800 +- **Website:** +- **GBP:** ## GullCo - **Executive:** Jeff Zook, Director +- **PA Org ID:** 20157 +- **Website:** +- **GBP:** ## MOD-TRONIC Instruments Limited - **Executive:** Steven Ruple, President +- **PA Org ID:** 19901 +- **Website:** +- **GBP:** ## Krueger Sentry Gauge - **Executive:** Lee Geurts, Vice President +- **PA Org ID:** 20862 +- **Website:** +- **GBP:** ## Chapter 2 Incorporated - **Executive:** Kyle Johnston, Senior Engineer +- **PA Org ID:** 19517 +- **Website:** +- **GBP:** ## Nicolet Plastics LLC - **Executive:** Brian Torres, Chief Commercial Officer +- **PA Org ID:** 19544 +- **Website:** +- **GBP:** ## Renown Electric Motors & Repairs Inc. - **Executive:** Jeff Collins, Partner +- **PA Org ID:** 19546 +- **Website:** +- **GBP:** ## RPM Mechanical Inc. - **Executive:** Mike McNeil, Vice President +- **PA Org ID:** 19395 +- **Website:** +- **GBP:** ## Green Bay Plastics - **Executive:** Michael Hogan, President +- **PA Org ID:** 20643 +- **Website:** +- **GBP:** ## Paragon Steel - **Executive:** Jim Stavis, President & CEO +- **PA Org ID:** 21025 +- **Website:** +- **GBP:** ## Hogge Precision - **Executive:** Danny Hogge Jr, President +- **PA Org ID:** 19411 +- **Website:** +- **GBP:** ## Axiomatic Global Electronic Solutions - **Executive:** Amanda Wilkins, Chief Marketing Officer +- **PA Org ID:** 19633 +- **Website:** +- **GBP:** ## Advanced Industrial - **Executive:** Paul Cedrone, CEO +- **PA Org ID:** 19634 +- **Website:** +- **GBP:** ## ELIS Manufacturing and Packaging Solutions Inc. - **Executive:** Keith Vinson, Chief Executive Officer +- **PA Org ID:** 19656 +- **Website:** +- **GBP:** ## Lubrication Engineers - **Executive:** John Sander, Vice President of Research & Development +- **PA Org ID:** 19449 +- **Website:** +- **GBP:** ## FZE Industrial - **Executive:** Doug Pribyl, CEO +- **PA Org ID:** 22377 +- **Website:** +- **GBP:** ## Machine Specialty & Manufacturing (MSM) - **Executive:** Max Hutson, Vice President of Operations +- **PA Org ID:** 19418 +- **Website:** +- **GBP:** ## DCA - **Executive:** Errol Gelhaar (title unknown) +- **PA Org ID:** 19448 +- **Website:** +- **GBP:** ## EVR Products - -- **Executive:** Gary Waldick, Vice President of EVR Products \ No newline at end of file +- **Executive:** Gary Waldick, Vice President of EVR Products +- **Website:** +- **GBP:** diff --git a/tests/test_press_advantage.py b/tests/test_press_advantage.py new file mode 100644 index 0000000..96dd00f --- /dev/null +++ b/tests/test_press_advantage.py @@ -0,0 +1,679 @@ +"""Tests for Press Advantage API client and submit_press_release tool.""" + +from __future__ import annotations + +from pathlib import Path +from unittest.mock import MagicMock + +import httpx +import pytest +import respx + +from cheddahbot.press_advantage import PressAdvantageClient +from cheddahbot.tools.press_release import ( + _build_links, + _derive_anchor_phrase, + _find_anchor_in_text, + _fuzzy_find_anchor, + _fuzzy_match_company, + _fuzzy_match_company_data, + _parse_company_data, + _parse_company_org_ids, + _resolve_branded_url, + _text_to_html, + submit_press_release, +) + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +SAMPLE_COMPANIES_MD = """\ +# Company Directory + +## Advanced Industrial +- **Executive:** Paul Cedrone, CEO +- **PA Org ID:** 19634 +- **Website:** https://advancedindustrial.com +- **GBP:** https://maps.google.com/maps?cid=1234567890 + +## Metal Craft +- **Executive:** Kyle, Vice President +- **PA Org ID:** 19800 +- **Website:** https://metalcraft.com +- **GBP:** + +## RPM Mechanical Inc. +- **Executive:** Mike McNeil, Vice President +- **PA Org ID:** 19395 +- **Website:** +- **GBP:** +""" + +# PR text that contains company name and a brand+keyword phrase +REALISTIC_PR_TEXT = ( + "Advanced Industrial is a leader in precision manufacturing. " + + "The company specializes in CNC machining and related services. " + + "Advanced Industrial PEEK machining capabilities have expanded significantly " + + "over the past year, enabling the company to serve new markets. " + + " ".join(["filler"] * 530) +) + +LONG_PR_TEXT = " ".join(["word"] * 600) # 600-word dummy PR + + +@pytest.fixture() +def pa_config(): + """Minimal config mock with press_advantage settings.""" + config = MagicMock() + config.press_advantage.api_key = "test-api-key" + return config + + +@pytest.fixture() +def submit_ctx(pa_config): + """Context dict for submit_press_release.""" + return {"config": pa_config} + + +# --------------------------------------------------------------------------- +# PressAdvantageClient tests +# --------------------------------------------------------------------------- + +class TestPressAdvantageClient: + + @respx.mock + def test_get_organizations(self): + respx.get( + "https://app.pressadvantage.com/api/customers/organizations.json", + ).mock(return_value=httpx.Response( + 200, + json=[ + {"id": 19634, "name": "Advanced Industrial"}, + {"id": 19800, "name": "Metal Craft"}, + ], + )) + + client = PressAdvantageClient("test-key") + try: + orgs = client.get_organizations() + finally: + client.close() + + assert len(orgs) == 2 + assert orgs[0]["name"] == "Advanced Industrial" + + @respx.mock + def test_create_release_success(self): + respx.post( + "https://app.pressadvantage.com/api/customers/releases/with_content.json", + ).mock(return_value=httpx.Response( + 200, + json={"id": 99999, "state": "draft", "title": "Test Headline"}, + )) + + client = PressAdvantageClient("test-key") + try: + result = client.create_release( + org_id=19634, + title="Test Headline", + body="

Body text

", + description="Test description", + ) + finally: + client.close() + + assert result["id"] == 99999 + assert result["state"] == "draft" + + @respx.mock + def test_create_release_sends_form_data(self): + route = respx.post( + "https://app.pressadvantage.com/api/customers/releases/with_content.json", + ).mock(return_value=httpx.Response(200, json={"id": 1})) + + client = PressAdvantageClient("test-key") + try: + client.create_release( + org_id=19634, + title="My Title", + body="

Body

", + description="Desc", + distribution="standard", + schedule_distribution="false", + ) + finally: + client.close() + + request = route.calls.last.request + content = request.content.decode() + assert "release%5Btitle%5D=My+Title" in content or "release[title]" in content + + @respx.mock + def test_get_release(self): + respx.get( + "https://app.pressadvantage.com/api/customers/releases/81505.json", + ).mock(return_value=httpx.Response( + 200, + json={"id": 81505, "state": "draft", "title": "Test"}, + )) + + client = PressAdvantageClient("test-key") + try: + result = client.get_release(81505) + finally: + client.close() + + assert result["id"] == 81505 + + @respx.mock + def test_get_built_urls(self): + respx.get( + "https://app.pressadvantage.com/api/customers/releases/81505/built_urls.json", + ).mock(return_value=httpx.Response( + 200, + json=[{"url": "https://example.com/press-release"}], + )) + + client = PressAdvantageClient("test-key") + try: + urls = client.get_built_urls(81505) + finally: + client.close() + + assert len(urls) == 1 + + @respx.mock + def test_api_key_sent_as_query_param(self): + route = respx.get( + "https://app.pressadvantage.com/api/customers/organizations.json", + ).mock(return_value=httpx.Response(200, json=[])) + + client = PressAdvantageClient("my-secret-key") + try: + client.get_organizations() + finally: + client.close() + + request = route.calls.last.request + assert "api_key=my-secret-key" in str(request.url) + + +# --------------------------------------------------------------------------- +# Company data parsing tests +# --------------------------------------------------------------------------- + +class TestParseCompanyOrgIds: + def test_parses_all_companies(self): + mapping = _parse_company_org_ids(SAMPLE_COMPANIES_MD) + assert mapping["advanced industrial"] == 19634 + assert mapping["metal craft"] == 19800 + assert mapping["rpm mechanical inc."] == 19395 + assert len(mapping) == 3 + + def test_empty_input(self): + assert _parse_company_org_ids("") == {} + + def test_missing_org_id(self): + text = "## Some Company\n- **Executive:** John Doe, CEO\n" + assert _parse_company_org_ids(text) == {} + + +class TestParseCompanyData: + def test_parses_full_data(self): + data = _parse_company_data(SAMPLE_COMPANIES_MD) + assert data["advanced industrial"]["org_id"] == 19634 + assert data["advanced industrial"]["website"] == "https://advancedindustrial.com" + assert data["advanced industrial"]["gbp"] == "https://maps.google.com/maps?cid=1234567890" + + def test_parses_empty_fields(self): + data = _parse_company_data(SAMPLE_COMPANIES_MD) + assert data["metal craft"]["website"] == "https://metalcraft.com" + assert data["metal craft"]["gbp"] == "" + + def test_preserves_company_name(self): + data = _parse_company_data(SAMPLE_COMPANIES_MD) + assert data["advanced industrial"]["name"] == "Advanced Industrial" + + def test_empty_input(self): + assert _parse_company_data("") == {} + + +class TestFuzzyMatchCompany: + def test_exact_match(self): + mapping = {"advanced industrial": 19634, "metal craft": 19800} + assert _fuzzy_match_company("Advanced Industrial", mapping) == 19634 + + def test_substring_match_input_contains_key(self): + mapping = {"metal craft": 19800} + assert _fuzzy_match_company("Metal Craft Industries", mapping) == 19800 + + def test_substring_match_key_contains_input(self): + mapping = {"rpm mechanical inc.": 19395} + assert _fuzzy_match_company("RPM Mechanical", mapping) == 19395 + + def test_no_match(self): + mapping = {"advanced industrial": 19634} + assert _fuzzy_match_company("Totally Unknown Corp", mapping) is None + + +class TestFuzzyMatchCompanyData: + def test_exact_match(self): + data = _parse_company_data(SAMPLE_COMPANIES_MD) + result = _fuzzy_match_company_data("Advanced Industrial", data) + assert result is not None + assert result["org_id"] == 19634 + + def test_substring_match(self): + data = _parse_company_data(SAMPLE_COMPANIES_MD) + result = _fuzzy_match_company_data("RPM Mechanical", data) + assert result is not None + assert result["org_id"] == 19395 + + def test_no_match(self): + data = _parse_company_data(SAMPLE_COMPANIES_MD) + assert _fuzzy_match_company_data("Unknown Corp", data) is None + + +# --------------------------------------------------------------------------- +# Anchor phrase helpers +# --------------------------------------------------------------------------- + +class TestDeriveAnchorPhrase: + def test_basic(self): + assert _derive_anchor_phrase("Advanced Industrial", "PEEK machining") == "Advanced Industrial PEEK machining" + + def test_strips_whitespace(self): + assert _derive_anchor_phrase("Metal Craft", " custom fabrication ") == "Metal Craft custom fabrication" + + +class TestFindAnchorInText: + def test_exact_match(self): + text = "Advanced Industrial PEEK machining is our specialty." + assert _find_anchor_in_text(text, "Advanced Industrial PEEK machining") is True + + def test_case_insensitive(self): + text = "advanced industrial peek machining is great." + assert _find_anchor_in_text(text, "Advanced Industrial PEEK Machining") is True + + def test_not_found(self): + text = "This text has nothing relevant." + assert _find_anchor_in_text(text, "Advanced Industrial PEEK machining") is False + + +class TestFuzzyFindAnchor: + def test_finds_nearby_keyword(self): + text = "Advanced Industrial provides excellent PEEK solutions to clients." + result = _fuzzy_find_anchor(text, "Advanced Industrial", "PEEK machining") + assert result is not None + assert "Advanced Industrial" in result + assert "PEEK" in result + + def test_returns_none_when_no_match(self): + text = "This text mentions nothing relevant at all." + result = _fuzzy_find_anchor(text, "Advanced Industrial", "PEEK machining") + assert result is None + + def test_returns_none_when_company_missing(self): + text = "PEEK machining is great but no company name here." + result = _fuzzy_find_anchor(text, "Advanced Industrial", "PEEK machining") + assert result is None + + +# --------------------------------------------------------------------------- +# Branded URL resolution +# --------------------------------------------------------------------------- + +class TestResolveBrandedUrl: + def test_literal_url(self): + data = {"website": "https://example.com", "gbp": "https://maps.google.com/123"} + assert _resolve_branded_url("https://linkedin.com/company/acme", data) == "https://linkedin.com/company/acme" + + def test_gbp_shortcut(self): + data = {"website": "https://example.com", "gbp": "https://maps.google.com/maps?cid=123"} + assert _resolve_branded_url("GBP", data) == "https://maps.google.com/maps?cid=123" + + def test_gbp_case_insensitive(self): + data = {"gbp": "https://maps.google.com/maps?cid=123"} + assert _resolve_branded_url("gbp", data) == "https://maps.google.com/maps?cid=123" + + def test_gbp_shortcut_no_gbp_url(self): + data = {"website": "https://example.com", "gbp": ""} + assert _resolve_branded_url("GBP", data) == "" + + def test_empty_falls_back_to_website(self): + data = {"website": "https://example.com", "gbp": ""} + assert _resolve_branded_url("", data) == "https://example.com" + + def test_empty_no_company_data(self): + assert _resolve_branded_url("", None) == "" + + def test_empty_no_website(self): + data = {"website": "", "gbp": ""} + assert _resolve_branded_url("", data) == "" + + +# --------------------------------------------------------------------------- +# Link building +# --------------------------------------------------------------------------- + +class TestBuildLinks: + def test_both_links_found(self): + text = "Advanced Industrial PEEK machining is excellent. Advanced Industrial leads the way." + links, warnings = _build_links( + text, "Advanced Industrial", "PEEK machining", + "https://example.com/peek", "https://linkedin.com/company/ai", + ) + assert len(links) == 2 + assert links[0]["url"] == "https://example.com/peek" + assert links[0]["anchor"] == "Advanced Industrial PEEK machining" + assert links[1]["url"] == "https://linkedin.com/company/ai" + assert links[1]["anchor"] == "Advanced Industrial" + assert len(warnings) == 0 + + def test_no_urls_provided(self): + text = "Some text about Advanced Industrial." + links, warnings = _build_links(text, "Advanced Industrial", "PEEK", "", "") + assert len(links) == 0 + assert len(warnings) == 0 + + def test_brand_keyword_not_found_warns(self): + text = "This text has no relevant anchor phrases at all. " * 30 + links, warnings = _build_links( + text, "Advanced Industrial", "PEEK machining", + "https://example.com/peek", "", + ) + assert len(warnings) == 1 + assert "NOT found" in warnings[0] + + def test_fuzzy_match_used(self): + text = "Advanced Industrial provides excellent PEEK solutions to many clients worldwide." + links, warnings = _build_links( + text, "Advanced Industrial", "PEEK machining", + "https://example.com/peek", "", + ) + # Fuzzy should find "Advanced Industrial provides excellent PEEK" or similar + assert len(links) == 1 + assert links[0]["url"] == "https://example.com/peek" + assert len(warnings) == 1 + assert "fuzzy" in warnings[0].lower() + + +# --------------------------------------------------------------------------- +# Text to HTML +# --------------------------------------------------------------------------- + +class TestTextToHtml: + def test_basic_paragraphs(self): + text = "First paragraph.\n\nSecond paragraph." + html = _text_to_html(text) + assert html == "

First paragraph.

\n

Second paragraph.

" + + def test_link_injection(self): + text = "As a PEEK plastic manufacturer, Advanced Industrial specializes in precision." + links = [{"url": "https://cncplastics.com/", "anchor": "PEEK plastic manufacturer"}] + html = _text_to_html(text, links) + assert 'PEEK plastic manufacturer' in html + + def test_link_injection_first_occurrence_only(self): + text = "We do CNC machining. Our CNC machining is the best." + links = [{"url": "https://example.com", "anchor": "CNC machining"}] + html = _text_to_html(text, links) + assert html.count('CNC machining') == 1 + assert "Our CNC machining is the best" in html + + def test_bare_url_conversion(self): + text = "Visit https://example.com for more info." + html = _text_to_html(text) + assert 'https://example.com' in html + + def test_empty_text(self): + assert _text_to_html("") == "" + + def test_multiple_links(self): + text = "Company A does widget making. Company B does gadget building." + links = [ + {"url": "https://a.com", "anchor": "widget making"}, + {"url": "https://b.com", "anchor": "gadget building"}, + ] + html = _text_to_html(text, links) + assert 'widget making' in html + assert 'gadget building' in html + + def test_no_links(self): + text = "Simple paragraph." + html = _text_to_html(text, None) + assert html == "

Simple paragraph.

" + + +# --------------------------------------------------------------------------- +# submit_press_release tool tests +# --------------------------------------------------------------------------- + +class TestSubmitPressRelease: + def test_missing_api_key(self): + config = MagicMock() + config.press_advantage.api_key = "" + result = submit_press_release( + headline="Test", company_name="Acme", pr_text=LONG_PR_TEXT, + ctx={"config": config}, + ) + assert "PRESS_ADVANTAGE_API" in result + assert "Error" in result + + def test_missing_context(self): + result = submit_press_release( + headline="Test", company_name="Acme", pr_text=LONG_PR_TEXT, + ) + assert "Error" in result + + def test_no_pr_text_or_file(self, submit_ctx): + result = submit_press_release( + headline="Test", company_name="Advanced Industrial", + ctx=submit_ctx, + ) + assert "Error" in result + assert "pr_text or file_path" in result + + def test_word_count_too_low(self, submit_ctx): + short_text = " ".join(["word"] * 100) + result = submit_press_release( + headline="Test", company_name="Advanced Industrial", + pr_text=short_text, ctx=submit_ctx, + ) + assert "Error" in result + assert "550 words" in result + + def test_file_not_found(self, submit_ctx): + result = submit_press_release( + headline="Test", company_name="Advanced Industrial", + file_path="/nonexistent/file.txt", ctx=submit_ctx, + ) + assert "Error" in result + assert "file not found" in result + + @respx.mock + def test_successful_submission(self, submit_ctx, monkeypatch): + monkeypatch.setattr( + "cheddahbot.tools.press_release._load_file_if_exists", + lambda p: SAMPLE_COMPANIES_MD, + ) + + respx.post( + "https://app.pressadvantage.com/api/customers/releases/with_content.json", + ).mock(return_value=httpx.Response( + 200, + json={"id": 88888, "state": "draft"}, + )) + + result = submit_press_release( + headline="Advanced Industrial Expands PEEK Machining", + company_name="Advanced Industrial", + pr_text=REALISTIC_PR_TEXT, + topic="PEEK machining", + target_url="https://advancedindustrial.com/peek", + ctx=submit_ctx, + ) + + assert "88888" in result + assert "DRAFT" in result + + @respx.mock + def test_branded_link_injected(self, submit_ctx, monkeypatch): + monkeypatch.setattr( + "cheddahbot.tools.press_release._load_file_if_exists", + lambda p: SAMPLE_COMPANIES_MD, + ) + + route = respx.post( + "https://app.pressadvantage.com/api/customers/releases/with_content.json", + ).mock(return_value=httpx.Response(200, json={"id": 1, "state": "draft"})) + + result = submit_press_release( + headline="Advanced Industrial Expands PEEK Machining", + company_name="Advanced Industrial", + pr_text=REALISTIC_PR_TEXT, + topic="PEEK machining", + branded_url="https://linkedin.com/company/advanced-industrial", + ctx=submit_ctx, + ) + + assert "Links injected" in result + assert "Advanced Industrial" in result + + @respx.mock + def test_gbp_shortcut(self, submit_ctx, monkeypatch): + monkeypatch.setattr( + "cheddahbot.tools.press_release._load_file_if_exists", + lambda p: SAMPLE_COMPANIES_MD, + ) + + route = respx.post( + "https://app.pressadvantage.com/api/customers/releases/with_content.json", + ).mock(return_value=httpx.Response(200, json={"id": 1, "state": "draft"})) + + result = submit_press_release( + headline="Advanced Industrial Expands PEEK Machining", + company_name="Advanced Industrial", + pr_text=REALISTIC_PR_TEXT, + topic="PEEK machining", + branded_url="GBP", + ctx=submit_ctx, + ) + + assert "maps.google.com" in result + + @respx.mock + def test_reads_from_file(self, submit_ctx, tmp_path, monkeypatch): + monkeypatch.setattr( + "cheddahbot.tools.press_release._load_file_if_exists", + lambda p: SAMPLE_COMPANIES_MD, + ) + + pr_file = tmp_path / "test_pr.txt" + pr_file.write_text(LONG_PR_TEXT, encoding="utf-8") + + respx.post( + "https://app.pressadvantage.com/api/customers/releases/with_content.json", + ).mock(return_value=httpx.Response(200, json={"id": 77777, "state": "draft"})) + + result = submit_press_release( + headline="Test Headline", + company_name="Advanced Industrial", + file_path=str(pr_file), + ctx=submit_ctx, + ) + + assert "77777" in result + + @respx.mock + def test_company_not_found(self, submit_ctx, monkeypatch): + monkeypatch.setattr( + "cheddahbot.tools.press_release._load_file_if_exists", + lambda p: SAMPLE_COMPANIES_MD, + ) + + respx.get( + "https://app.pressadvantage.com/api/customers/organizations.json", + ).mock(return_value=httpx.Response(200, json=[])) + + result = submit_press_release( + headline="Test", company_name="Totally Unknown Corp", + pr_text=LONG_PR_TEXT, ctx=submit_ctx, + ) + + assert "Error" in result + assert "could not find" in result + + @respx.mock + def test_api_fallback_org_lookup(self, submit_ctx, monkeypatch): + monkeypatch.setattr( + "cheddahbot.tools.press_release._load_file_if_exists", + lambda p: "", + ) + + respx.get( + "https://app.pressadvantage.com/api/customers/organizations.json", + ).mock(return_value=httpx.Response( + 200, + json=[{"id": 12345, "name": "New Client Co"}], + )) + + respx.post( + "https://app.pressadvantage.com/api/customers/releases/with_content.json", + ).mock(return_value=httpx.Response(200, json={"id": 55555, "state": "draft"})) + + result = submit_press_release( + headline="Test Headline", + company_name="New Client Co", + pr_text=LONG_PR_TEXT, + ctx=submit_ctx, + ) + + assert "55555" in result + + @respx.mock + def test_link_warning_when_anchor_not_found(self, submit_ctx, monkeypatch): + monkeypatch.setattr( + "cheddahbot.tools.press_release._load_file_if_exists", + lambda p: SAMPLE_COMPANIES_MD, + ) + + respx.post( + "https://app.pressadvantage.com/api/customers/releases/with_content.json", + ).mock(return_value=httpx.Response(200, json={"id": 1, "state": "draft"})) + + # LONG_PR_TEXT is just "word word word..." — no anchor phrase possible + result = submit_press_release( + headline="Advanced Industrial Expands PEEK Machining", + company_name="Advanced Industrial", + pr_text=LONG_PR_TEXT, + topic="PEEK machining", + target_url="https://example.com/peek", + ctx=submit_ctx, + ) + + assert "warning" in result.lower() + assert "NOT found" in result + + @respx.mock + def test_topic_derived_from_headline(self, submit_ctx, monkeypatch): + """When topic is not provided, it's derived from headline minus company name.""" + monkeypatch.setattr( + "cheddahbot.tools.press_release._load_file_if_exists", + lambda p: SAMPLE_COMPANIES_MD, + ) + + respx.post( + "https://app.pressadvantage.com/api/customers/releases/with_content.json", + ).mock(return_value=httpx.Response(200, json={"id": 1, "state": "draft"})) + + result = submit_press_release( + headline="Advanced Industrial Expands PEEK Machining", + company_name="Advanced Industrial", + pr_text=LONG_PR_TEXT, + ctx=submit_ctx, + ) + + assert "DRAFT" in result