From 3b4a8e47be7a02ef236ab1ce83d80d71fd26f7b8 Mon Sep 17 00:00:00 2001 From: PeninsulaInd Date: Fri, 20 Feb 2026 12:11:11 -0600 Subject: [PATCH] Require IMSURL for Cora pipeline, fail early if missing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove placeholder URL fallback from ingest-cora args. Add early validation in run_cora_backlinks and folder watcher — if IMSURL is empty, block the task with a notification instead of running with a fake URL. Update tests to pass money_site_url and add missing-URL test. Co-Authored-By: Claude Opus 4.6 --- cheddahbot/scheduler.py | 26 ++++++++++++++++- cheddahbot/tools/linkbuilding.py | 8 ++++-- tests/test_linkbuilding.py | 49 +++++++++++++++++++++++++------- 3 files changed, 68 insertions(+), 15 deletions(-) diff --git a/cheddahbot/scheduler.py b/cheddahbot/scheduler.py index 556475d..54be1ff 100644 --- a/cheddahbot/scheduler.py +++ b/cheddahbot/scheduler.py @@ -551,10 +551,34 @@ class Scheduler: ) # Build tool args from the matched task's custom fields + money_site_url = matched_task.custom_fields.get("IMSURL", "") or "" + if not money_site_url: + log.warning("Task %s (%s) missing IMSURL — skipping", task_id, matched_task.name) + self.db.kv_set( + kv_key, + json.dumps( + { + "status": "blocked", + "reason": "missing_imsurl", + "filename": filename, + "task_id": task_id, + "task_name": matched_task.name, + "checked_at": datetime.now(UTC).isoformat(), + } + ), + ) + self._notify( + f"Folder watcher: **{filename}** matched task **{matched_task.name}** " + f"but **IMSURL is empty**. Set the IMSURL field in ClickUp before " + f"the file can be processed.", + category="linkbuilding", + ) + return + args = { "xlsx_path": str(xlsx_path), "project_name": matched_task.name, - "money_site_url": matched_task.custom_fields.get("IMSURL", ""), + "money_site_url": money_site_url, "custom_anchors": matched_task.custom_fields.get("CustomAnchors", "") or "", "cli_flags": matched_task.custom_fields.get("CLIFlags", "") or "", "clickup_task_id": task_id, diff --git a/cheddahbot/tools/linkbuilding.py b/cheddahbot/tools/linkbuilding.py index e66a96e..2ec466a 100644 --- a/cheddahbot/tools/linkbuilding.py +++ b/cheddahbot/tools/linkbuilding.py @@ -75,11 +75,8 @@ def _build_ingest_args( """Construct CLI argument list for ingest-cora command.""" args = ["ingest-cora", "-f", xlsx_path, "-n", project_name] - # Always pass -m to prevent interactive stdin prompt if money_site_url: args.extend(["-m", money_site_url]) - else: - args.extend(["-m", "https://placeholder.example.com"]) if branded_plus_ratio and branded_plus_ratio != 0.7: args.extend(["-bp", str(branded_plus_ratio)]) @@ -466,6 +463,11 @@ def run_cora_backlinks( return "Error: xlsx_path is required for Cora Backlinks pipeline." if not project_name: return "Error: project_name is required for Cora Backlinks pipeline." + if not money_site_url: + return ( + "Error: money_site_url (IMSURL) is required for Cora Backlinks pipeline. " + "Set the IMSURL custom field on the ClickUp task before processing." + ) blm_dir = _get_blm_dir(ctx) diff --git a/tests/test_linkbuilding.py b/tests/test_linkbuilding.py index 26b56a2..0ef94ca 100644 --- a/tests/test_linkbuilding.py +++ b/tests/test_linkbuilding.py @@ -165,21 +165,21 @@ class TestParseGenerateOutput: class TestBuildIngestArgs: def test_basic_args(self): - args = _build_ingest_args("/tmp/test.xlsx", "My Project") + args = _build_ingest_args("/tmp/test.xlsx", "My Project", money_site_url="https://example.com") assert args[0] == "ingest-cora" assert "-f" in args assert args[args.index("-f") + 1] == "/tmp/test.xlsx" assert "-n" in args assert args[args.index("-n") + 1] == "My Project" - assert "-m" in args # always present + assert "-m" in args def test_with_money_site_url(self): args = _build_ingest_args("/tmp/test.xlsx", "Proj", money_site_url="https://example.com") assert args[args.index("-m") + 1] == "https://example.com" - def test_placeholder_url_when_empty(self): + def test_no_m_flag_when_empty(self): args = _build_ingest_args("/tmp/test.xlsx", "Proj") - assert args[args.index("-m") + 1] == "https://placeholder.example.com" + assert "-m" not in args def test_custom_branded_plus_ratio(self): args = _build_ingest_args("/tmp/test.xlsx", "Proj", branded_plus_ratio=0.5) @@ -305,9 +305,18 @@ class TestRunCoraBacklinks: result = run_cora_backlinks(xlsx_path="/fake.xlsx", project_name="", ctx=mock_ctx) assert "Error" in result + def test_missing_money_site_url(self, mock_ctx): + result = run_cora_backlinks( + xlsx_path="/fake.xlsx", project_name="Test", money_site_url="", ctx=mock_ctx + ) + assert "IMSURL" in result + def test_xlsx_not_found(self, mock_ctx): result = run_cora_backlinks( - xlsx_path="/nonexistent/file.xlsx", project_name="Test", ctx=mock_ctx + xlsx_path="/nonexistent/file.xlsx", + project_name="Test", + money_site_url="https://example.com", + ctx=mock_ctx, ) assert "not found" in result @@ -328,7 +337,10 @@ class TestRunCoraBacklinks: ) mock_cmd.side_effect = [ingest_proc, gen_proc] - result = run_cora_backlinks(xlsx_path=str(xlsx), project_name="Test Project", ctx=mock_ctx) + result = run_cora_backlinks( + xlsx_path=str(xlsx), project_name="Test Project", + money_site_url="https://example.com", ctx=mock_ctx, + ) assert "Step 1: Ingest CORA Report" in result assert "Step 2: Generate Content Batch" in result @@ -344,7 +356,10 @@ class TestRunCoraBacklinks: args=[], returncode=1, stdout="Error: parsing failed", stderr="traceback" ) - result = run_cora_backlinks(xlsx_path=str(xlsx), project_name="Test", ctx=mock_ctx) + result = run_cora_backlinks( + xlsx_path=str(xlsx), project_name="Test", + money_site_url="https://example.com", ctx=mock_ctx, + ) assert "Error" in result assert "ingest-cora failed" in result @@ -361,7 +376,10 @@ class TestRunCoraBacklinks: ) mock_cmd.side_effect = [ingest_proc, gen_proc] - result = run_cora_backlinks(xlsx_path=str(xlsx), project_name="Test", ctx=mock_ctx) + result = run_cora_backlinks( + xlsx_path=str(xlsx), project_name="Test", + money_site_url="https://example.com", ctx=mock_ctx, + ) assert "Step 1: Ingest CORA Report" in result # Step 1 succeeded assert "generate-batch failed" in result @@ -372,7 +390,10 @@ class TestRunCoraBacklinks: mock_cmd.side_effect = subprocess.TimeoutExpired(cmd="test", timeout=1800) - result = run_cora_backlinks(xlsx_path=str(xlsx), project_name="Test", ctx=mock_ctx) + result = run_cora_backlinks( + xlsx_path=str(xlsx), project_name="Test", + money_site_url="https://example.com", ctx=mock_ctx, + ) assert "timed out" in result @@ -575,7 +596,10 @@ class TestClickUpStateMachine: ) mock_cmd.side_effect = [ingest_proc, gen_proc] - result = run_cora_backlinks(xlsx_path=str(xlsx), project_name="Test", ctx=mock_ctx) + result = run_cora_backlinks( + xlsx_path=str(xlsx), project_name="Test", + money_site_url="https://example.com", ctx=mock_ctx, + ) assert "ClickUp Sync" in result @@ -608,7 +632,10 @@ class TestClickUpStateMachine: args=[], returncode=1, stdout="Error", stderr="crash" ) - result = run_cora_backlinks(xlsx_path=str(xlsx), project_name="Test", ctx=mock_ctx) + result = run_cora_backlinks( + xlsx_path=str(xlsx), project_name="Test", + money_site_url="https://example.com", ctx=mock_ctx, + ) assert "Error" in result raw = mock_ctx["db"].kv_get("clickup:task:task_fail:state")